From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 Aug 2017 19:09:07 +0200
Subject: Add the hotplug rework including all its side stories
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
Git-commit: 6fb87cb1181786dc88432c6c6093a02a897e5789
Patch-mainline: Queued in subsystem maintainer repository
References: SLE15 Realtime Extension
This is the hotplug rework. It includes dropping the old hotplug
workarounds, adding a new rwlock implementation for RT, cherry-picking a
few commits from upstream and so on.
This is an all-in-one commit since dropping the old patches and replacing
them with new ones piecemeal is quite painful.
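The conversion pattern that repeats throughout the diff: get_online_cpus()/
put_online_cpus() become cpus_read_lock()/cpus_read_unlock(), and callers
which install hotplug states while already holding the lock move to the new
*_cpuslocked variants. A minimal sketch of the converted calling pattern
(the demo_* names are placeholders, not part of this patch):

	static int demo_online(unsigned int cpu)  { return 0; }	/* placeholder */
	static int demo_offline(unsigned int cpu) { return 0; }	/* placeholder */

	static int demo_register(void)
	{
		int ret;

		cpus_read_lock();		/* was: get_online_cpus() */
		/*
		 * The hotplug lock is already held here, so plain
		 * cpuhp_setup_state() could deadlock on the percpu-rwsem.
		 * The _cpuslocked variant skips the lock acquisition.
		 */
		ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
						   "demo/hotplug:online",
						   demo_online, demo_offline);
		cpus_read_unlock();		/* was: put_online_cpus() */
		return ret < 0 ? ret : 0;
	}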
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
---
arch/alpha/include/asm/spinlock_types.h | 4
arch/arm/include/asm/spinlock_types.h | 4
arch/arm/kernel/hw_breakpoint.c | 11
arch/arm/kernel/patch.c | 2
arch/arm/probes/kprobes/core.c | 3
arch/arm64/include/asm/insn.h | 1
arch/arm64/include/asm/spinlock_types.h | 4
arch/arm64/kernel/insn.c | 5
arch/blackfin/include/asm/spinlock_types.h | 4
arch/hexagon/include/asm/spinlock_types.h | 4
arch/ia64/include/asm/spinlock_types.h | 4
arch/m32r/include/asm/spinlock_types.h | 4
arch/metag/include/asm/spinlock_types.h | 4
arch/mips/include/asm/spinlock_types.h | 4
arch/mn10300/include/asm/spinlock_types.h | 4
arch/powerpc/include/asm/spinlock_types.h | 4
arch/powerpc/kvm/book3s_hv.c | 14
arch/powerpc/platforms/powernv/subcore.c | 7
arch/s390/include/asm/spinlock_types.h | 4
arch/s390/kernel/kprobes.c | 4
arch/s390/kernel/time.c | 6
arch/sh/include/asm/spinlock_types.h | 4
arch/sparc/include/asm/spinlock_types.h | 4
arch/tile/include/asm/spinlock_types.h | 4
arch/x86/events/core.c | 1
arch/x86/events/intel/core.c | 11
arch/x86/kernel/cpu/mtrr/main.c | 2
arch/xtensa/include/asm/spinlock_types.h | 4
drivers/acpi/processor_driver.c | 4
drivers/acpi/processor_throttling.c | 16
drivers/cpufreq/cpufreq.c | 21
drivers/hwtracing/coresight/coresight-etm3x.c | 20
drivers/hwtracing/coresight/coresight-etm4x.c | 20
drivers/pci/pci-driver.c | 47 +-
include/linux/cpu.h | 40 +
include/linux/cpuhotplug.h | 38 +
include/linux/locallock.h | 22 -
include/linux/padata.h | 3
include/linux/pci.h | 1
include/linux/perf_event.h | 2
include/linux/rwlock_rt.h | 45 +-
include/linux/rwlock_types_rt.h | 59 ++
include/linux/sched.h | 18
include/linux/spinlock_rt.h | 7
include/linux/spinlock_types_up.h | 4
include/linux/stop_machine.h | 26 +
kernel/Kconfig.locks | 17
kernel/cpu.c | 552 ++++++--------------------
kernel/events/core.c | 106 +++-
kernel/kprobes.c | 59 +-
kernel/locking/Makefile | 2
kernel/locking/rt.c | 131 ------
kernel/locking/rtmutex.c | 96 +---
kernel/locking/rtmutex_common.h | 4
kernel/locking/rwlock-rt.c | 400 ++++++++++++++++++
kernel/padata.c | 43 +-
kernel/sched/core.c | 82 ---
kernel/stop_machine.c | 11
58 files changed, 1006 insertions(+), 1021 deletions(-)
create mode 100644 kernel/locking/rwlock-rt.c
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ALPHA_SPINLOCK_TYPES_H
#define _ALPHA_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#define TICKET_SHIFT 16
typedef struct {
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1090,7 +1090,7 @@ static int __init arch_hw_breakpoint_ini
* driven low on this core and there isn't an architected way to
* determine that.
*/
- get_online_cpus();
+ cpus_read_lock();
register_undef_hook(&debug_reg_hook);
/*
@@ -1098,15 +1098,16 @@ static int __init arch_hw_breakpoint_ini
* assume that a halting debugger will leave the world in a nice state
* for us.
*/
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
- dbg_reset_online, NULL);
+ ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/hw_breakpoint:online",
+ dbg_reset_online, NULL);
unregister_undef_hook(&debug_reg_hook);
if (WARN_ON(ret < 0) || !cpumask_empty(&debug_err_mask)) {
core_num_brps = 0;
core_num_wrps = 0;
if (ret > 0)
cpuhp_remove_state_nocalls(ret);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -1124,7 +1125,7 @@ static int __init arch_hw_breakpoint_ini
TRAP_HWBKPT, "watchpoint debug exception");
hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
TRAP_HWBKPT, "breakpoint debug exception");
- put_online_cpus();
+ cpus_read_unlock();
/* Register PM notifiers. */
pm_init();
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -124,5 +124,5 @@ void __kprobes patch_text(void *addr, un
.insn = insn,
};
- stop_machine(patch_text_stop_machine, &patch, NULL);
+ stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
}
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -182,7 +182,8 @@ void __kprobes kprobes_remove_breakpoint
.addr = addr,
.insn = insn,
};
- stop_machine(__kprobes_remove_breakpoint, &p, cpu_online_mask);
+ stop_machine_cpuslocked(__kprobes_remove_breakpoint, &p,
+ cpu_online_mask);
}
void __kprobes arch_disarm_kprobe(struct kprobe *p)
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -433,7 +433,6 @@ u32 aarch64_set_branch_offset(u32 insn,
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
s32 aarch64_insn_adrp_get_offset(u32 insn);
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -16,10 +16,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
-# error "please don't include this file directly"
-#endif
-
#include <linux/types.h>
#define TICKET_SHIFT 16
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -255,6 +255,7 @@ static int __kprobes aarch64_insn_patch_
return ret;
}
+static
int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
{
struct aarch64_insn_patch patch = {
@@ -267,8 +268,8 @@ int __kprobes aarch64_insn_patch_text_sy
if (cnt <= 0)
return -EINVAL;
- return stop_machine(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
+ return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
+ cpu_online_mask);
}
int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
--- a/arch/blackfin/include/asm/spinlock_types.h
+++ b/arch/blackfin/include/asm/spinlock_types.h
@@ -7,10 +7,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#include <asm/rwlock.h>
typedef struct {
--- a/arch/hexagon/include/asm/spinlock_types.h
+++ b/arch/hexagon/include/asm/spinlock_types.h
@@ -21,10 +21,6 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_IA64_SPINLOCK_TYPES_H
#define _ASM_IA64_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/m32r/include/asm/spinlock_types.h
+++ b/arch/m32r/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_M32R_SPINLOCK_TYPES_H
#define _ASM_M32R_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile int slock;
} arch_spinlock_t;
--- a/arch/metag/include/asm/spinlock_types.h
+++ b/arch/metag/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_METAG_SPINLOCK_TYPES_H
#define _ASM_METAG_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#include <linux/types.h>
#include <asm/byteorder.h>
--- a/arch/mn10300/include/asm/spinlock_types.h
+++ b/arch/mn10300/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct arch_spinlock {
unsigned int slock;
} arch_spinlock_t;
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3871,7 +3871,7 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
- get_online_cpus();
+ cpus_read_lock();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
@@ -3893,17 +3893,17 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
- put_online_cpus();
+ cpus_read_unlock();
kfree(ops->rm_core);
kfree(ops);
return;
}
- cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
- "ppc/kvm_book3s:prepare",
- kvmppc_set_host_core,
- kvmppc_clear_host_core);
- put_online_cpus();
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
+ cpus_read_unlock();
}
void kvmppc_free_host_rm_ops(void)
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -349,7 +349,7 @@ static int set_subcores_per_core(int new
state->master = 0;
}
- get_online_cpus();
+ cpus_read_lock();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
@@ -357,9 +357,10 @@ static int set_subcores_per_core(int new
/* Ensure state is consistent before we call the other cpus */
mb();
- stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+ stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+ cpu_online_mask);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
int lock;
} __attribute__ ((aligned (4))) arch_spinlock_t;
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -196,7 +196,7 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -204,7 +204,7 @@ void arch_disarm_kprobe(struct kprobe *p
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -677,10 +677,10 @@ static void stp_work_fn(struct work_stru
goto out_unlock;
memset(&stp_sync, 0, sizeof(stp_sync));
- get_online_cpus();
+ cpus_read_lock();
atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
- stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
- put_online_cpus();
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
if (!check_sync_clock())
/*
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SH_SPINLOCK_TYPES_H
#define __ASM_SH_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/sparc/include/asm/spinlock_types.h
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __SPARC_SPINLOCK_TYPES_H
#define __SPARC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned char lock;
} arch_spinlock_t;
--- a/arch/tile/include/asm/spinlock_types.h
+++ b/arch/tile/include/asm/spinlock_types.h
@@ -15,10 +15,6 @@
#ifndef _ASM_TILE_SPINLOCK_TYPES_H
#define _ASM_TILE_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#ifdef __tilegx__
/* Low 15 bits are "next"; high 15 bits are "current". */
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2302,7 +2302,6 @@ void perf_check_microcode(void)
if (x86_pmu.check_microcode)
x86_pmu.check_microcode();
}
-EXPORT_SYMBOL_GPL(perf_check_microcode);
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3889,12 +3889,10 @@ static void intel_snb_check_microcode(vo
int pebs_broken = 0;
int cpu;
- get_online_cpus();
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
- put_online_cpus();
if (pebs_broken == x86_pmu.pebs_broken)
return;
@@ -3967,7 +3965,9 @@ static bool check_msr(unsigned long msr,
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
+ cpus_read_lock();
intel_snb_check_microcode();
+ cpus_read_unlock();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
@@ -4834,13 +4834,12 @@ static __init int fixup_ht_bug(void)
lockup_detector_resume();
- get_online_cpus();
+ cpus_read_lock();
- for_each_online_cpu(c) {
+ for_each_online_cpu(c)
free_excl_cntrs(&per_cpu(cpu_hw_events, c));
- }
- put_online_cpus();
+ cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
return 0;
}
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -807,10 +807,8 @@ void mtrr_save_state(void)
if (!mtrr_enabled())
return;
- get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
- put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)
--- a/arch/xtensa/include/asm/spinlock_types.h
+++ b/arch/xtensa/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -268,9 +268,9 @@ static int acpi_processor_start(struct d
return -ENODEV;
/* Protect against concurrent CPU hotplug operations */
- get_online_cpus();
+ cpu_hotplug_disable();
ret = __acpi_processor_start(device);
- put_online_cpus();
+ cpu_hotplug_enable();
return ret;
}
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -909,6 +909,13 @@ static long __acpi_processor_get_throttl
return pr->throttling.acpi_processor_get_throttling(pr);
}
+static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
+{
+ if (direct || (is_percpu_thread() && cpu == smp_processor_id()))
+ return fn(arg);
+ return work_on_cpu(cpu, fn, arg);
+}
+
static int acpi_processor_get_throttling(struct acpi_processor *pr)
{
if (!pr)
@@ -926,7 +933,7 @@ static int acpi_processor_get_throttling
if (!cpu_online(pr->id))
return -ENODEV;
- return work_on_cpu(pr->id, __acpi_processor_get_throttling, pr);
+ return call_on_cpu(pr->id, __acpi_processor_get_throttling, pr, false);
}
static int acpi_processor_get_fadt_info(struct acpi_processor *pr)
@@ -1076,13 +1083,6 @@ static long acpi_processor_throttling_fn
arg->target_state, arg->force);
}
-static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
-{
- if (direct)
- return fn(arg);
- return work_on_cpu(cpu, fn, arg);
-}
-
static int __acpi_processor_set_throttling(struct acpi_processor *pr,
int state, bool force, bool direct)
{
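The call_on_cpu() helper, moved up here, gains one extra direct-call
condition: a per-CPU kthread that is already running on the target CPU may
invoke @fn directly instead of bouncing through work_on_cpu(). A hedged
sketch of how a caller sees the two paths (demo_get_throttling() is
illustrative, not part of this patch):

	static int demo_get_throttling(struct acpi_processor *pr)
	{
		/*
		 * From a PF_NO_SETAFFINITY kthread bound to pr->id and
		 * currently on it (is_percpu_thread(), added in the sched.h
		 * hunk below), fn runs synchronously. From any other context
		 * the call is bounced to the right CPU via work_on_cpu().
		 */
		return call_on_cpu(pr->id, __acpi_processor_get_throttling,
				   pr, false);
	}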
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -962,7 +962,7 @@ static ssize_t store(struct kobject *kob
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
@@ -970,7 +970,7 @@ static ssize_t store(struct kobject *kob
up_write(&policy->rwsem);
}
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -2531,7 +2531,7 @@ int cpufreq_register_driver(struct cpufr
pr_debug("trying to register driver %s\n", driver_data->name);
/* Protect against concurrent CPU online/offline. */
- get_online_cpus();
+ cpus_read_lock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver) {
@@ -2564,9 +2564,10 @@ int cpufreq_register_driver(struct cpufr
goto err_if_unreg;
}
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
- cpuhp_cpufreq_online,
- cpuhp_cpufreq_offline);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "cpufreq:online",
+ cpuhp_cpufreq_online,
+ cpuhp_cpufreq_offline);
if (ret < 0)
goto err_if_unreg;
hp_online = ret;
@@ -2584,7 +2585,7 @@ err_null_driver:
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
out:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
@@ -2607,17 +2608,17 @@ int cpufreq_unregister_driver(struct cpu
pr_debug("unregistering driver %s\n", driver->name);
/* Protect against concurrent cpu hotplug */
- get_online_cpus();
+ cpus_read_lock();
subsys_interface_unregister(&cpufreq_interface);
remove_boost_sysfs_file();
- cpuhp_remove_state_nocalls(hp_online);
+ cpuhp_remove_state_nocalls_cpuslocked(hp_online);
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -587,7 +587,7 @@ static void etm_disable_sysfs(struct cor
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
- get_online_cpus();
+ cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
@@ -597,7 +597,7 @@ static void etm_disable_sysfs(struct cor
smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
- put_online_cpus();
+ cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
@@ -795,7 +795,7 @@ static int etm_probe(struct amba_device
drvdata->cpu = pdata ? pdata->cpu : 0;
- get_online_cpus();
+ cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
@@ -803,17 +803,17 @@ static int etm_probe(struct amba_device
dev_err(dev, "ETM arch init failed\n");
if (!etm_count++) {
- cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight:starting",
- etm_starting_cpu, etm_dying_cpu);
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "arm/coresight:online",
- etm_online_cpu, NULL);
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight:starting",
+ etm_starting_cpu, etm_dying_cpu);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight:online",
+ etm_online_cpu, NULL);
if (ret < 0)
goto err_arch_supported;
hp_online = ret;
}
- put_online_cpus();
+ cpus_read_unlock();
if (etm_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -372,7 +372,7 @@ static void etm4_disable_sysfs(struct co
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
- get_online_cpus();
+ cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
@@ -382,7 +382,7 @@ static void etm4_disable_sysfs(struct co
smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
- put_online_cpus();
+ cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
@@ -983,7 +983,7 @@ static int etm4_probe(struct amba_device
drvdata->cpu = pdata ? pdata->cpu : 0;
- get_online_cpus();
+ cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
@@ -991,18 +991,18 @@ static int etm4_probe(struct amba_device
dev_err(dev, "ETM arch init failed\n");
if (!etm4_count++) {
- cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight4:starting",
- etm4_starting_cpu, etm4_dying_cpu);
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "arm/coresight4:online",
- etm4_online_cpu, NULL);
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight4:starting",
+ etm4_starting_cpu, etm4_dying_cpu);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight4:online",
+ etm4_online_cpu, NULL);
if (ret < 0)
goto err_arch_supported;
hp_online = ret;
}
- put_online_cpus();
+ cpus_read_unlock();
if (etm4_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -323,10 +323,19 @@ static long local_pci_probe(void *_ddi)
return 0;
}
+static bool pci_physfn_is_probed(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_IOV
+ return dev->is_virtfn && dev->physfn->is_probed;
+#else
+ return false;
+#endif
+}
+
static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
const struct pci_device_id *id)
{
- int error, node;
+ int error, node, cpu;
struct drv_dev_and_id ddi = { drv, dev, id };
/*
@@ -335,33 +344,27 @@ static int pci_call_probe(struct pci_dri
* on the right node.
*/
node = dev_to_node(&dev->dev);
+ dev->is_probed = 1;
+
+ cpu_hotplug_disable();
/*
- * On NUMA systems, we are likely to call a PF probe function using
- * work_on_cpu(). If that probe calls pci_enable_sriov() (which
- * adds the VF devices via pci_bus_add_device()), we may re-enter
- * this function to call the VF probe function. Calling
- * work_on_cpu() again will cause a lockdep warning. Since VFs are
- * always on the same node as the PF, we can work around this by
- * avoiding work_on_cpu() when we're already on the correct node.
- *
- * Preemption is enabled, so it's theoretically unsafe to use
- * numa_node_id(), but even if we run the probe function on the
- * wrong node, it should be functionally correct.
+ * Prevent nesting work_on_cpu() for the case where a Virtual Function
+ * device is probed from work_on_cpu() of the Physical device.
*/
- if (node >= 0 && node != numa_node_id()) {
- int cpu;
-
- get_online_cpus();
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
+ pci_physfn_is_probed(dev))
+ cpu = nr_cpu_ids;
+ else
cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
- if (cpu < nr_cpu_ids)
- error = work_on_cpu(cpu, local_pci_probe, &ddi);
- else
- error = local_pci_probe(&ddi);
- put_online_cpus();
- } else
+
+ if (cpu < nr_cpu_ids)
+ error = work_on_cpu(cpu, local_pci_probe, &ddi);
+ else
error = local_pci_probe(&ddi);
+ dev->is_probed = 0;
+ cpu_hotplug_enable();
return error;
}
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -117,12 +117,11 @@ static inline void cpu_maps_update_done(
extern struct bus_type cpu_subsys;
#ifdef CONFIG_HOTPLUG_CPU
-/* Stop CPUs going up and down. */
-
-extern void cpu_hotplug_begin(void);
-extern void cpu_hotplug_done(void);
-extern void get_online_cpus(void);
-extern void put_online_cpus(void);
+extern void cpus_write_lock(void);
+extern void cpus_write_unlock(void);
+extern void cpus_read_lock(void);
+extern void cpus_read_unlock(void);
+extern void lockdep_assert_cpus_held(void);
extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
void clear_tasks_mm_cpumask(int cpu);
@@ -130,18 +129,25 @@ int cpu_down(unsigned int cpu);
extern void pin_current_cpu(void);
extern void unpin_current_cpu(void);
-#else /* CONFIG_HOTPLUG_CPU */
-
-static inline void cpu_hotplug_begin(void) {}
-static inline void cpu_hotplug_done(void) {}
-#define get_online_cpus() do { } while (0)
-#define put_online_cpus() do { } while (0)
-#define cpu_hotplug_disable() do { } while (0)
-#define cpu_hotplug_enable() do { } while (0)
-static inline void pin_current_cpu(void) { }
-static inline void unpin_current_cpu(void) { }
+#else /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_HOTPLUG_CPU */
+static inline void cpus_write_lock(void) { }
+static inline void cpus_write_unlock(void) { }
+static inline void cpus_read_lock(void) { }
+static inline void cpus_read_unlock(void) { }
+static inline void lockdep_assert_cpus_held(void) { }
+static inline void cpu_hotplug_disable(void) { }
+static inline void cpu_hotplug_enable(void) { }
+static inline void pin_current_cpu(void) { }
+static inline void unpin_current_cpu(void) { }
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/* Wrappers which go away once all code is converted */
+static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
+static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
+#define get_online_cpus cpus_read_lock
+#define put_online_cpus cpus_read_unlock
#ifdef CONFIG_PM_SLEEP_SMP
extern int freeze_secondary_cpus(int primary);
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -164,6 +164,11 @@ int __cpuhp_setup_state(enum cpuhp_state
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu), bool multi_instance);
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
+ bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance);
/**
* cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
* @state: The state for which the calls are installed
@@ -182,6 +187,15 @@ static inline int cpuhp_setup_state(enum
return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
+static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state_cpuslocked(state, name, true, startup,
+ teardown, false);
+}
+
/**
* cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
* callbacks
@@ -202,6 +216,15 @@ static inline int cpuhp_setup_state_noca
false);
}
+static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state_cpuslocked(state, name, false, startup,
+ teardown, false);
+}
+
/**
* cpuhp_setup_state_multi - Add callbacks for multi state
* @state: The state for which the calls are installed
@@ -228,6 +251,8 @@ static inline int cpuhp_setup_state_mult
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
bool invoke);
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node, bool invoke);
/**
* cpuhp_state_add_instance - Add an instance for a state and invoke startup
@@ -260,7 +285,15 @@ static inline int cpuhp_state_add_instan
return __cpuhp_state_add_instance(state, node, false);
}
+static inline int
+cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node)
+{
+ return __cpuhp_state_add_instance_cpuslocked(state, node, false);
+}
+
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
/**
* cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
@@ -284,6 +317,11 @@ static inline void cpuhp_remove_state_no
__cpuhp_remove_state(state, false);
}
+static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
+{
+ __cpuhp_remove_state_cpuslocked(state, false);
+}
+
/**
* cpuhp_remove_multi_state - Remove hotplug multi state callback
* @state: The state for which the calls are removed
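All of the _cpuslocked variants above funnel into the same __cpuhp_*
implementations and, per the kernel/cpu.c hunks further down, assert via
lockdep_assert_cpus_held() that the hotplug lock really is held. A hedged
sketch for the multi-instance case (the demo_* state and node are
illustrative):

	static enum cpuhp_state demo_multi_state; /* from cpuhp_setup_state_multi() */
	static struct hlist_node demo_node;

	static int demo_add_instance(void)
	{
		int ret;

		cpus_read_lock();
		/* ... other work that must exclude CPU hotplug ... */
		ret = cpuhp_state_add_instance_nocalls_cpuslocked(demo_multi_state,
								  &demo_node);
		cpus_read_unlock();
		return ret;
	}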
--- a/include/linux/locallock.h
+++ b/include/linux/locallock.h
@@ -36,26 +36,10 @@ struct local_irq_lock {
spin_lock_init(&per_cpu(lvar, __cpu).lock); \
} while (0)
-/*
- * spin_lock|trylock|unlock_local flavour that does not migrate disable
- * used for __local_lock|trylock|unlock where get_local_var/put_local_var
- * already takes care of the migrate_disable/enable
- * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
- */
-#ifdef CONFIG_PREEMPT_RT_FULL
-# define spin_lock_local(lock) rt_spin_lock__no_mg(lock)
-# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock)
-# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock)
-#else
-# define spin_lock_local(lock) spin_lock(lock)
-# define spin_trylock_local(lock) spin_trylock(lock)
-# define spin_unlock_local(lock) spin_unlock(lock)
-#endif
-
static inline void __local_lock(struct local_irq_lock *lv)
{
if (lv->owner != current) {
- spin_lock_local(&lv->lock);
+ spin_lock(&lv->lock);
LL_WARN(lv->owner);
LL_WARN(lv->nestcnt);
lv->owner = current;
@@ -71,7 +55,7 @@ static inline void __local_lock(struct l
static inline int __local_trylock(struct local_irq_lock *lv)
{
- if (lv->owner != current && spin_trylock_local(&lv->lock)) {
+ if (lv->owner != current && spin_trylock(&lv->lock)) {
LL_WARN(lv->owner);
LL_WARN(lv->nestcnt);
lv->owner = current;
@@ -98,7 +82,7 @@ static inline void __local_unlock(struct
return;
lv->owner = NULL;
- spin_unlock_local(&lv->lock);
+ spin_unlock(&lv->lock);
}
#define local_unlock(lvar) \
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -170,9 +170,6 @@ struct padata_instance {
extern struct padata_instance *padata_alloc_possible(
struct workqueue_struct *wq);
-extern struct padata_instance *padata_alloc(struct workqueue_struct *wq,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask);
extern void padata_free(struct padata_instance *pinst);
extern int padata_do_parallel(struct padata_instance *pinst,
struct padata_priv *padata, int cb_cpu);
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -415,6 +415,7 @@ struct pci_dev {
unsigned int irq_managed:1;
unsigned int has_secondary_link:1;
unsigned int non_compliant_bars:1; /* broken BARs; ignore them */
+ unsigned int is_probed:1; /* device probing in progress */
unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -800,6 +800,8 @@ struct perf_cpu_context {
struct list_head sched_cb_entry;
int sched_cb_usage;
+
+ int online;
};
struct perf_output_handle {
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -5,41 +5,39 @@
#error Do not include directly. Use spinlock.h
#endif
-#define rwlock_init(rwl) \
-do { \
- static struct lock_class_key __key; \
- \
- rt_mutex_init(&(rwl)->lock); \
- __rt_rwlock_init(rwl, #rwl, &__key); \
-} while (0)
-
extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
-extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
-extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
-extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
-#define write_trylock_irqsave(lock, flags) \
- __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
+static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags)
+{
+ /* XXX ARCH_IRQ_ENABLED */
+ *flags = 0;
+ return rt_write_trylock(lock);
+}
+
+#define write_trylock_irqsave(lock, flags) \
+ __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags)))
#define read_lock_irqsave(lock, flags) \
do { \
typecheck(unsigned long, flags); \
- flags = rt_read_lock_irqsave(lock); \
+ rt_read_lock(lock); \
+ flags = 0; \
} while (0)
#define write_lock_irqsave(lock, flags) \
do { \
typecheck(unsigned long, flags); \
- flags = rt_write_lock_irqsave(lock); \
+ rt_write_lock(lock); \
+ flags = 0; \
} while (0)
#define read_lock(lock) rt_read_lock(lock)
@@ -96,4 +94,21 @@ extern void __rt_rwlock_init(rwlock_t *r
rt_write_unlock(lock); \
} while (0)
+#define rwlock_init(rwl) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __rt_rwlock_init(rwl, #rwl, &__key); \
+} while (0)
+
+/*
+ * Internal functions made global for CPU pinning
+ */
+void __read_rt_lock(struct rt_rw_lock *lock);
+int __read_rt_trylock(struct rt_rw_lock *lock);
+void __write_rt_lock(struct rt_rw_lock *lock);
+int __write_rt_trylock(struct rt_rw_lock *lock);
+void __read_rt_unlock(struct rt_rw_lock *lock);
+void __write_rt_unlock(struct rt_rw_lock *lock);
+
#endif
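Worth spelling out what the rewritten irqsave macros above mean on RT: these
rwlocks are sleeping locks, so no interrupt state is saved, the flags are
dummied to 0 and interrupts stay enabled (the XXX ARCH_IRQ_ENABLED comment
marks that as a known simplification). A caller sketch, with an illustrative
lock of its own:

	static DEFINE_RWLOCK(demo_lock);	/* illustrative */

	static void demo_update(void)
	{
		unsigned long flags;

		write_lock_irqsave(&demo_lock, flags);	/* RT: rt_write_lock(), flags = 0 */
		/* critical section: preemptible on RT, interrupts stay enabled */
		write_unlock_irqrestore(&demo_lock, flags);
	}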
--- a/include/linux/rwlock_types_rt.h
+++ b/include/linux/rwlock_types_rt.h
@@ -5,24 +5,24 @@
#error "Do not include directly. Include spinlock_types.h instead"
#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
+#else
+# define RW_DEP_MAP_INIT(lockname)
+#endif
+
+#ifndef CONFIG_RWLOCK_RT_READER_BIASED
/*
* rwlocks - rtmutex which allows single reader recursion
*/
typedef struct {
struct rt_mutex lock;
- int read_depth;
unsigned int break_lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
} rwlock_t;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
-#else
-# define RW_DEP_MAP_INIT(lockname)
-#endif
-
#define __RW_LOCK_UNLOCKED(name) \
{ .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
RW_DEP_MAP_INIT(name) }
@@ -30,4 +30,49 @@ typedef struct {
#define DEFINE_RWLOCK(name) \
rwlock_t name = __RW_LOCK_UNLOCKED(name)
+#else /* CONFIG_RWLOCK_RT_READER_BIASED */
+
+typedef struct rt_rw_lock rwlock_t;
+
+#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name)
+
+#define DEFINE_RWLOCK(name) \
+ rwlock_t name = __RW_LOCK_UNLOCKED(name)
+
+#endif /* !CONFIG_RWLOCK_RT_READER_BIASED */
+
+/*
+ * A reader biased implementation primarily for CPU pinning.
+ *
+ * Can be selected as a general replacement for the single reader RT rwlock
+ * variant
+ */
+struct rt_rw_lock {
+ struct rt_mutex rtmutex;
+ atomic_t readers;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#define READER_BIAS (1U << 31)
+#define WRITER_BIAS (1U << 30)
+
+#define __RWLOCK_RT_INITIALIZER(name) \
+{ \
+ .readers = ATOMIC_INIT(READER_BIAS), \
+ .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \
+ RW_DEP_MAP_INIT(name) \
+}
+
+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name,
+ struct lock_class_key *key);
+
+#define rwlock_biased_rt_init(rwlock) \
+ do { \
+ static struct lock_class_key __key; \
+ \
+ __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \
+ } while (0)
+
#endif
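The bias values above encode the lock state: ->readers starts at
READER_BIAS, which is negative as a signed int; each active reader adds one,
and a writer removes the bias so the count turns non-negative. A hedged
sketch of the reader fast path, assuming it mirrors __read_rt_trylock() in
the new kernel/locking/rwlock-rt.c (whose full diff is not excerpted here):

	static int demo_read_trylock(struct rt_rw_lock *lock)
	{
		int r = atomic_read(&lock->readers);

		/* negative means READER_BIAS is intact, i.e. no writer */
		while (r < 0) {
			int old = atomic_cmpxchg(&lock->readers, r, r + 1);

			if (old == r)
				return 1;	/* reader accounted, proceed */
			r = old;
		}
		return 0;	/* writer active or pending, take the slow path */
	}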
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -576,6 +576,7 @@ struct task_struct {
#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
int migrate_disable;
int migrate_disable_update;
+ int pinned_on_cpu;
# ifdef CONFIG_SCHED_DEBUG
int migrate_disable_atomic;
# endif
@@ -1336,6 +1337,16 @@ extern struct pid *cad_pid;
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
+static inline bool is_percpu_thread(void)
+{
+#ifdef CONFIG_SMP
+ return (current->flags & PF_NO_SETAFFINITY) &&
+ (current->nr_cpus_allowed == 1);
+#else
+ return true;
+#endif
+}
+
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
@@ -1394,10 +1405,6 @@ extern int task_can_attach(struct task_s
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
-int migrate_me(void);
-void tell_sched_cpu_down_begin(int cpu);
-void tell_sched_cpu_down_done(int cpu);
-
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1408,9 +1415,6 @@ static inline int set_cpus_allowed_ptr(s
return -EINVAL;
return 0;
}
-static inline int migrate_me(void) { return 0; }
-static inline void tell_sched_cpu_down_begin(int cpu) { }
-static inline void tell_sched_cpu_down_done(int cpu) { }
#endif
#ifndef cpu_relax_yield
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -18,10 +18,6 @@ do { \
__rt_spin_lock_init(slock, #slock, &__key); \
} while (0)
-void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
-void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
-int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
-
extern void __lockfunc rt_spin_lock(spinlock_t *lock);
extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
@@ -35,9 +31,10 @@ extern int atomic_dec_and_spin_lock(atom
/*
* lockdep-less calls, for derived types like rwlock:
* (for trylock they can use rt_mutex_trylock() directly.
+ * Migrate disable handling must be done at the call site.
*/
-extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
+extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
#define spin_lock(lock) rt_spin_lock(lock)
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -1,10 +1,6 @@
#ifndef __LINUX_SPINLOCK_TYPES_UP_H
#define __LINUX_SPINLOCK_TYPES_UP_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
/*
* include/linux/spinlock_types_up.h - spinlock type definitions for UP
*
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -116,15 +116,29 @@ static inline int try_stop_cpus(const st
* @fn() runs.
*
* This can be thought of as a very heavy write lock, equivalent to
- * grabbing every spinlock in the kernel. */
+ * grabbing every spinlock in the kernel.
+ *
+ * Protects against CPU hotplug.
+ */
int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+/**
+ * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * Same as above. Must be called from within a cpus_read_lock() protected
+ * region. Avoids nested calls to cpus_read_lock().
+ */
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
-static __always_inline int stop_machine(cpu_stop_fn_t fn, void *data,
- const struct cpumask *cpus)
+static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
{
unsigned long flags;
int ret;
@@ -134,6 +148,12 @@ static __always_inline int stop_machine(
return ret;
}
+static inline int stop_machine(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
+{
+ return stop_machine_cpuslocked(fn, data, cpus);
+}
+
static __always_inline int
stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
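stop_machine_cpuslocked() exists for exactly the call sites converted
earlier in this patch (arm patch_text(), arm64 insn patching, s390 kprobes,
powernv subcore): they already run inside a cpus_read_lock() region, where
plain stop_machine() would try to take the hotplug lock a second time. A
hedged caller sketch (the demo_* names are illustrative):

	static int demo_patch_cb(void *arg)
	{
		/* runs with every online CPU spinning in the stopper */
		return 0;
	}

	static int demo_patch(void *arg)
	{
		int ret;

		cpus_read_lock();	/* hotplug excluded for the whole sequence */
		ret = stop_machine_cpuslocked(demo_patch_cb, arg, cpu_online_mask);
		cpus_read_unlock();
		return ret;
	}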
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -248,3 +248,20 @@ config ARCH_USE_QUEUED_RWLOCKS
config QUEUED_RWLOCKS
def_bool y if ARCH_USE_QUEUED_RWLOCKS
depends on SMP
+
+if PREEMPT_RT_FULL
+
+menu "RT Locking"
+
+config RWLOCK_RT_READER_BIASED
+ bool "Reader biased RWLOCK implementation for Preempt-RT"
+ default n
+ help
+ This option provides an alternative RWLOCK implementation for
+ PREEMPT-RT. The new implementation is not as writer friendly as
+ the regular RT implementation or mainline, but nothing RT related
+ should be affected. Nevertheless, this switch allows double
+ checking in case something stalls.
+endmenu
+
+endif
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -28,6 +28,7 @@
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -67,6 +68,17 @@ struct cpuhp_cpu_state {
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PREEMPT_RT_FULL)
+static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \
+ __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock);
+#endif
+
+#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
+static struct lock_class_key cpuhp_state_key;
+static struct lockdep_map cpuhp_state_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+#endif
+
/**
* cpuhp_step - Hotplug state machine step
* @name: Name of the step
@@ -198,404 +210,90 @@ void cpu_maps_update_done(void)
mutex_unlock(&cpu_add_remove_lock);
}
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
#ifdef CONFIG_HOTPLUG_CPU
-static struct {
- struct task_struct *active_writer;
- /* wait queue to wake up the active_writer */
- wait_queue_head_t wq;
- /* verifies that no writer will get active while readers are active */
- struct mutex lock;
- /*
- * Also blocks the new readers during
- * an ongoing cpu hotplug operation.
- */
- atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
- .active_writer = NULL,
- .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
- .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
- lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
-
-/**
- * hotplug_pcp - per cpu hotplug descriptor
- * @unplug: set when pin_current_cpu() needs to sync tasks
- * @sync_tsk: the task that waits for tasks to finish pinned sections
- * @refcount: counter of tasks in pinned sections
- * @grab_lock: set when the tasks entering pinned sections should wait
- * @synced: notifier for @sync_tsk to tell cpu_down it's finished
- * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
- * @mutex_init: zero if the mutex hasn't been initialized yet.
- *
- * Although @unplug and @sync_tsk may point to the same task, the @unplug
- * is used as a flag and still exists after @sync_tsk has exited and
- * @sync_tsk set to NULL.
- */
-struct hotplug_pcp {
- struct task_struct *unplug;
- struct task_struct *sync_tsk;
- int refcount;
- int grab_lock;
- struct completion synced;
- struct completion unplug_wait;
-#ifdef CONFIG_PREEMPT_RT_FULL
- /*
- * Note, on PREEMPT_RT, the hotplug lock must save the state of
- * the task, otherwise the mutex will cause the task to fail
- * to sleep when required. (Because it's called from migrate_disable())
- *
- * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
- * state.
- */
- spinlock_t lock;
-#else
- struct mutex mutex;
-#endif
- int mutex_init;
-};
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
-# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
-#else
-# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
-# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
-#endif
-
-static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
-
/**
* pin_current_cpu - Prevent the current cpu from being unplugged
- *
- * Lightweight version of get_online_cpus() to prevent cpu from being
- * unplugged when code runs in a migration disabled region.
- *
- * Must be called with preemption disabled (preempt_count = 1)!
*/
void pin_current_cpu(void)
{
- struct hotplug_pcp *hp;
- int force = 0;
-
-retry:
- hp = this_cpu_ptr(&hotplug_pcp);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct rt_rw_lock *cpuhp_pin;
+ unsigned int cpu;
+ int ret;
- if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
- hp->unplug == current) {
- hp->refcount++;
+again:
+ cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
+ ret = __read_rt_trylock(cpuhp_pin);
+ if (ret) {
+ current->pinned_on_cpu = smp_processor_id();
return;
}
- if (hp->grab_lock) {
- preempt_enable();
- hotplug_lock(hp);
- hotplug_unlock(hp);
- } else {
- preempt_enable();
- /*
- * Try to push this task off of this CPU.
- */
- if (!migrate_me()) {
- preempt_disable();
- hp = this_cpu_ptr(&hotplug_pcp);
- if (!hp->grab_lock) {
- /*
- * Just let it continue it's already pinned
- * or about to sleep.
- */
- force = 1;
- goto retry;
- }
- preempt_enable();
- }
- }
- preempt_disable();
- goto retry;
-}
-
-/**
- * unpin_current_cpu - Allow unplug of current cpu
- *
- * Must be called with preemption or interrupts disabled!
- */
-void unpin_current_cpu(void)
-{
- struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
-
- WARN_ON(hp->refcount <= 0);
-
- /* This is safe. sync_unplug_thread is pinned to this cpu */
- if (!--hp->refcount && hp->unplug && hp->unplug != current)
- wake_up_process(hp->unplug);
-}
-
-static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (hp->refcount) {
- schedule_preempt_disabled();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-}
-
-static int sync_unplug_thread(void *data)
-{
- struct hotplug_pcp *hp = data;
-
- wait_for_completion(&hp->unplug_wait);
- preempt_disable();
- hp->unplug = current;
- wait_for_pinned_cpus(hp);
-
- /*
- * This thread will synchronize the cpu_down() with threads
- * that have pinned the CPU. When the pinned CPU count reaches
- * zero, we inform the cpu_down code to continue to the next step.
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
+ cpu = smp_processor_id();
+ preempt_lazy_enable();
preempt_enable();
- complete(&hp->synced);
- /*
- * If all succeeds, the next step will need tasks to wait till
- * the CPU is offline before continuing. To do this, the grab_lock
- * is set and tasks going into pin_current_cpu() will block on the
- * mutex. But we still need to wait for those that are already in
- * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
- * will kick this thread out.
- */
- while (!hp->grab_lock && !kthread_should_stop()) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- /* Make sure grab_lock is seen before we see a stale completion */
- smp_mb();
+ __read_rt_lock(cpuhp_pin);
- /*
- * Now just before cpu_down() enters stop machine, we need to make
- * sure all tasks that are in pinned CPU sections are out, and new
- * tasks will now grab the lock, keeping them from entering pinned
- * CPU sections.
- */
- if (!kthread_should_stop()) {
- preempt_disable();
- wait_for_pinned_cpus(hp);
- preempt_enable();
- complete(&hp->synced);
- }
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (!kthread_should_stop()) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
+ preempt_disable();
+ preempt_lazy_disable();
+ if (cpu != smp_processor_id()) {
+ __read_rt_unlock(cpuhp_pin);
+ goto again;
}
- set_current_state(TASK_RUNNING);
-
- /*
- * Force this thread off this CPU as it's going down and
- * we don't want any more work on this CPU.
- */
- current->flags &= ~PF_NO_SETAFFINITY;
- set_cpus_allowed_ptr(current, cpu_present_mask);
- migrate_me();
- return 0;
-}
-
-static void __cpu_unplug_sync(struct hotplug_pcp *hp)
-{
- wake_up_process(hp->sync_tsk);
- wait_for_completion(&hp->synced);
-}
-
-static void __cpu_unplug_wait(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- complete(&hp->unplug_wait);
- wait_for_completion(&hp->synced);
+ current->pinned_on_cpu = cpu;
+#endif
}
-/*
- * Start the sync_unplug_thread on the target cpu and wait for it to
- * complete.
+/**
+ * unpin_current_cpu - Allow unplug of current cpu
*/
-static int cpu_unplug_begin(unsigned int cpu)
+void unpin_current_cpu(void)
{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
- int err;
-
- /* Protected by cpu_hotplug.lock */
- if (!hp->mutex_init) {
#ifdef CONFIG_PREEMPT_RT_FULL
- spin_lock_init(&hp->lock);
-#else
- mutex_init(&hp->mutex);
-#endif
- hp->mutex_init = 1;
- }
-
- /* Inform the scheduler to migrate tasks off this CPU */
- tell_sched_cpu_down_begin(cpu);
+ struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
- init_completion(&hp->synced);
- init_completion(&hp->unplug_wait);
-
- hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
- if (IS_ERR(hp->sync_tsk)) {
- err = PTR_ERR(hp->sync_tsk);
- hp->sync_tsk = NULL;
- return err;
- }
- kthread_bind(hp->sync_tsk, cpu);
+ if (WARN_ON(current->pinned_on_cpu != smp_processor_id()))
+ cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, current->pinned_on_cpu);
- /*
- * Wait for tasks to get out of the pinned sections,
- * it's still OK if new tasks enter. Some CPU notifiers will
- * wait for tasks that are going to enter these sections and
- * we must not have them block.
- */
- wake_up_process(hp->sync_tsk);
- return 0;
+ current->pinned_on_cpu = -1;
+ __read_rt_unlock(cpuhp_pin);
+#endif
}
-static void cpu_unplug_sync(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- init_completion(&hp->synced);
- /* The completion needs to be initialzied before setting grab_lock */
- smp_wmb();
+DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
- /* Grab the mutex before setting grab_lock */
- hotplug_lock(hp);
- hp->grab_lock = 1;
-
- /*
- * The CPU notifiers have been completed.
- * Wait for tasks to get out of pinned CPU sections and have new
- * tasks block until the CPU is completely down.
- */
- __cpu_unplug_sync(hp);
-
- /* All done with the sync thread */
- kthread_stop(hp->sync_tsk);
- hp->sync_tsk = NULL;
-}
-
-static void cpu_unplug_done(unsigned int cpu)
+void cpus_read_lock(void)
{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- hp->unplug = NULL;
- /* Let all tasks know cpu unplug is finished before cleaning up */
- smp_wmb();
-
- if (hp->sync_tsk)
- kthread_stop(hp->sync_tsk);
-
- if (hp->grab_lock) {
- hotplug_unlock(hp);
- /* protected by cpu_hotplug.lock */
- hp->grab_lock = 0;
- }
- tell_sched_cpu_down_done(cpu);
+ percpu_down_read(&cpu_hotplug_lock);
}
+EXPORT_SYMBOL_GPL(cpus_read_lock);
-void get_online_cpus(void)
+void cpus_read_unlock(void)
{
- might_sleep();
- if (cpu_hotplug.active_writer == current)
- return;
- cpuhp_lock_acquire_read();
- mutex_lock(&cpu_hotplug.lock);
- atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ percpu_up_read(&cpu_hotplug_lock);
}
-EXPORT_SYMBOL_GPL(get_online_cpus);
+EXPORT_SYMBOL_GPL(cpus_read_unlock);
-void put_online_cpus(void)
+void cpus_write_lock(void)
{
- int refcount;
-
- if (cpu_hotplug.active_writer == current)
- return;
-
- refcount = atomic_dec_return(&cpu_hotplug.refcount);
- if (WARN_ON(refcount < 0)) /* try to fix things up */
- atomic_inc(&cpu_hotplug.refcount);
-
- if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
- wake_up(&cpu_hotplug.wq);
-
- cpuhp_lock_release();
-
+ percpu_down_write(&cpu_hotplug_lock);
}
-EXPORT_SYMBOL_GPL(put_online_cpus);
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- * writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- * non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
-void cpu_hotplug_begin(void)
+void cpus_write_unlock(void)
{
- DEFINE_WAIT(wait);
-
- cpu_hotplug.active_writer = current;
- cpuhp_lock_acquire();
-
- for (;;) {
- mutex_lock(&cpu_hotplug.lock);
- prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
- if (likely(!atomic_read(&cpu_hotplug.refcount)))
- break;
- mutex_unlock(&cpu_hotplug.lock);
- schedule();
- }
- finish_wait(&cpu_hotplug.wq, &wait);
+ percpu_up_write(&cpu_hotplug_lock);
}
-void cpu_hotplug_done(void)
+void lockdep_assert_cpus_held(void)
{
- cpu_hotplug.active_writer = NULL;
- mutex_unlock(&cpu_hotplug.lock);
- cpuhp_lock_release();
+ percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
/*
@@ -877,6 +575,7 @@ static void cpuhp_thread_fun(unsigned in
st->should_run = false;
+ lock_map_acquire(&cpuhp_state_lock_map);
/* Single callback invocation for [un]install ? */
if (st->single) {
if (st->cb_state < CPUHP_AP_ONLINE) {
@@ -903,6 +602,7 @@ static void cpuhp_thread_fun(unsigned in
else if (st->state > st->target)
ret = cpuhp_ap_offline(cpu, st);
}
+ lock_map_release(&cpuhp_state_lock_map);
st->result = ret;
complete(&st->done);
}
@@ -917,6 +617,9 @@ cpuhp_invoke_ap_callback(int cpu, enum c
if (!cpu_online(cpu))
return 0;
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
+
/*
* If we are up and running, use the hotplug thread. For early calls
* we invoke the thread function directly.
@@ -960,6 +663,8 @@ static int cpuhp_kick_ap_work(unsigned i
enum cpuhp_state state = st->state;
trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
__cpuhp_kick_ap_work(st);
wait_for_completion(&st->done);
trace_cpuhp_exit(cpu, st->state, state, st->result);
@@ -1058,27 +763,33 @@ static int take_cpu_down(void *_param)
static int takedown_cpu(unsigned int cpu)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu);
+#endif
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int err;
- __cpu_unplug_wait(cpu);
/* Park the smpboot threads */
kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
- /* Notifiers are done. Don't let any more tasks pin this CPU. */
- cpu_unplug_sync(cpu);
-
/*
* Prevent irq alloc/free while the dying cpu reorganizes the
* interrupt affinities.
*/
irq_lock_sparse();
+#ifdef CONFIG_PREEMPT_RT_FULL
+ __write_rt_lock(cpuhp_pin);
+#endif
+
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
- err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
+ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+ __write_rt_unlock(cpuhp_pin);
+#endif
/* CPU refused to die */
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
@@ -1097,6 +808,9 @@ static int takedown_cpu(unsigned int cpu
wait_for_completion(&st->done);
BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ __write_rt_unlock(cpuhp_pin);
+#endif
/* Interrupts are moved away from the dying cpu, reenable alloc/free */
irq_unlock_sparse();
@@ -1142,9 +856,6 @@ static int __ref _cpu_down(unsigned int
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int prev_state, ret = 0;
- int mycpu;
- cpumask_var_t cpumask;
- cpumask_var_t cpumask_org;
if (num_online_cpus() == 1)
return -EBUSY;
@@ -1152,34 +863,7 @@ static int __ref _cpu_down(unsigned int
if (!cpu_present(cpu))
return -EINVAL;
- /* Move the downtaker off the unplug cpu */
- if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
- return -ENOMEM;
- if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
- free_cpumask_var(cpumask);
- return -ENOMEM;
- }
-
- cpumask_copy(cpumask_org, &current->cpus_mask);
- cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
- set_cpus_allowed_ptr(current, cpumask);
- free_cpumask_var(cpumask);
- migrate_disable();
- mycpu = smp_processor_id();
- if (mycpu == cpu) {
- printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
- migrate_enable();
- ret = -EBUSY;
- goto restore_cpus;
- }
-
- migrate_enable();
- cpu_hotplug_begin();
- ret = cpu_unplug_begin(cpu);
- if (ret) {
- printk("cpu_unplug_begin(%d) failed\n", cpu);
- goto out_cancel;
- }
+ cpus_write_lock();
cpuhp_tasks_frozen = tasks_frozen;
@@ -1217,12 +901,7 @@ static int __ref _cpu_down(unsigned int
}
out:
- cpu_unplug_done(cpu);
-out_cancel:
- cpu_hotplug_done();
-restore_cpus:
- set_cpus_allowed_ptr(current, cpumask_org);
- free_cpumask_var(cpumask_org);
+ cpus_write_unlock();
return ret;
}
@@ -1293,7 +972,7 @@ static int _cpu_up(unsigned int cpu, int
struct task_struct *idle;
int ret = 0;
- cpu_hotplug_begin();
+ cpus_write_lock();
if (!cpu_present(cpu)) {
ret = -EINVAL;
@@ -1828,18 +1507,20 @@ static void cpuhp_rollback_install(int f
}
}
-int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
- bool invoke)
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node,
+ bool invoke)
{
struct cpuhp_step *sp;
int cpu;
int ret;
+ lockdep_assert_cpus_held();
+
sp = cpuhp_get_step(state);
if (sp->multi_instance == false)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !sp->startup.multi)
@@ -1868,13 +1549,23 @@ add_node:
hlist_add_head(node, &sp->list);
unlock:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ return ret;
+}
+
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+ bool invoke)
+{
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
- * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * __cpuhp_setup_state_cpuslocked - Setup the callbacks for a hotplug machine state
* @state: The state to setup
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
@@ -1883,25 +1574,27 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_inst
* @multi_instance: State is set up for multiple instances which get
* added afterwards.
*
+ * The caller needs to hold cpus read locked while calling this function.
* Returns:
* On success:
* Positive state number if @state is CPUHP_AP_ONLINE_DYN
* 0 for all other states
* On failure: proper (negative) error code
*/
-int __cpuhp_setup_state(enum cpuhp_state state,
- const char *name, bool invoke,
- int (*startup)(unsigned int cpu),
- int (*teardown)(unsigned int cpu),
- bool multi_instance)
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
{
int cpu, ret = 0;
bool dynstate;
+ lockdep_assert_cpus_held();
+
if (cpuhp_cb_check(state) || !name)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
ret = cpuhp_store_callbacks(state, name, startup, teardown,
@@ -1937,7 +1630,6 @@ int __cpuhp_setup_state(enum cpuhp_state
}
out:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
/*
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
* dynamically allocated state in case of success.
@@ -1946,6 +1638,22 @@ out:
return state;
return ret;
}
+EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
+
+int __cpuhp_setup_state(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
+{
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
+ teardown, multi_instance);
+ cpus_read_unlock();
+ return ret;
+}
EXPORT_SYMBOL(__cpuhp_setup_state);
int __cpuhp_state_remove_instance(enum cpuhp_state state,
@@ -1959,7 +1667,7 @@ int __cpuhp_state_remove_instance(enum c
if (!sp->multi_instance)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !cpuhp_get_teardown_cb(state))
@@ -1980,29 +1688,30 @@ int __cpuhp_state_remove_instance(enum c
remove:
hlist_del(node);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
/**
- * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
* @state: The state to remove
* @invoke: If true, the teardown function is invoked for cpus where
* cpu state >= @state
*
+ * The caller needs to hold cpus read locked while calling this function.
* The teardown callback is currently not allowed to fail. Think
* about module removal!
*/
-void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
struct cpuhp_step *sp = cpuhp_get_step(state);
int cpu;
BUG_ON(cpuhp_cb_check(state));
- get_online_cpus();
+ lockdep_assert_cpus_held();
mutex_lock(&cpuhp_state_mutex);
if (sp->multi_instance) {
@@ -2030,7 +1739,14 @@ void __cpuhp_remove_state(enum cpuhp_sta
remove:
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+}
+EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+ cpus_read_lock();
+ __cpuhp_remove_state_cpuslocked(state, invoke);
+ cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
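The kernel/cpu.c rework above replaces the recursive hotplug mutex machinery with a percpu rwsem and splits each registration API into a plain and a _cpuslocked flavour. A minimal sketch of the intended calling convention, assuming a hypothetical my_online()/my_offline() callback pair and state name:

/*
 * Illustrative only: a subsystem init path that already needs the
 * hotplug lock uses the new _cpuslocked variant instead of the old
 * get_online_cpus() + __cpuhp_setup_state() combination.
 */
static int my_online(unsigned int cpu)
{
	return 0;	/* called on each CPU coming online */
}

static int my_offline(unsigned int cpu)
{
	return 0;	/* called on each CPU going down */
}

static int __init my_subsys_init(void)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
					     "subsys/my:online", true,
					     my_online, my_offline, false);
	cpus_read_unlock();

	/* CPUHP_AP_ONLINE_DYN returns a positive state number on success */
	return ret < 0 ? ret : 0;
}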
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
/*
* perf event paranoia level:
@@ -4055,14 +4056,6 @@ find_get_context(struct pmu *pmu, struct
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
- /*
- * We could be clever and allow to attach a event to an
- * offline CPU and activate it when the CPU comes up, but
- * that's for later.
- */
- if (!cpu_online(cpu))
- return ERR_PTR(-ENODEV);
-
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
@@ -8088,7 +8081,8 @@ static int swevent_hlist_get_cpu(int cpu
int err = 0;
mutex_lock(&swhash->hlist_mutex);
- if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+ if (!swevent_hlist_deref(swhash) &&
+ cpumask_test_cpu(cpu, perf_online_mask)) {
struct swevent_hlist *hlist;
hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -8109,7 +8103,7 @@ static int swevent_hlist_get(void)
{
int err, cpu, failed_cpu;
- get_online_cpus();
+ mutex_lock(&pmus_lock);
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(cpu);
if (err) {
@@ -8117,8 +8111,7 @@ static int swevent_hlist_get(void)
goto fail;
}
}
- put_online_cpus();
-
+ mutex_unlock(&pmus_lock);
return 0;
fail:
for_each_possible_cpu(cpu) {
@@ -8126,8 +8119,7 @@ fail:
break;
swevent_hlist_put_cpu(cpu);
}
-
- put_online_cpus();
+ mutex_unlock(&pmus_lock);
return err;
}
@@ -9453,7 +9445,7 @@ perf_event_mux_interval_ms_store(struct
pmu->hrtimer_interval_ms = timer;
/* update all cpuctx for this PMU */
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct perf_cpu_context *cpuctx;
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -9462,7 +9454,7 @@ perf_event_mux_interval_ms_store(struct
cpu_function_call(cpu,
(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
}
- put_online_cpus();
+ cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
return count;
@@ -9592,6 +9584,7 @@ skip_type:
lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
cpuctx->ctx.pmu = pmu;
+ cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
__perf_mux_hrtimer_init(cpuctx, cpu);
}
@@ -10457,12 +10450,10 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_task;
}
- get_online_cpus();
-
if (task) {
err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
if (err)
- goto err_cpus;
+ goto err_cred;
/*
* Reuse ptrace permission checks for now.
@@ -10655,6 +10646,23 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_locked;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_locked;
+ }
+ }
+
+
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
@@ -10739,8 +10747,6 @@ SYSCALL_DEFINE5(perf_event_open,
put_task_struct(task);
}
- put_online_cpus();
-
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(¤t->perf_event_mutex);
@@ -10774,8 +10780,6 @@ err_alloc:
err_cred:
if (task)
mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
- put_online_cpus();
err_task:
if (task)
put_task_struct(task);
@@ -10830,6 +10834,21 @@ perf_event_create_kernel_counter(struct
goto err_unlock;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_unlock;
+ }
+ }
+
if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
goto err_unlock;
@@ -11527,6 +11546,8 @@ static void __init perf_event_init_all_c
struct swevent_htable *swhash;
int cpu;
+ zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
@@ -11542,7 +11563,7 @@ static void __init perf_event_init_all_c
}
}
-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
@@ -11555,7 +11576,6 @@ int perf_event_init_cpu(unsigned int cpu
rcu_assign_pointer(swhash->swevent_hlist, hlist);
}
mutex_unlock(&swhash->hlist_mutex);
- return 0;
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -11573,19 +11593,22 @@ static void __perf_event_exit_context(vo
static void perf_event_exit_cpu_context(int cpu)
{
+ struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct pmu *pmu;
- int idx;
- idx = srcu_read_lock(&pmus_srcu);
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+ mutex_lock(&pmus_lock);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+ cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
}
- srcu_read_unlock(&pmus_srcu, idx);
+ cpumask_clear_cpu(cpu, perf_online_mask);
+ mutex_unlock(&pmus_lock);
}
#else
@@ -11593,6 +11616,29 @@ static void perf_event_exit_cpu_context(
#endif
+int perf_event_init_cpu(unsigned int cpu)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+
+ perf_swevent_init_cpu(cpu);
+
+ mutex_lock(&pmus_lock);
+ cpumask_set_cpu(cpu, perf_online_mask);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
+
+ mutex_lock(&ctx->mutex);
+ cpuctx->online = 1;
+ mutex_unlock(&ctx->mutex);
+ }
+ mutex_unlock(&pmus_lock);
+
+ return 0;
+}
+
int perf_event_exit_cpu(unsigned int cpu)
{
perf_event_exit_cpu_context(cpu);
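From user space the perf behaviour is meant to stay the same: with the online check now done under ctx::mutex against perf_online_mask instead of under get_online_cpus(), opening a CPU-bound event on an offline CPU still fails with ENODEV. A stand-alone illustration, assuming CPU 1 has been taken offline:

/* Hedged user-space illustration; the CPU number is arbitrary. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;

	/* pid = -1, cpu = 1: a per-CPU event on (assumed offline) CPU 1 */
	if (syscall(SYS_perf_event_open, &attr, -1, 1, -1, 0) < 0)
		printf("perf_event_open: %s\n", strerror(errno));
	return 0;
}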
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -483,11 +483,6 @@ static DECLARE_DELAYED_WORK(optimizing_w
*/
static void do_optimize_kprobes(void)
{
- /* Optimization never be done when disarmed */
- if (kprobes_all_disarmed || !kprobes_allow_optimization ||
- list_empty(&optimizing_list))
- return;
-
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -495,14 +490,19 @@ static void do_optimize_kprobes(void)
* This combination can cause a deadlock (cpu-hotplug try to lock
* text_mutex but stop_machine can not be done because online_cpus
* has been changed)
- * To avoid this deadlock, we need to call get_online_cpus()
+ * To avoid this deadlock, caller must have locked cpu hotplug
* for preventing cpu-hotplug outside of text_mutex locking.
*/
- get_online_cpus();
+ lockdep_assert_cpus_held();
+
+ /* Optimization is never done when disarmed */
+ if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+ list_empty(&optimizing_list))
+ return;
+
mutex_lock(&text_mutex);
arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/*
@@ -513,12 +513,13 @@ static void do_unoptimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;
+ /* See comment in do_optimize_kprobes() */
+ lockdep_assert_cpus_held();
+
/* Unoptimization must be done anytime */
if (list_empty(&unoptimizing_list))
return;
- /* Ditto to do_optimize_kprobes */
- get_online_cpus();
mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
@@ -537,7 +538,6 @@ static void do_unoptimize_kprobes(void)
list_del_init(&op->list);
}
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/* Reclaim all kprobes on the free_list */
@@ -562,6 +562,7 @@ static void kick_kprobe_optimizer(void)
static void kprobe_optimizer(struct work_struct *work)
{
mutex_lock(&kprobe_mutex);
+ cpus_read_lock();
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
@@ -587,6 +588,7 @@ static void kprobe_optimizer(struct work
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
@@ -650,9 +652,8 @@ static void optimize_kprobe(struct kprob
/* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
- get_online_cpus();
+ lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
- put_online_cpus();
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
}
@@ -796,6 +797,7 @@ static void try_to_optimize_kprobe(struc
return;
/* For preparing optimization, jump_label_text_reserved() is called */
+ cpus_read_lock();
jump_label_lock();
mutex_lock(&text_mutex);
@@ -817,6 +819,7 @@ static void try_to_optimize_kprobe(struc
out:
mutex_unlock(&text_mutex);
jump_label_unlock();
+ cpus_read_unlock();
}
#ifdef CONFIG_SYSCTL
@@ -831,6 +834,7 @@ static void optimize_all_kprobes(void)
if (kprobes_allow_optimization)
goto out;
+ cpus_read_lock();
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
@@ -838,6 +842,7 @@ static void optimize_all_kprobes(void)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
+ cpus_read_unlock();
printk(KERN_INFO "Kprobes globally optimized\n");
out:
mutex_unlock(&kprobe_mutex);
@@ -856,6 +861,7 @@ static void unoptimize_all_kprobes(void)
return;
}
+ cpus_read_lock();
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
@@ -864,6 +870,7 @@ static void unoptimize_all_kprobes(void)
unoptimize_kprobe(p, false);
}
}
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Wait for unoptimizing completion */
@@ -1020,14 +1027,11 @@ static void arm_kprobe(struct kprobe *kp
arm_kprobe_ftrace(kp);
return;
}
- /*
- * Here, since __arm_kprobe() doesn't use stop_machine(),
- * this doesn't cause deadlock on text_mutex. So, we don't
- * need get_online_cpus().
- */
+ cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/* Disarm a kprobe with text_mutex */
@@ -1037,10 +1041,12 @@ static void disarm_kprobe(struct kprobe
disarm_kprobe_ftrace(kp);
return;
}
- /* Ditto */
+
+ cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/*
@@ -1308,13 +1314,10 @@ static int register_aggr_kprobe(struct k
int ret = 0;
struct kprobe *ap = orig_p;
+ cpus_read_lock();
+
/* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock();
- /*
- * Get online CPUs to avoid text_mutex deadlock.with stop machine,
- * which is invoked by unoptimize_kprobe() in add_new_kprobe()
- */
- get_online_cpus();
mutex_lock(&text_mutex);
if (!kprobe_aggrprobe(orig_p)) {
@@ -1365,8 +1368,8 @@ static int register_aggr_kprobe(struct k
out:
mutex_unlock(&text_mutex);
- put_online_cpus();
jump_label_unlock();
+ cpus_read_unlock();
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
@@ -1568,9 +1571,12 @@ int register_kprobe(struct kprobe *p)
goto out;
}
- mutex_lock(&text_mutex); /* Avoiding text modification */
+ cpus_read_lock();
+ /* Prevent text modification */
+ mutex_lock(&text_mutex);
ret = prepare_kprobe(p);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
if (ret)
goto out;
@@ -1583,7 +1589,6 @@ int register_kprobe(struct kprobe *p)
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
-
out:
mutex_unlock(&kprobe_mutex);
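The kprobes conversion hoists cpus_read_lock() out of the low-level helpers and into the entry points, documenting the invariant with lockdep_assert_cpus_held(). The resulting ordering, hotplug lock before text_mutex, is what prevents the stop_machine() vs. hotplug deadlock described in the comments above. A schematic sketch of that convention (not an actual kernel function):

/*
 * Schematic only: the lock ordering every text-patching path in
 * kernel/kprobes.c now follows, so that stop_machine() users and
 * CPU hotplug can no longer deadlock on text_mutex.
 */
static void text_patch_sketch(void)
{
	cpus_read_lock();		/* 1: block CPU hotplug        */
	mutex_lock(&text_mutex);	/* 2: serialize code patching  */
	/* ... __arm_kprobe()/arch_optimize_kprobes()-style work ... */
	mutex_unlock(&text_mutex);
	cpus_read_unlock();
}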
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -32,7 +32,7 @@ ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
endif
-obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o rwsem-rt.o
+obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o rwsem-rt.o rwlock-rt.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
--- a/kernel/locking/rt.c
+++ b/kernel/locking/rt.c
@@ -198,137 +198,6 @@ void __lockfunc _mutex_unlock(struct mut
}
EXPORT_SYMBOL(_mutex_unlock);
-/*
- * rwlock_t functions
- */
-int __lockfunc rt_write_trylock(rwlock_t *rwlock)
-{
- int ret;
-
- migrate_disable();
- ret = __rt_mutex_trylock(&rwlock->lock);
- if (ret)
- rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
- else
- migrate_enable();
-
- return ret;
-}
-EXPORT_SYMBOL(rt_write_trylock);
-
-int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
-{
- int ret;
-
- *flags = 0;
- ret = rt_write_trylock(rwlock);
- return ret;
-}
-EXPORT_SYMBOL(rt_write_trylock_irqsave);
-
-int __lockfunc rt_read_trylock(rwlock_t *rwlock)
-{
- struct rt_mutex *lock = &rwlock->lock;
- int ret = 1;
-
- /*
- * recursive read locks succeed when current owns the lock,
- * but not when read_depth == 0 which means that the lock is
- * write locked.
- */
- if (rt_mutex_owner(lock) != current) {
- migrate_disable();
- ret = __rt_mutex_trylock(lock);
- if (ret)
- rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
- else
- migrate_enable();
-
- } else if (!rwlock->read_depth) {
- ret = 0;
- }
-
- if (ret)
- rwlock->read_depth++;
-
- return ret;
-}
-EXPORT_SYMBOL(rt_read_trylock);
-
-void __lockfunc rt_write_lock(rwlock_t *rwlock)
-{
- rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- __rt_spin_lock(&rwlock->lock);
-}
-EXPORT_SYMBOL(rt_write_lock);
-
-void __lockfunc rt_read_lock(rwlock_t *rwlock)
-{
- struct rt_mutex *lock = &rwlock->lock;
-
-
- /*
- * recursive read locks succeed when current owns the lock
- */
- if (rt_mutex_owner(lock) != current) {
- rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- __rt_spin_lock(lock);
- }
- rwlock->read_depth++;
-}
-
-EXPORT_SYMBOL(rt_read_lock);
-
-void __lockfunc rt_write_unlock(rwlock_t *rwlock)
-{
- /* NOTE: we always pass in '1' for nested, for simplicity */
- rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
- migrate_enable();
-}
-EXPORT_SYMBOL(rt_write_unlock);
-
-void __lockfunc rt_read_unlock(rwlock_t *rwlock)
-{
- /* Release the lock only when read_depth is down to 0 */
- if (--rwlock->read_depth == 0) {
- rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
- migrate_enable();
- }
-}
-EXPORT_SYMBOL(rt_read_unlock);
-
-unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
-{
- rt_write_lock(rwlock);
-
- return 0;
-}
-EXPORT_SYMBOL(rt_write_lock_irqsave);
-
-unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
-{
- rt_read_lock(rwlock);
-
- return 0;
-}
-EXPORT_SYMBOL(rt_read_lock_irqsave);
-
-void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
- lockdep_init_map(&rwlock->dep_map, name, key, 0);
-#endif
- rwlock->lock.save_state = 1;
- rwlock->read_depth = 0;
-}
-EXPORT_SYMBOL(__rt_rwlock_init);
-
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
* @cnt: the atomic which we are to dec
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -968,19 +968,14 @@ takeit:
* preemptible spin_lock functions:
*/
static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
- void (*slowfn)(struct rt_mutex *lock,
- bool mg_off),
- bool do_mig_dis)
+ void (*slowfn)(struct rt_mutex *lock))
{
might_sleep_no_state_check();
- if (do_mig_dis)
- migrate_disable();
-
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return;
else
- slowfn(lock, do_mig_dis);
+ slowfn(lock);
}
static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
@@ -1038,22 +1033,16 @@ static int task_blocks_on_rt_mutex(struc
* We store the current state under p->pi_lock in p->saved_state and
* the try_to_wake_up() code handles this accordingly.
*/
-static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
- bool mg_off)
+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ unsigned long flags)
{
struct task_struct *lock_owner, *self = current;
- struct rt_mutex_waiter waiter, *top_waiter;
- unsigned long flags;
+ struct rt_mutex_waiter *top_waiter;
int ret;
- rt_mutex_init_waiter(&waiter, true);
-
- raw_spin_lock_irqsave(&lock->wait_lock, flags);
-
- if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL))
return;
- }
BUG_ON(rt_mutex_owner(lock) == self);
@@ -1068,12 +1057,12 @@ static void noinline __sched rt_spin_lo
__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
raw_spin_unlock(&self->pi_lock);
- ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
+ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK);
BUG_ON(ret);
for (;;) {
/* Try to acquire the lock again. */
- if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
+ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL))
break;
top_waiter = rt_mutex_top_waiter(lock);
@@ -1081,15 +1070,10 @@ static void noinline __sched rt_spin_lo
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- debug_rt_mutex_print_deadlock(&waiter);
+ debug_rt_mutex_print_deadlock(waiter);
- if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
- if (mg_off)
- migrate_enable();
+ if (top_waiter != waiter || adaptive_wait(lock, lock_owner))
schedule();
- if (mg_off)
- migrate_disable();
- }
raw_spin_lock_irqsave(&lock->wait_lock, flags);
@@ -1116,11 +1100,20 @@ static void noinline __sched rt_spin_lo
*/
fixup_rt_mutex_waiters(lock);
- BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
- BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
+ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock));
+ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry));
+}
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
+{
+ struct rt_mutex_waiter waiter;
+ unsigned long flags;
+ rt_mutex_init_waiter(&waiter, true);
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ rt_spin_lock_slowlock_locked(lock, &waiter, flags);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
debug_rt_mutex_free_waiter(&waiter);
}
@@ -1130,7 +1123,7 @@ static bool __sched __rt_mutex_unlock_co
/*
* Slow path to release a rt_mutex spin_lock style
*/
-static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
{
unsigned long flags;
DEFINE_WAKE_Q(wake_q);
@@ -1145,49 +1138,29 @@ static void noinline __sched rt_spin_lo
rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}
-void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
-{
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
- spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-}
-EXPORT_SYMBOL(rt_spin_lock__no_mg);
-
void __lockfunc rt_spin_lock(spinlock_t *lock)
{
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
+ migrate_disable();
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(rt_spin_lock);
void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
{
- rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
}
-EXPORT_SYMBOL(__rt_spin_lock);
-
-void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
-{
- rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
-}
-EXPORT_SYMBOL(__rt_spin_lock__no_mg);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
+ migrate_disable();
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
}
EXPORT_SYMBOL(rt_spin_lock_nested);
#endif
-void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
-{
- /* NOTE: we always pass in '1' for nested, for simplicity */
- spin_release(&lock->dep_map, 1, _RET_IP_);
- rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
-}
-EXPORT_SYMBOL(rt_spin_unlock__no_mg);
-
void __lockfunc rt_spin_unlock(spinlock_t *lock)
{
/* NOTE: we always pass in '1' for nested, for simplicity */
@@ -1215,17 +1188,6 @@ void __lockfunc rt_spin_unlock_wait(spin
}
EXPORT_SYMBOL(rt_spin_unlock_wait);
-int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
-{
- int ret;
-
- ret = __rt_mutex_trylock(&lock->lock);
- if (ret)
- spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(rt_spin_trylock__no_mg);
-
int __lockfunc rt_spin_trylock(spinlock_t *lock)
{
int ret;
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -139,6 +139,10 @@ int __sched rt_mutex_slowlock_locked(str
enum rtmutex_chainwalk chwalk,
struct ww_acquire_ctx *ww_ctx,
struct rt_mutex_waiter *waiter);
+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ unsigned long flags);
+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock);
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
--- /dev/null
+++ b/kernel/locking/rwlock-rt.c
@@ -0,0 +1,400 @@
+/*
+ */
+#include <linux/sched/debug.h>
+#include <linux/export.h>
+
+#include "rtmutex_common.h"
+#include <linux/rwlock_types_rt.h>
+
+/*
+ * RT-specific reader/writer locks
+ *
+ * write_lock()
+ * 1) Lock lock->rtmutex
+ * 2) Remove the reader BIAS to force readers into the slow path
+ * 3) Wait until all readers have left the critical region
+ * 4) Mark it write locked
+ *
+ * write_unlock()
+ * 1) Remove the write locked marker
+ * 2) Set the reader BIAS so readers can use the fast path again
+ * 3) Unlock lock->rtmutex to release blocked readers
+ *
+ * read_lock()
+ * 1) Try fast path acquisition (reader BIAS is set)
+ * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag
+ * 3) If !writelocked, acquire it for read
+ * 4) If writelocked, block on lock->rtmutex
+ * 5) unlock lock->rtmutex, goto 1)
+ *
+ * read_unlock()
+ * 1) Try fast path release (reader count != 1)
+ * 2) Wake the writer waiting in write_lock()#3
+ *
+ * read_lock()#3 has the consequence that rw locks on RT are not writer
+ * fair. Writers, which should be avoided in RT tasks anyway (think
+ * tasklist lock), are at least subject to the rtmutex priority/DL
+ * inheritance mechanism.
+ *
+ * It's possible to make the rw locks writer fair by keeping a list of
+ * active readers. A blocked writer would force all newly incoming readers
+ * to block on the rtmutex, but the rtmutex would have to be proxy locked
+ * for one reader after the other. We can't use multi-reader inheritance
+ * because there is no way to support that with
+ * SCHED_DEADLINE. Implementing the one by one reader boosting/handover
+ * mechanism is a major surgery for a very dubious value.
+ *
+ * The risk of writer starvation is there, but the pathological use cases
+ * which trigger it are not necessarily the typical RT workloads.
+ */
+
+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+ atomic_set(&lock->readers, READER_BIAS);
+ rt_mutex_init(&lock->rtmutex);
+ lock->rtmutex.save_state = 1;
+}
+
+int __read_rt_trylock(struct rt_rw_lock *lock)
+{
+ int r, old;
+
+ /*
+ * Increment reader count, if lock->readers < 0, i.e. READER_BIAS is
+ * set.
+ */
+ for (r = atomic_read(&lock->readers); r < 0;) {
+ old = atomic_cmpxchg(&lock->readers, r, r + 1);
+ if (likely(old == r))
+ return 1;
+ r = old;
+ }
+ return 0;
+}
+
+void __sched __read_rt_lock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ struct rt_mutex_waiter waiter;
+ unsigned long flags;
+
+ if (__read_rt_trylock(lock))
+ return;
+
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+ /*
+ * Allow readers as long as the writer has not completely
+ * acquired the lock for write.
+ */
+ if (atomic_read(&lock->readers) != WRITER_BIAS) {
+ atomic_inc(&lock->readers);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ return;
+ }
+
+ /*
+ * Call into the slow lock path with the rtmutex->wait_lock
+ * held, so this can't result in the following race:
+ *
+ * Reader1 Reader2 Writer
+ * read_lock()
+ * write_lock()
+ * rtmutex_lock(m)
+ * swait()
+ * read_lock()
+ * unlock(m->wait_lock)
+ * read_unlock()
+ * swake()
+ * lock(m->wait_lock)
+ * lock->writelocked=true
+ * unlock(m->wait_lock)
+ *
+ * write_unlock()
+ * lock->writelocked=false
+ * rtmutex_unlock(m)
+ * read_lock()
+ * write_lock()
+ * rtmutex_lock(m)
+ * swait()
+ * rtmutex_lock(m)
+ *
+ * That would put Reader1 behind the writer waiting on
+ * Reader2 to call read_unlock() which might be unbound.
+ */
+ rt_mutex_init_waiter(&waiter, false);
+ rt_spin_lock_slowlock_locked(m, &waiter, flags);
+ /*
+ * The slowlock() above is guaranteed to return with the rtmutex
+ * now held, so there can't be a writer active. Increment the reader
+ * count and immediately drop the rtmutex again.
+ */
+ atomic_inc(&lock->readers);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ rt_spin_lock_slowunlock(m);
+
+ debug_rt_mutex_free_waiter(&waiter);
+}
+
+void __read_rt_unlock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ struct task_struct *tsk;
+
+ /*
+ * lock->readers can only hit 0 when a writer is waiting for the
+ * active readers to leave the critical region.
+ */
+ if (!atomic_dec_and_test(&lock->readers))
+ return;
+
+ raw_spin_lock_irq(&m->wait_lock);
+ /*
+ * Wake the writer, i.e. the rtmutex owner. It might release the
+ * rtmutex concurrently in the fast path, but to clean up the rw
+ * lock it needs to acquire m->wait_lock. The worst case which can
+ * happen is a spurious wakeup.
+ */
+ tsk = rt_mutex_owner(m);
+ if (tsk)
+ wake_up_process(tsk);
+
+ raw_spin_unlock_irq(&m->wait_lock);
+}
+
+static void __write_unlock_common(struct rt_rw_lock *lock, int bias,
+ unsigned long flags)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+
+ atomic_add(READER_BIAS - bias, &lock->readers);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ rt_spin_lock_slowunlock(m);
+}
+
+void __sched __write_rt_lock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ struct task_struct *self = current;
+ unsigned long flags;
+
+ /* Take the rtmutex as a first step */
+ __rt_spin_lock(m);
+
+ /* Force readers into slow path */
+ atomic_sub(READER_BIAS, &lock->readers);
+
+ for (;;) {
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+ raw_spin_lock(&self->pi_lock);
+ self->saved_state = self->state;
+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+ raw_spin_unlock(&self->pi_lock);
+
+ /* Have all readers left the critical region? */
+ if (!atomic_read(&lock->readers)) {
+ atomic_set(&lock->readers, WRITER_BIAS);
+ raw_spin_lock(&self->pi_lock);
+ __set_current_state_no_track(self->saved_state);
+ self->saved_state = TASK_RUNNING;
+ raw_spin_unlock(&self->pi_lock);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ return;
+ }
+
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+
+ if (atomic_read(&lock->readers) != 0)
+ schedule();
+ }
+}
+
+int __write_rt_trylock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ unsigned long flags;
+
+ if (!rt_mutex_trylock(m))
+ return 0;
+
+ atomic_sub(READER_BIAS, &lock->readers);
+
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+ if (!atomic_read(&lock->readers)) {
+ atomic_set(&lock->readers, WRITER_BIAS);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ return 1;
+ }
+ __write_unlock_common(lock, 0, flags);
+ return 0;
+}
+
+void __write_rt_unlock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+ __write_unlock_common(lock, WRITER_BIAS, flags);
+}
+
+#ifndef CONFIG_RWLOCK_RT_READER_BIASED
+/* Map the single reader implementation */
+static inline int do_read_rt_trylock(rwlock_t *rwlock)
+{
+ return rt_mutex_trylock(&rwlock->lock);
+}
+
+static inline int do_write_rt_trylock(rwlock_t *rwlock)
+{
+ return rt_mutex_trylock(&rwlock->lock);
+}
+
+static inline void do_read_rt_lock(rwlock_t *rwlock)
+{
+ __rt_spin_lock(&rwlock->lock);
+}
+
+static inline void do_write_rt_lock(rwlock_t *rwlock)
+{
+ __rt_spin_lock(&rwlock->lock);
+}
+
+static inline void do_read_rt_unlock(rwlock_t *rwlock)
+{
+ __rt_spin_unlock(&rwlock->lock);
+}
+
+static inline void do_write_rt_unlock(rwlock_t *rwlock)
+{
+ __rt_spin_unlock(&rwlock->lock);
+}
+
+static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
+ lockdep_init_map(&rwlock->dep_map, name, key, 0);
+#endif
+ rt_mutex_init(&rwlock->lock);
+ rwlock->lock.save_state = 1;
+}
+
+#else
+/* Map the reader biased implementation */
+static inline int do_read_rt_trylock(rwlock_t *rwlock)
+{
+ return __read_rt_trylock(rwlock);
+}
+
+static inline int do_write_rt_trylock(rwlock_t *rwlock)
+{
+ return __write_rt_trylock(rwlock);
+}
+
+static inline void do_read_rt_lock(rwlock_t *rwlock)
+{
+ __read_rt_lock(rwlock);
+}
+
+static inline void do_write_rt_lock(rwlock_t *rwlock)
+{
+ __write_rt_lock(rwlock);
+}
+
+static inline void do_read_rt_unlock(rwlock_t *rwlock)
+{
+ __read_rt_unlock(rwlock);
+}
+
+static inline void do_write_rt_unlock(rwlock_t *rwlock)
+{
+ __write_rt_unlock(rwlock);
+}
+
+static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name,
+ struct lock_class_key *key)
+{
+ __rwlock_biased_rt_init(rwlock, name, key);
+}
+#endif
+
+/*
+ * The common functions which get wrapped into the rwlock API.
+ */
+int __lockfunc rt_read_trylock(rwlock_t *rwlock)
+{
+ int ret;
+
+ migrate_disable();
+ ret = do_read_rt_trylock(rwlock);
+ if (ret)
+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
+ else
+ migrate_enable();
+ return ret;
+}
+EXPORT_SYMBOL(rt_read_trylock);
+
+int __lockfunc rt_write_trylock(rwlock_t *rwlock)
+{
+ int ret;
+
+ migrate_disable();
+ ret = do_write_rt_trylock(rwlock);
+ if (ret)
+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
+ else
+ migrate_enable();
+ return ret;
+}
+EXPORT_SYMBOL(rt_write_trylock);
+
+void __lockfunc rt_read_lock(rwlock_t *rwlock)
+{
+ migrate_disable();
+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+ do_read_rt_lock(rwlock);
+}
+EXPORT_SYMBOL(rt_read_lock);
+
+void __lockfunc rt_write_lock(rwlock_t *rwlock)
+{
+ migrate_disable();
+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
+ do_write_rt_lock(rwlock);
+}
+EXPORT_SYMBOL(rt_write_lock);
+
+void __lockfunc rt_read_unlock(rwlock_t *rwlock)
+{
+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
+ do_read_rt_unlock(rwlock);
+ migrate_enable();
+}
+EXPORT_SYMBOL(rt_read_unlock);
+
+void __lockfunc rt_write_unlock(rwlock_t *rwlock)
+{
+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
+ do_write_rt_unlock(rwlock);
+ migrate_enable();
+}
+EXPORT_SYMBOL(rt_write_unlock);
+
+void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
+{
+ do_rwlock_rt_init(rwlock, name, key);
+}
+EXPORT_SYMBOL(__rt_rwlock_init);
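The whole fast path of the reader biased lock above hinges on the sign of lock->readers: while the counter is negative, READER_BIAS is intact and no writer holds the lock. A minimal user-space model of that counter protocol in C11 atomics; the bias value and names are illustrative, the kernel uses its own constants from rwlock_types_rt.h:

#include <stdatomic.h>
#include <limits.h>

#define MODEL_READER_BIAS	INT_MIN		/* illustrative bias value */

static atomic_int readers = MODEL_READER_BIAS;

/* Mirrors __read_rt_trylock(): enter while the counter is negative. */
static int model_read_trylock(void)
{
	int r = atomic_load(&readers);

	while (r < 0) {
		/* on failure, r is reloaded with the current value */
		if (atomic_compare_exchange_weak(&readers, &r, r + 1))
			return 1;	/* reader accounted, fast path */
	}
	return 0;	/* writer removed the bias: take the slow path */
}

/* Mirrors the __read_rt_unlock() fast path: just drop our count. */
static void model_read_unlock(void)
{
	atomic_fetch_sub(&readers, 1);
}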
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -966,29 +966,18 @@ static struct kobj_type padata_attr_type
};
/**
- * padata_alloc_possible - Allocate and initialize padata instance.
- * Use the cpu_possible_mask for serial and
- * parallel workers.
- *
- * @wq: workqueue to use for the allocated padata instance
- */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
-{
- return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
-}
-EXPORT_SYMBOL(padata_alloc_possible);
-
-/**
* padata_alloc - allocate and initialize a padata instance and specify
* cpumasks for serial and parallel workers.
*
* @wq: workqueue to use for the allocated padata instance
* @pcpumask: cpumask that will be used for padata parallelization
* @cbcpumask: cpumask that will be used for padata serialization
+ *
+ * Must be called from a cpus_read_lock() protected region
*/
-struct padata_instance *padata_alloc(struct workqueue_struct *wq,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask)
+static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask)
{
struct padata_instance *pinst;
struct parallel_data *pd = NULL;
@@ -997,7 +986,6 @@ struct padata_instance *padata_alloc(str
if (!pinst)
goto err;
- get_online_cpus();
if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
goto err_free_inst;
if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
@@ -1021,14 +1009,12 @@ struct padata_instance *padata_alloc(str
pinst->flags = 0;
- put_online_cpus();
-
BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
kobject_init(&pinst->kobj, &padata_attr_type);
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_add_instance_nocalls(hp_online, &pinst->node);
+ cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
#endif
return pinst;
@@ -1037,12 +1023,27 @@ err_free_masks:
free_cpumask_var(pinst->cpumask.cbcpu);
err_free_inst:
kfree(pinst);
- put_online_cpus();
err:
return NULL;
}
/**
+ * padata_alloc_possible - Allocate and initialize padata instance.
+ * Use the cpu_possible_mask for serial and
+ * parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ *
+ * Must be called from a cpus_read_lock() protected region
+ */
+struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+{
+ lockdep_assert_cpus_held();
+ return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc_possible);
+
+/**
* padata_free - free a padata instance
*
* @padata_inst: padata instance to free
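With padata_alloc() now static and asserting lockdep_assert_cpus_held(), allocation must happen inside a cpus_read_lock() region. A hedged caller sketch, assuming wq is an already-created workqueue:

/* Illustrative caller: allocate a padata instance under the new
 * locking convention. */
static struct padata_instance *my_padata_setup(struct workqueue_struct *wq)
{
	struct padata_instance *pinst;

	cpus_read_lock();
	pinst = padata_alloc_possible(wq);
	cpus_read_unlock();

	return pinst;	/* NULL on allocation failure */
}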
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1223,84 +1223,6 @@ void do_set_cpus_allowed(struct task_str
__do_set_cpus_allowed_tail(p, new_mask);
}
-static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
-static DEFINE_MUTEX(sched_down_mutex);
-static cpumask_t sched_down_cpumask;
-
-void tell_sched_cpu_down_begin(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_set_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
-
-void tell_sched_cpu_down_done(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_clear_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
-
-/**
- * migrate_me - try to move the current task off this cpu
- *
- * Used by the pin_current_cpu() code to try to get tasks
- * to move off the current CPU as it is going down.
- * It will only move the task if the task isn't pinned to
- * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
- * and the task has to be in a RUNNING state. Otherwise the
- * movement of the task will wake it up (change its state
- * to running) when the task did not expect it.
- *
- * Returns 1 if it succeeded in moving the current task
- * 0 otherwise.
- */
-int migrate_me(void)
-{
- struct task_struct *p = current;
- struct migration_arg arg;
- struct cpumask *cpumask;
- const struct cpumask *mask;
- unsigned int dest_cpu;
- struct rq_flags rf;
- struct rq *rq;
-
- /*
- * We can not migrate tasks bounded to a CPU or tasks not
- * running. The movement of the task will wake it up.
- */
- if (p->flags & PF_NO_SETAFFINITY || p->state)
- return 0;
-
- mutex_lock(&sched_down_mutex);
- rq = task_rq_lock(p, &rf);
-
- cpumask = this_cpu_ptr(&sched_cpumasks);
- mask = p->cpus_ptr;
-
- cpumask_andnot(cpumask, mask, &sched_down_cpumask);
-
- if (!cpumask_weight(cpumask)) {
- /* It's only on this CPU? */
- task_rq_unlock(rq, p, &rf);
- mutex_unlock(&sched_down_mutex);
- return 0;
- }
-
- dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
-
- arg.task = p;
- arg.dest_cpu = dest_cpu;
-
- task_rq_unlock(rq, p, &rf);
-
- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
- tlb_migrate_finish(p->mm);
- mutex_unlock(&sched_down_mutex);
-
- return 1;
-}
-
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
@@ -7667,8 +7589,6 @@ void migrate_disable(void)
return;
}
- /* get_online_cpus(); */
-
preempt_disable();
preempt_lazy_disable();
pin_current_cpu();
@@ -7744,13 +7664,11 @@ void migrate_enable(void)
preempt_enable();
stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
- /* put_online_cpus(); */
return;
}
}
unpin_current_cpu();
- /* put_online_cpus(); */
preempt_lazy_enable();
preempt_enable();
}
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -577,7 +577,8 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
-static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
{
struct multi_stop_data msdata = {
.fn = fn,
@@ -586,6 +587,8 @@ static int __stop_machine(cpu_stop_fn_t
.active_cpus = cpus,
};
+ lockdep_assert_cpus_held();
+
if (!stop_machine_initialized) {
/*
* Handle the case where stop_machine() is called
@@ -615,9 +618,9 @@ int stop_machine(cpu_stop_fn_t fn, void
int ret;
/* No CPUs can come up or down during this. */
- get_online_cpus();
- ret = __stop_machine(fn, data, cpus);
- put_online_cpus();
+ cpus_read_lock();
+ ret = stop_machine_cpuslocked(fn, data, cpus);
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
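stop_machine() keeps its old semantics, while the new stop_machine_cpuslocked() serves callers which, like takedown_cpu() above, already hold the hotplug lock. A sketch of both call styles with a do-nothing callback; my_fn() is made up, and a NULL cpumask means the first online CPU runs it:

/* Illustrative only: my_fn() runs while every other CPU spins in its
 * stopper thread with interrupts disabled. */
static int my_fn(void *data)
{
	return 0;
}

static int run_both_ways(void)
{
	int ret;

	/* Unlocked caller: stop_machine() takes the hotplug lock itself. */
	ret = stop_machine(my_fn, NULL, NULL);
	if (ret)
		return ret;

	/* A caller already holding the lock uses the _cpuslocked variant. */
	cpus_read_lock();
	ret = stop_machine_cpuslocked(my_fn, NULL, NULL);
	cpus_read_unlock();

	return ret;
}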