From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 Aug 2017 19:09:07 +0200
Subject: Add the hotplug rework including all its side stories
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
Git-commit: 6fb87cb1181786dc88432c6c6093a02a897e5789
Patch-mainline: Queued in subsystem maintainer repository
References: SLE15 Realtime Extension
This is the hotplug rework. It includes dropping the old hotplug
workarounds, adding a new rwlock implementation for RT, cherry-picking a
few commits from upstream and so on.
This is an all-in-one commit since dropping the old patches and replacing
them with new ones piecemeal is quite painful.
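The conversion pattern that repeats throughout the diff: get_online_cpus()/
put_online_cpus() become cpus_read_lock()/cpus_read_unlock(), and callers
which install hotplug states while already holding the lock move to the new
*_cpuslocked variants. A minimal sketch of the converted calling pattern
(the demo_* names are placeholders, not part of this patch):

	static int demo_online(unsigned int cpu)  { return 0; }	/* placeholder */
	static int demo_offline(unsigned int cpu) { return 0; }	/* placeholder */

	static int demo_register(void)
	{
		int ret;

		cpus_read_lock();		/* was: get_online_cpus() */
		/*
		 * The hotplug lock is already held here, so plain
		 * cpuhp_setup_state() could deadlock on the percpu-rwsem.
		 * The _cpuslocked variant skips the lock acquisition.
		 */
		ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
						   "demo/hotplug:online",
						   demo_online, demo_offline);
		cpus_read_unlock();		/* was: put_online_cpus() */
		return ret < 0 ? ret : 0;
	}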
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
---
arch/alpha/include/asm/spinlock_types.h | 4
arch/arm/include/asm/spinlock_types.h | 4
arch/arm/kernel/hw_breakpoint.c | 11
arch/arm/kernel/patch.c | 2
arch/arm/probes/kprobes/core.c | 3
arch/arm64/include/asm/insn.h | 1
arch/arm64/include/asm/spinlock_types.h | 4
arch/arm64/kernel/insn.c | 5
arch/blackfin/include/asm/spinlock_types.h | 4
arch/hexagon/include/asm/spinlock_types.h | 4
arch/ia64/include/asm/spinlock_types.h | 4
arch/m32r/include/asm/spinlock_types.h | 4
arch/metag/include/asm/spinlock_types.h | 4
arch/mips/include/asm/spinlock_types.h | 4
arch/mn10300/include/asm/spinlock_types.h | 4
arch/powerpc/include/asm/spinlock_types.h | 4
arch/powerpc/kvm/book3s_hv.c | 14
arch/powerpc/platforms/powernv/subcore.c | 7
arch/s390/include/asm/spinlock_types.h | 4
arch/s390/kernel/kprobes.c | 4
arch/s390/kernel/time.c | 6
arch/sh/include/asm/spinlock_types.h | 4
arch/sparc/include/asm/spinlock_types.h | 4
arch/tile/include/asm/spinlock_types.h | 4
arch/x86/events/core.c | 1
arch/x86/events/intel/core.c | 11
arch/x86/kernel/cpu/mtrr/main.c | 2
arch/xtensa/include/asm/spinlock_types.h | 4
drivers/acpi/processor_driver.c | 4
drivers/acpi/processor_throttling.c | 16
drivers/cpufreq/cpufreq.c | 21
drivers/hwtracing/coresight/coresight-etm3x.c | 20
drivers/hwtracing/coresight/coresight-etm4x.c | 20
drivers/pci/pci-driver.c | 47 +-
include/linux/cpu.h | 40 +
include/linux/cpuhotplug.h | 38 +
include/linux/locallock.h | 22 -
include/linux/padata.h | 3
include/linux/pci.h | 1
include/linux/perf_event.h | 2
include/linux/rwlock_rt.h | 45 +-
include/linux/rwlock_types_rt.h | 59 ++
include/linux/sched.h | 18
include/linux/spinlock_rt.h | 7
include/linux/spinlock_types_up.h | 4
include/linux/stop_machine.h | 26 +
kernel/Kconfig.locks | 17
kernel/cpu.c | 552 ++++++--------------------
kernel/events/core.c | 106 +++-
kernel/kprobes.c | 59 +-
kernel/locking/Makefile | 2
kernel/locking/rt.c | 131 ------
kernel/locking/rtmutex.c | 96 +---
kernel/locking/rtmutex_common.h | 4
kernel/locking/rwlock-rt.c | 400 ++++++++++++++++++
kernel/padata.c | 43 +-
kernel/sched/core.c | 82 ---
kernel/stop_machine.c | 11
58 files changed, 1006 insertions(+), 1021 deletions(-)
create mode 100644 kernel/locking/rwlock-rt.c
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ALPHA_SPINLOCK_TYPES_H
#define _ALPHA_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#define TICKET_SHIFT 16
typedef struct {
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1090,7 +1090,7 @@ static int __init arch_hw_breakpoint_ini
* driven low on this core and there isn't an architected way to
* determine that.
*/
- get_online_cpus();
+ cpus_read_lock();
register_undef_hook(&debug_reg_hook);
/*
@@ -1098,15 +1098,16 @@ static int __init arch_hw_breakpoint_ini
* assume that a halting debugger will leave the world in a nice state
* for us.
*/
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
- dbg_reset_online, NULL);
+ ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/hw_breakpoint:online",
+ dbg_reset_online, NULL);
unregister_undef_hook(&debug_reg_hook);
if (WARN_ON(ret < 0) || !cpumask_empty(&debug_err_mask)) {
core_num_brps = 0;
core_num_wrps = 0;
if (ret > 0)
cpuhp_remove_state_nocalls(ret);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -1124,7 +1125,7 @@ static int __init arch_hw_breakpoint_ini
TRAP_HWBKPT, "watchpoint debug exception");
hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
TRAP_HWBKPT, "breakpoint debug exception");
- put_online_cpus();
+ cpus_read_unlock();
/* Register PM notifiers. */
pm_init();
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -124,5 +124,5 @@ void __kprobes patch_text(void *addr, un
.insn = insn,
};
- stop_machine(patch_text_stop_machine, &patch, NULL);
+ stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
}
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -182,7 +182,8 @@ void __kprobes kprobes_remove_breakpoint
.addr = addr,
.insn = insn,
};
- stop_machine(__kprobes_remove_breakpoint, &p, cpu_online_mask);
+ stop_machine_cpuslocked(__kprobes_remove_breakpoint, &p,
+ cpu_online_mask);
}
void __kprobes arch_disarm_kprobe(struct kprobe *p)
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -433,7 +433,6 @@ u32 aarch64_set_branch_offset(u32 insn,
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
s32 aarch64_insn_adrp_get_offset(u32 insn);
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -16,10 +16,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
-# error "please don't include this file directly"
-#endif
-
#include <linux/types.h>
#define TICKET_SHIFT 16
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -255,6 +255,7 @@ static int __kprobes aarch64_insn_patch_
return ret;
}
+static
int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
{
struct aarch64_insn_patch patch = {
@@ -267,8 +268,8 @@ int __kprobes aarch64_insn_patch_text_sy
if (cnt <= 0)
return -EINVAL;
- return stop_machine(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
+ return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
+ cpu_online_mask);
}
int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
--- a/arch/blackfin/include/asm/spinlock_types.h
+++ b/arch/blackfin/include/asm/spinlock_types.h
@@ -7,10 +7,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#include <asm/rwlock.h>
typedef struct {
--- a/arch/hexagon/include/asm/spinlock_types.h
+++ b/arch/hexagon/include/asm/spinlock_types.h
@@ -21,10 +21,6 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_IA64_SPINLOCK_TYPES_H
#define _ASM_IA64_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/m32r/include/asm/spinlock_types.h
+++ b/arch/m32r/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_M32R_SPINLOCK_TYPES_H
#define _ASM_M32R_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile int slock;
} arch_spinlock_t;
--- a/arch/metag/include/asm/spinlock_types.h
+++ b/arch/metag/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_METAG_SPINLOCK_TYPES_H
#define _ASM_METAG_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#include <linux/types.h>
#include <asm/byteorder.h>
--- a/arch/mn10300/include/asm/spinlock_types.h
+++ b/arch/mn10300/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct arch_spinlock {
unsigned int slock;
} arch_spinlock_t;
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3871,7 +3871,7 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
- get_online_cpus();
+ cpus_read_lock();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
@@ -3893,17 +3893,17 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
- put_online_cpus();
+ cpus_read_unlock();
kfree(ops->rm_core);
kfree(ops);
return;
}
- cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
- "ppc/kvm_book3s:prepare",
- kvmppc_set_host_core,
- kvmppc_clear_host_core);
- put_online_cpus();
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
+ cpus_read_unlock();
}
void kvmppc_free_host_rm_ops(void)
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -349,7 +349,7 @@ static int set_subcores_per_core(int new
state->master = 0;
}
- get_online_cpus();
+ cpus_read_lock();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
@@ -357,9 +357,10 @@ static int set_subcores_per_core(int new
/* Ensure state is consistent before we call the other cpus */
mb();
- stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+ stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+ cpu_online_mask);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
int lock;
} __attribute__ ((aligned (4))) arch_spinlock_t;
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -196,7 +196,7 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -204,7 +204,7 @@ void arch_disarm_kprobe(struct kprobe *p
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -677,10 +677,10 @@ static void stp_work_fn(struct work_stru
goto out_unlock;
memset(&stp_sync, 0, sizeof(stp_sync));
- get_online_cpus();
+ cpus_read_lock();
atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
- stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
- put_online_cpus();
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
if (!check_sync_clock())
/*
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SH_SPINLOCK_TYPES_H
#define __ASM_SH_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int lock;
} arch_spinlock_t;
--- a/arch/sparc/include/asm/spinlock_types.h
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __SPARC_SPINLOCK_TYPES_H
#define __SPARC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned char lock;
} arch_spinlock_t;
--- a/arch/tile/include/asm/spinlock_types.h
+++ b/arch/tile/include/asm/spinlock_types.h
@@ -15,10 +15,6 @@
#ifndef _ASM_TILE_SPINLOCK_TYPES_H
#define _ASM_TILE_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
#ifdef __tilegx__
/* Low 15 bits are "next"; high 15 bits are "current". */
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2302,7 +2302,6 @@ void perf_check_microcode(void)
if (x86_pmu.check_microcode)
x86_pmu.check_microcode();
}
-EXPORT_SYMBOL_GPL(perf_check_microcode);
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3889,12 +3889,10 @@ static void intel_snb_check_microcode(vo
int pebs_broken = 0;
int cpu;
- get_online_cpus();
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
- put_online_cpus();
if (pebs_broken == x86_pmu.pebs_broken)
return;
@@ -3967,7 +3965,9 @@ static bool check_msr(unsigned long msr,
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
+ cpus_read_lock();
intel_snb_check_microcode();
+ cpus_read_unlock();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
@@ -4834,13 +4834,12 @@ static __init int fixup_ht_bug(void)
lockup_detector_resume();
- get_online_cpus();
+ cpus_read_lock();
- for_each_online_cpu(c) {
+ for_each_online_cpu(c)
free_excl_cntrs(&per_cpu(cpu_hw_events, c));
- }
- put_online_cpus();
+ cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
return 0;
}
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -807,10 +807,8 @@ void mtrr_save_state(void)
if (!mtrr_enabled())
return;
- get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
- put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)
--- a/arch/xtensa/include/asm/spinlock_types.h
+++ b/arch/xtensa/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -268,9 +268,9 @@ static int acpi_processor_start(struct d
return -ENODEV;
/* Protect against concurrent CPU hotplug operations */
- get_online_cpus();
+ cpu_hotplug_disable();
ret = __acpi_processor_start(device);
- put_online_cpus();
+ cpu_hotplug_enable();
return ret;
}
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -909,6 +909,13 @@ static long __acpi_processor_get_throttl
return pr->throttling.acpi_processor_get_throttling(pr);
}
+static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
+{
+ if (direct || (is_percpu_thread() && cpu == smp_processor_id()))
+ return fn(arg);
+ return work_on_cpu(cpu, fn, arg);
+}
+
static int acpi_processor_get_throttling(struct acpi_processor *pr)
{
if (!pr)
@@ -926,7 +933,7 @@ static int acpi_processor_get_throttling
if (!cpu_online(pr->id))
return -ENODEV;
- return work_on_cpu(pr->id, __acpi_processor_get_throttling, pr);
+ return call_on_cpu(pr->id, __acpi_processor_get_throttling, pr, false);
}
static int acpi_processor_get_fadt_info(struct acpi_processor *pr)
@@ -1076,13 +1083,6 @@ static long acpi_processor_throttling_fn
arg->target_state, arg->force);
}
-static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
-{
- if (direct)
- return fn(arg);
- return work_on_cpu(cpu, fn, arg);
-}
-
static int __acpi_processor_set_throttling(struct acpi_processor *pr,
int state, bool force, bool direct)
{
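The call_on_cpu() helper, moved up here, gains one extra direct-call
condition: a per-CPU kthread that is already running on the target CPU may
invoke @fn directly instead of bouncing through work_on_cpu(). A hedged
sketch of how a caller sees the two paths (demo_get_throttling() is
illustrative, not part of this patch):

	static int demo_get_throttling(struct acpi_processor *pr)
	{
		/*
		 * From a PF_NO_SETAFFINITY kthread bound to pr->id and
		 * currently on it (is_percpu_thread(), added in the sched.h
		 * hunk below), fn runs synchronously. From any other context
		 * the call is bounced to the right CPU via work_on_cpu().
		 */
		return call_on_cpu(pr->id, __acpi_processor_get_throttling,
				   pr, false);
	}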
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -962,7 +962,7 @@ static ssize_t store(struct kobject *kob
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
@@ -970,7 +970,7 @@ static ssize_t store(struct kobject *kob
up_write(&policy->rwsem);
}
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -2531,7 +2531,7 @@ int cpufreq_register_driver(struct cpufr
pr_debug("trying to register driver %s\n", driver_data->name);
/* Protect against concurrent CPU online/offline. */
- get_online_cpus();
+ cpus_read_lock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver) {
@@ -2564,9 +2564,10 @@ int cpufreq_register_driver(struct cpufr
goto err_if_unreg;
}
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
- cpuhp_cpufreq_online,
- cpuhp_cpufreq_offline);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "cpufreq:online",
+ cpuhp_cpufreq_online,
+ cpuhp_cpufreq_offline);
if (ret < 0)
goto err_if_unreg;
hp_online = ret;
@@ -2584,7 +2585,7 @@ err_null_driver:
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
out:
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
@@ -2607,17 +2608,17 @@ int cpufreq_unregister_driver(struct cpu
pr_debug("unregistering driver %s\n", driver->name);
/* Protect against concurrent cpu hotplug */
- get_online_cpus();
+ cpus_read_lock();
subsys_interface_unregister(&cpufreq_interface);
remove_boost_sysfs_file();
- cpuhp_remove_state_nocalls(hp_online);
+ cpuhp_remove_state_nocalls_cpuslocked(hp_online);
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -587,7 +587,7 @@ static void etm_disable_sysfs(struct cor
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
- get_online_cpus();
+ cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
@@ -597,7 +597,7 @@ static void etm_disable_sysfs(struct cor
smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
- put_online_cpus();
+ cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
@@ -795,7 +795,7 @@ static int etm_probe(struct amba_device
drvdata->cpu = pdata ? pdata->cpu : 0;
- get_online_cpus();
+ cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
@@ -803,17 +803,17 @@ static int etm_probe(struct amba_device
dev_err(dev, "ETM arch init failed\n");
if (!etm_count++) {
- cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight:starting",
- etm_starting_cpu, etm_dying_cpu);
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "arm/coresight:online",
- etm_online_cpu, NULL);
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight:starting",
+ etm_starting_cpu, etm_dying_cpu);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight:online",
+ etm_online_cpu, NULL);
if (ret < 0)
goto err_arch_supported;
hp_online = ret;
}
- put_online_cpus();
+ cpus_read_unlock();
if (etm_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -372,7 +372,7 @@ static void etm4_disable_sysfs(struct co
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
- get_online_cpus();
+ cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
@@ -382,7 +382,7 @@ static void etm4_disable_sysfs(struct co
smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
- put_online_cpus();
+ cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
@@ -983,7 +983,7 @@ static int etm4_probe(struct amba_device
drvdata->cpu = pdata ? pdata->cpu : 0;
- get_online_cpus();
+ cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
@@ -991,18 +991,18 @@ static int etm4_probe(struct amba_device
dev_err(dev, "ETM arch init failed\n");
if (!etm4_count++) {
- cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight4:starting",
- etm4_starting_cpu, etm4_dying_cpu);
- ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "arm/coresight4:online",
- etm4_online_cpu, NULL);
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight4:starting",
+ etm4_starting_cpu, etm4_dying_cpu);
+ ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight4:online",
+ etm4_online_cpu, NULL);
if (ret < 0)
goto err_arch_supported;
hp_online = ret;
}
- put_online_cpus();
+ cpus_read_unlock();
if (etm4_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -323,10 +323,19 @@ static long local_pci_probe(void *_ddi)
return 0;
}
+static bool pci_physfn_is_probed(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_IOV
+ return dev->is_virtfn && dev->physfn->is_probed;
+#else
+ return false;
+#endif
+}
+
static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
const struct pci_device_id *id)
{
- int error, node;
+ int error, node, cpu;
struct drv_dev_and_id ddi = { drv, dev, id };
/*
@@ -335,33 +344,27 @@ static int pci_call_probe(struct pci_dri
* on the right node.
*/
node = dev_to_node(&dev->dev);
+ dev->is_probed = 1;
+
+ cpu_hotplug_disable();
/*
- * On NUMA systems, we are likely to call a PF probe function using
- * work_on_cpu(). If that probe calls pci_enable_sriov() (which
- * adds the VF devices via pci_bus_add_device()), we may re-enter
- * this function to call the VF probe function. Calling
- * work_on_cpu() again will cause a lockdep warning. Since VFs are
- * always on the same node as the PF, we can work around this by
- * avoiding work_on_cpu() when we're already on the correct node.
- *
- * Preemption is enabled, so it's theoretically unsafe to use
- * numa_node_id(), but even if we run the probe function on the
- * wrong node, it should be functionally correct.
+ * Prevent nesting work_on_cpu() for the case where a Virtual Function
+ * device is probed from work_on_cpu() of the Physical device.
*/
- if (node >= 0 && node != numa_node_id()) {
- int cpu;
-
- get_online_cpus();
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
+ pci_physfn_is_probed(dev))
+ cpu = nr_cpu_ids;
+ else
cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
- if (cpu < nr_cpu_ids)
- error = work_on_cpu(cpu, local_pci_probe, &ddi);
- else
- error = local_pci_probe(&ddi);
- put_online_cpus();
- } else
+
+ if (cpu < nr_cpu_ids)
+ error = work_on_cpu(cpu, local_pci_probe, &ddi);
+ else
error = local_pci_probe(&ddi);
+ dev->is_probed = 0;
+ cpu_hotplug_enable();
return error;
}
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -117,12 +117,11 @@ static inline void cpu_maps_update_done(
extern struct bus_type cpu_subsys;
#ifdef CONFIG_HOTPLUG_CPU
-/* Stop CPUs going up and down. */
-
-extern void cpu_hotplug_begin(void);
-extern void cpu_hotplug_done(void);
-extern void get_online_cpus(void);
-extern void put_online_cpus(void);
+extern void cpus_write_lock(void);
+extern void cpus_write_unlock(void);
+extern void cpus_read_lock(void);
+extern void cpus_read_unlock(void);
+extern void lockdep_assert_cpus_held(void);
extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
void clear_tasks_mm_cpumask(int cpu);
@@ -130,18 +129,25 @@ int cpu_down(unsigned int cpu);
extern void pin_current_cpu(void);
extern void unpin_current_cpu(void);
-#else /* CONFIG_HOTPLUG_CPU */
-
-static inline void cpu_hotplug_begin(void) {}
-static inline void cpu_hotplug_done(void) {}
-#define get_online_cpus() do { } while (0)
-#define put_online_cpus() do { } while (0)
-#define cpu_hotplug_disable() do { } while (0)
-#define cpu_hotplug_enable() do { } while (0)
-static inline void pin_current_cpu(void) { }
-static inline void unpin_current_cpu(void) { }
+#else /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_HOTPLUG_CPU */
+static inline void cpus_write_lock(void) { }
+static inline void cpus_write_unlock(void) { }
+static inline void cpus_read_lock(void) { }
+static inline void cpus_read_unlock(void) { }
+static inline void lockdep_assert_cpus_held(void) { }
+static inline void cpu_hotplug_disable(void) { }
+static inline void cpu_hotplug_enable(void) { }
+static inline void pin_current_cpu(void) { }
+static inline void unpin_current_cpu(void) { }
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/* Wrappers which go away once all code is converted */
+static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
+static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
+#define get_online_cpus cpus_read_lock
+#define put_online_cpus cpus_read_unlock
#ifdef CONFIG_PM_SLEEP_SMP
extern int freeze_secondary_cpus(int primary);
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -164,6 +164,11 @@ int __cpuhp_setup_state(enum cpuhp_state
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu), bool multi_instance);
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
+ bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance);
/**
* cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
* @state: The state for which the calls are installed
@@ -182,6 +187,15 @@ static inline int cpuhp_setup_state(enum
return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
+static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state_cpuslocked(state, name, true, startup,
+ teardown, false);
+}
+
/**
* cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
* callbacks
@@ -202,6 +216,15 @@ static inline int cpuhp_setup_state_noca
false);
}
+static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
+ const char *name,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu))
+{
+ return __cpuhp_setup_state_cpuslocked(state, name, false, startup,
+ teardown, false);
+}
+
/**
* cpuhp_setup_state_multi - Add callbacks for multi state
* @state: The state for which the calls are installed
@@ -228,6 +251,8 @@ static inline int cpuhp_setup_state_mult
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
bool invoke);
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node, bool invoke);
/**
* cpuhp_state_add_instance - Add an instance for a state and invoke startup
@@ -260,7 +285,15 @@ static inline int cpuhp_state_add_instan
return __cpuhp_state_add_instance(state, node, false);
}
+static inline int
+cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node)
+{
+ return __cpuhp_state_add_instance_cpuslocked(state, node, false);
+}
+
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
/**
* cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
@@ -284,6 +317,11 @@ static inline void cpuhp_remove_state_no
__cpuhp_remove_state(state, false);
}
+static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
+{
+ __cpuhp_remove_state_cpuslocked(state, false);
+}
+
/**
* cpuhp_remove_multi_state - Remove hotplug multi state callback
* @state: The state for which the calls are removed
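All of the _cpuslocked variants above funnel into the same __cpuhp_*
implementations and, per the kernel/cpu.c hunks further down, assert via
lockdep_assert_cpus_held() that the hotplug lock really is held. A hedged
sketch for the multi-instance case (the demo_* state and node are
illustrative):

	static enum cpuhp_state demo_multi_state; /* from cpuhp_setup_state_multi() */
	static struct hlist_node demo_node;

	static int demo_add_instance(void)
	{
		int ret;

		cpus_read_lock();
		/* ... other work that must exclude CPU hotplug ... */
		ret = cpuhp_state_add_instance_nocalls_cpuslocked(demo_multi_state,
								  &demo_node);
		cpus_read_unlock();
		return ret;
	}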
--- a/include/linux/locallock.h
+++ b/include/linux/locallock.h
@@ -36,26 +36,10 @@ struct local_irq_lock {
spin_lock_init(&per_cpu(lvar, __cpu).lock); \
} while (0)
-/*
- * spin_lock|trylock|unlock_local flavour that does not migrate disable
- * used for __local_lock|trylock|unlock where get_local_var/put_local_var
- * already takes care of the migrate_disable/enable
- * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
- */
-#ifdef CONFIG_PREEMPT_RT_FULL
-# define spin_lock_local(lock) rt_spin_lock__no_mg(lock)
-# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock)
-# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock)
-#else
-# define spin_lock_local(lock) spin_lock(lock)
-# define spin_trylock_local(lock) spin_trylock(lock)
-# define spin_unlock_local(lock) spin_unlock(lock)
-#endif
-
static inline void __local_lock(struct local_irq_lock *lv)
{
if (lv->owner != current) {
- spin_lock_local(&lv->lock);
+ spin_lock(&lv->lock);
LL_WARN(lv->owner);
LL_WARN(lv->nestcnt);
lv->owner = current;
@@ -71,7 +55,7 @@ static inline void __local_lock(struct l
static inline int __local_trylock(struct local_irq_lock *lv)
{
- if (lv->owner != current && spin_trylock_local(&lv->lock)) {
+ if (lv->owner != current && spin_trylock(&lv->lock)) {
LL_WARN(lv->owner);
LL_WARN(lv->nestcnt);
lv->owner = current;
@@ -98,7 +82,7 @@ static inline void __local_unlock(struct
return;
lv->owner = NULL;
- spin_unlock_local(&lv->lock);
+ spin_unlock(&lv->lock);
}
#define local_unlock(lvar) \
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -170,9 +170,6 @@ struct padata_instance {
extern struct padata_instance *padata_alloc_possible(
struct workqueue_struct *wq);
-extern struct padata_instance *padata_alloc(struct workqueue_struct *wq,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask);
extern void padata_free(struct padata_instance *pinst);
extern int padata_do_parallel(struct padata_instance *pinst,
struct padata_priv *padata, int cb_cpu);
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -415,6 +415,7 @@ struct pci_dev {
unsigned int irq_managed:1;
unsigned int has_secondary_link:1;
unsigned int non_compliant_bars:1; /* broken BARs; ignore them */
+ unsigned int is_probed:1; /* device probing in progress */
unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -800,6 +800,8 @@ struct perf_cpu_context {
struct list_head sched_cb_entry;
int sched_cb_usage;
+
+ int online;
};
struct perf_output_handle {
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -5,41 +5,39 @@
#error Do not include directly. Use spinlock.h
#endif
-#define rwlock_init(rwl) \
-do { \
- static struct lock_class_key __key; \
- \
- rt_mutex_init(&(rwl)->lock); \
- __rt_rwlock_init(rwl, #rwl, &__key); \
-} while (0)
-
extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
-extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
-extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
-extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
-#define write_trylock_irqsave(lock, flags) \
- __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
+static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags)
+{
+ /* XXX ARCH_IRQ_ENABLED */
+ *flags = 0;
+ return rt_write_trylock(lock);
+}
+
+#define write_trylock_irqsave(lock, flags) \
+ __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags)))
#define read_lock_irqsave(lock, flags) \
do { \
typecheck(unsigned long, flags); \
- flags = rt_read_lock_irqsave(lock); \
+ rt_read_lock(lock); \
+ flags = 0; \
} while (0)
#define write_lock_irqsave(lock, flags) \
do { \
typecheck(unsigned long, flags); \
- flags = rt_write_lock_irqsave(lock); \
+ rt_write_lock(lock); \
+ flags = 0; \
} while (0)
#define read_lock(lock) rt_read_lock(lock)
@@ -96,4 +94,21 @@ extern void __rt_rwlock_init(rwlock_t *r
rt_write_unlock(lock); \
} while (0)
+#define rwlock_init(rwl) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __rt_rwlock_init(rwl, #rwl, &__key); \
+} while (0)
+
+/*
+ * Internal functions made global for CPU pinning
+ */
+void __read_rt_lock(struct rt_rw_lock *lock);
+int __read_rt_trylock(struct rt_rw_lock *lock);
+void __write_rt_lock(struct rt_rw_lock *lock);
+int __write_rt_trylock(struct rt_rw_lock *lock);
+void __read_rt_unlock(struct rt_rw_lock *lock);
+void __write_rt_unlock(struct rt_rw_lock *lock);
+
#endif
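Worth spelling out what the rewritten irqsave macros above mean on RT: these
rwlocks are sleeping locks, so no interrupt state is saved, the flags are
dummied to 0 and interrupts stay enabled (the XXX ARCH_IRQ_ENABLED comment
marks that as a known simplification). A caller sketch, with an illustrative
lock of its own:

	static DEFINE_RWLOCK(demo_lock);	/* illustrative */

	static void demo_update(void)
	{
		unsigned long flags;

		write_lock_irqsave(&demo_lock, flags);	/* RT: rt_write_lock(), flags = 0 */
		/* critical section: preemptible on RT, interrupts stay enabled */
		write_unlock_irqrestore(&demo_lock, flags);
	}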
--- a/include/linux/rwlock_types_rt.h
+++ b/include/linux/rwlock_types_rt.h
@@ -5,24 +5,24 @@
#error "Do not include directly. Include spinlock_types.h instead"
#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
+#else
+# define RW_DEP_MAP_INIT(lockname)
+#endif
+
+#ifndef CONFIG_RWLOCK_RT_READER_BIASED
/*
* rwlocks - rtmutex which allows single reader recursion
*/
typedef struct {
struct rt_mutex lock;
- int read_depth;
unsigned int break_lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
} rwlock_t;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
-#else
-# define RW_DEP_MAP_INIT(lockname)
-#endif
-
#define __RW_LOCK_UNLOCKED(name) \
{ .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
RW_DEP_MAP_INIT(name) }
@@ -30,4 +30,49 @@ typedef struct {
#define DEFINE_RWLOCK(name) \
rwlock_t name = __RW_LOCK_UNLOCKED(name)
+#else /* CONFIG_RWLOCK_RT_READER_BIASED */
+
+typedef struct rt_rw_lock rwlock_t;
+
+#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name)
+
+#define DEFINE_RWLOCK(name) \
+ rwlock_t name = __RW_LOCK_UNLOCKED(name)
+
+#endif /* !CONFIG_RWLOCK_RT_READER_BIASED */
+
+/*
+ * A reader biased implementation primarily for CPU pinning.
+ *
+ * Can be selected as a general replacement for the single reader RT rwlock
+ * variant
+ */
+struct rt_rw_lock {
+ struct rt_mutex rtmutex;
+ atomic_t readers;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#define READER_BIAS (1U << 31)
+#define WRITER_BIAS (1U << 30)
+
+#define __RWLOCK_RT_INITIALIZER(name) \
+{ \
+ .readers = ATOMIC_INIT(READER_BIAS), \
+ .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \
+ RW_DEP_MAP_INIT(name) \
+}
+
+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name,
+ struct lock_class_key *key);
+
+#define rwlock_biased_rt_init(rwlock) \
+ do { \
+ static struct lock_class_key __key; \
+ \
+ __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \
+ } while (0)
+
#endif
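The bias values above encode the lock state: ->readers starts at
READER_BIAS, which is negative as a signed int; each active reader adds one,
and a writer removes the bias so the count turns non-negative. A hedged
sketch of the reader fast path, assuming it mirrors __read_rt_trylock() in
the new kernel/locking/rwlock-rt.c (whose full diff is not excerpted here):

	static int demo_read_trylock(struct rt_rw_lock *lock)
	{
		int r = atomic_read(&lock->readers);

		/* negative means READER_BIAS is intact, i.e. no writer */
		while (r < 0) {
			int old = atomic_cmpxchg(&lock->readers, r, r + 1);

			if (old == r)
				return 1;	/* reader accounted, proceed */
			r = old;
		}
		return 0;	/* writer active or pending, take the slow path */
	}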
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -576,6 +576,7 @@ struct task_struct {
#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
int migrate_disable;
int migrate_disable_update;
+ int pinned_on_cpu;
# ifdef CONFIG_SCHED_DEBUG
int migrate_disable_atomic;
# endif
@@ -1336,6 +1337,16 @@ extern struct pid *cad_pid;
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
+static inline bool is_percpu_thread(void)
+{
+#ifdef CONFIG_SMP
+ return (current->flags & PF_NO_SETAFFINITY) &&
+ (current->nr_cpus_allowed == 1);
+#else
+ return true;
+#endif
+}
+
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
@@ -1394,10 +1405,6 @@ extern int task_can_attach(struct task_s
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
-int migrate_me(void);
-void tell_sched_cpu_down_begin(int cpu);
-void tell_sched_cpu_down_done(int cpu);
-
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1408,9 +1415,6 @@ static inline int set_cpus_allowed_ptr(s
return -EINVAL;
return 0;
}
-static inline int migrate_me(void) { return 0; }
-static inline void tell_sched_cpu_down_begin(int cpu) { }
-static inline void tell_sched_cpu_down_done(int cpu) { }
#endif
#ifndef cpu_relax_yield
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -18,10 +18,6 @@ do { \
__rt_spin_lock_init(slock, #slock, &__key); \
} while (0)
-void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
-void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
-int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
-
extern void __lockfunc rt_spin_lock(spinlock_t *lock);
extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
@@ -35,9 +31,10 @@ extern int atomic_dec_and_spin_lock(atom
/*
* lockdep-less calls, for derived types like rwlock:
* (for trylock they can use rt_mutex_trylock() directly.
+ * Migrate disable handling must be done at the call site.
*/
-extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
+extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
#define spin_lock(lock) rt_spin_lock(lock)
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -1,10 +1,6 @@
#ifndef __LINUX_SPINLOCK_TYPES_UP_H
#define __LINUX_SPINLOCK_TYPES_UP_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
/*
* include/linux/spinlock_types_up.h - spinlock type definitions for UP
*
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -116,15 +116,29 @@ static inline int try_stop_cpus(const st
* @fn() runs.
*
* This can be thought of as a very heavy write lock, equivalent to
- * grabbing every spinlock in the kernel. */
+ * grabbing every spinlock in the kernel.
+ *
+ * Protects against CPU hotplug.
+ */
int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+/**
+ * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * Same as above. Must be called from within a cpus_read_lock() protected
+ * region. Avoids nested calls to cpus_read_lock().
+ */
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
-static __always_inline int stop_machine(cpu_stop_fn_t fn, void *data,
- const struct cpumask *cpus)
+static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
{
unsigned long flags;
int ret;
@@ -134,6 +148,12 @@ static __always_inline int stop_machine(
return ret;
}
+static inline int stop_machine(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
+{
+ return stop_machine_cpuslocked(fn, data, cpus);
+}
+
static __always_inline int
stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
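stop_machine_cpuslocked() exists for exactly the call sites converted
earlier in this patch (arm patch_text(), arm64 insn patching, s390 kprobes,
powernv subcore): they already run inside a cpus_read_lock() region, where
plain stop_machine() would try to take the hotplug lock a second time. A
hedged caller sketch (the demo_* names are illustrative):

	static int demo_patch_cb(void *arg)
	{
		/* runs with every online CPU spinning in the stopper */
		return 0;
	}

	static int demo_patch(void *arg)
	{
		int ret;

		cpus_read_lock();	/* hotplug excluded for the whole sequence */
		ret = stop_machine_cpuslocked(demo_patch_cb, arg, cpu_online_mask);
		cpus_read_unlock();
		return ret;
	}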
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -248,3 +248,20 @@ config ARCH_USE_QUEUED_RWLOCKS
config QUEUED_RWLOCKS
def_bool y if ARCH_USE_QUEUED_RWLOCKS
depends on SMP
+
+if PREEMPT_RT_FULL
+
+menu "RT Locking"
+
+config RWLOCK_RT_READER_BIASED
+ bool "Reader biased RWLOCK implementation for Preempt-RT"
+ default n
+ help
+ This option provides an alternative RWLOCK implementation for
+ PREEMPT-RT. The new implementation is not as writer friendly as
+ the regular RT implementation or mainline, but nothing RT related
+ should be affected. Nevertheless, this switch allows double
+ checking in case something stalls.
+endmenu
+
+endif
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -28,6 +28,7 @@
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -67,6 +68,17 @@ struct cpuhp_cpu_state {
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PREEMPT_RT_FULL)
+static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \
+ __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock);
+#endif
+
+#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
+static struct lock_class_key cpuhp_state_key;
+static struct lockdep_map cpuhp_state_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+#endif
+
/**
* cpuhp_step - Hotplug state machine step
* @name: Name of the step
@@ -198,404 +210,90 @@ void cpu_maps_update_done(void)
mutex_unlock(&cpu_add_remove_lock);
}
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
#ifdef CONFIG_HOTPLUG_CPU
-static struct {
- struct task_struct *active_writer;
- /* wait queue to wake up the active_writer */
- wait_queue_head_t wq;
- /* verifies that no writer will get active while readers are active */
- struct mutex lock;
- /*
- * Also blocks the new readers during
- * an ongoing cpu hotplug operation.
- */
- atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
- .active_writer = NULL,
- .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
- .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
- lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
-
-/**
- * hotplug_pcp - per cpu hotplug descriptor
- * @unplug: set when pin_current_cpu() needs to sync tasks
- * @sync_tsk: the task that waits for tasks to finish pinned sections
- * @refcount: counter of tasks in pinned sections
- * @grab_lock: set when the tasks entering pinned sections should wait
- * @synced: notifier for @sync_tsk to tell cpu_down it's finished
- * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
- * @mutex_init: zero if the mutex hasn't been initialized yet.
- *
- * Although @unplug and @sync_tsk may point to the same task, the @unplug
- * is used as a flag and still exists after @sync_tsk has exited and
- * @sync_tsk set to NULL.
- */
-struct hotplug_pcp {
- struct task_struct *unplug;
- struct task_struct *sync_tsk;
- int refcount;
- int grab_lock;
- struct completion synced;
- struct completion unplug_wait;
-#ifdef CONFIG_PREEMPT_RT_FULL
- /*
- * Note, on PREEMPT_RT, the hotplug lock must save the state of
- * the task, otherwise the mutex will cause the task to fail
- * to sleep when required. (Because it's called from migrate_disable())
- *
- * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
- * state.
- */
- spinlock_t lock;
-#else
- struct mutex mutex;
-#endif
- int mutex_init;
-};
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
-# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
-#else
-# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
-# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
-#endif
-
-static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
-
/**
* pin_current_cpu - Prevent the current cpu from being unplugged
- *
- * Lightweight version of get_online_cpus() to prevent cpu from being
- * unplugged when code runs in a migration disabled region.
- *
- * Must be called with preemption disabled (preempt_count = 1)!
*/
void pin_current_cpu(void)
{
- struct hotplug_pcp *hp;
- int force = 0;
-
-retry:
- hp = this_cpu_ptr(&hotplug_pcp);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct rt_rw_lock *cpuhp_pin;
+ unsigned int cpu;
+ int ret;
- if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
- hp->unplug == current) {
- hp->refcount++;
+again:
+ cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
+ ret = __read_rt_trylock(cpuhp_pin);
+ if (ret) {
+ current->pinned_on_cpu = smp_processor_id();
return;
}
- if (hp->grab_lock) {
- preempt_enable();
- hotplug_lock(hp);
- hotplug_unlock(hp);
- } else {
- preempt_enable();
- /*
- * Try to push this task off of this CPU.
- */
- if (!migrate_me()) {
- preempt_disable();
- hp = this_cpu_ptr(&hotplug_pcp);
- if (!hp->grab_lock) {
- /*
- * Just let it continue it's already pinned
- * or about to sleep.
- */
- force = 1;
- goto retry;
- }
- preempt_enable();
- }
- }
- preempt_disable();
- goto retry;
-}
-
-/**
- * unpin_current_cpu - Allow unplug of current cpu
- *
- * Must be called with preemption or interrupts disabled!
- */
-void unpin_current_cpu(void)
-{
- struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
-
- WARN_ON(hp->refcount <= 0);
-
- /* This is safe. sync_unplug_thread is pinned to this cpu */
- if (!--hp->refcount && hp->unplug && hp->unplug != current)
- wake_up_process(hp->unplug);
-}
-
-static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (hp->refcount) {
- schedule_preempt_disabled();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-}
-
-static int sync_unplug_thread(void *data)
-{
- struct hotplug_pcp *hp = data;
-
- wait_for_completion(&hp->unplug_wait);
- preempt_disable();
- hp->unplug = current;
- wait_for_pinned_cpus(hp);
-
- /*
- * This thread will synchronize the cpu_down() with threads
- * that have pinned the CPU. When the pinned CPU count reaches
- * zero, we inform the cpu_down code to continue to the next step.
- */
- set_current_state(TASK_UNINTERRUPTIBLE);
+ cpu = smp_processor_id();
+ preempt_lazy_enable();
preempt_enable();
- complete(&hp->synced);
- /*
- * If all succeeds, the next step will need tasks to wait till
- * the CPU is offline before continuing. To do this, the grab_lock
- * is set and tasks going into pin_current_cpu() will block on the
- * mutex. But we still need to wait for those that are already in
- * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
- * will kick this thread out.
- */
- while (!hp->grab_lock && !kthread_should_stop()) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- /* Make sure grab_lock is seen before we see a stale completion */
- smp_mb();
+ __read_rt_lock(cpuhp_pin);
- /*
- * Now just before cpu_down() enters stop machine, we need to make
- * sure all tasks that are in pinned CPU sections are out, and new
- * tasks will now grab the lock, keeping them from entering pinned
- * CPU sections.
- */
- if (!kthread_should_stop()) {
- preempt_disable();
- wait_for_pinned_cpus(hp);
- preempt_enable();
- complete(&hp->synced);
- }
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- while (!kthread_should_stop()) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
+ preempt_disable();
+ preempt_lazy_disable();
+ if (cpu != smp_processor_id()) {
+ __read_rt_unlock(cpuhp_pin);
+ goto again;
}
- set_current_state(TASK_RUNNING);
-
- /*
- * Force this thread off this CPU as it's going down and
- * we don't want any more work on this CPU.
- */
- current->flags &= ~PF_NO_SETAFFINITY;
- set_cpus_allowed_ptr(current, cpu_present_mask);
- migrate_me();
- return 0;
-}
-
-static void __cpu_unplug_sync(struct hotplug_pcp *hp)
-{
- wake_up_process(hp->sync_tsk);
- wait_for_completion(&hp->synced);
-}
-
-static void __cpu_unplug_wait(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- complete(&hp->unplug_wait);
- wait_for_completion(&hp->synced);
+ current->pinned_on_cpu = cpu;
+#endif
}
-/*
- * Start the sync_unplug_thread on the target cpu and wait for it to
- * complete.
+/**
+ * unpin_current_cpu - Allow unplug of current cpu
*/
-static int cpu_unplug_begin(unsigned int cpu)
+void unpin_current_cpu(void)
{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
- int err;
-
- /* Protected by cpu_hotplug.lock */
- if (!hp->mutex_init) {
#ifdef CONFIG_PREEMPT_RT_FULL
- spin_lock_init(&hp->lock);
-#else
- mutex_init(&hp->mutex);
-#endif
- hp->mutex_init = 1;
- }
-
- /* Inform the scheduler to migrate tasks off this CPU */
- tell_sched_cpu_down_begin(cpu);
+ struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
- init_completion(&hp->synced);
- init_completion(&hp->unplug_wait);
-
- hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
- if (IS_ERR(hp->sync_tsk)) {
- err = PTR_ERR(hp->sync_tsk);
- hp->sync_tsk = NULL;
- return err;
- }
- kthread_bind(hp->sync_tsk, cpu);
+ if (WARN_ON(current->pinned_on_cpu != smp_processor_id()))
+ cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, current->pinned_on_cpu);
- /*
- * Wait for tasks to get out of the pinned sections,
- * it's still OK if new tasks enter. Some CPU notifiers will
- * wait for tasks that are going to enter these sections and
- * we must not have them block.
- */
- wake_up_process(hp->sync_tsk);
- return 0;
+ current->pinned_on_cpu = -1;
+ __read_rt_unlock(cpuhp_pin);
+#endif
}
-static void cpu_unplug_sync(unsigned int cpu)
-{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- init_completion(&hp->synced);
- /* The completion needs to be initialzied before setting grab_lock */
- smp_wmb();
+DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
- /* Grab the mutex before setting grab_lock */
- hotplug_lock(hp);
- hp->grab_lock = 1;
-
- /*
- * The CPU notifiers have been completed.
- * Wait for tasks to get out of pinned CPU sections and have new
- * tasks block until the CPU is completely down.
- */
- __cpu_unplug_sync(hp);
-
- /* All done with the sync thread */
- kthread_stop(hp->sync_tsk);
- hp->sync_tsk = NULL;
-}
-
-static void cpu_unplug_done(unsigned int cpu)
+void cpus_read_lock(void)
{
- struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
-
- hp->unplug = NULL;
- /* Let all tasks know cpu unplug is finished before cleaning up */
- smp_wmb();
-
- if (hp->sync_tsk)
- kthread_stop(hp->sync_tsk);
-
- if (hp->grab_lock) {
- hotplug_unlock(hp);
- /* protected by cpu_hotplug.lock */
- hp->grab_lock = 0;
- }
- tell_sched_cpu_down_done(cpu);
+ percpu_down_read(&cpu_hotplug_lock);
}
+EXPORT_SYMBOL_GPL(cpus_read_lock);
-void get_online_cpus(void)
+void cpus_read_unlock(void)
{
- might_sleep();
- if (cpu_hotplug.active_writer == current)
- return;
- cpuhp_lock_acquire_read();
- mutex_lock(&cpu_hotplug.lock);
- atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ percpu_up_read(&cpu_hotplug_lock);
}
-EXPORT_SYMBOL_GPL(get_online_cpus);
+EXPORT_SYMBOL_GPL(cpus_read_unlock);
-void put_online_cpus(void)
+void cpus_write_lock(void)
{
- int refcount;
-
- if (cpu_hotplug.active_writer == current)
- return;
-
- refcount = atomic_dec_return(&cpu_hotplug.refcount);
- if (WARN_ON(refcount < 0)) /* try to fix things up */
- atomic_inc(&cpu_hotplug.refcount);
-
- if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
- wake_up(&cpu_hotplug.wq);
-
- cpuhp_lock_release();
-
+ percpu_down_write(&cpu_hotplug_lock);
}
-EXPORT_SYMBOL_GPL(put_online_cpus);
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- * writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- * non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
-void cpu_hotplug_begin(void)
+void cpus_write_unlock(void)
{
- DEFINE_WAIT(wait);
-
- cpu_hotplug.active_writer = current;
- cpuhp_lock_acquire();
-
- for (;;) {
- mutex_lock(&cpu_hotplug.lock);
- prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
- if (likely(!atomic_read(&cpu_hotplug.refcount)))
- break;
- mutex_unlock(&cpu_hotplug.lock);
- schedule();
- }
- finish_wait(&cpu_hotplug.wq, &wait);
+ percpu_up_write(&cpu_hotplug_lock);
}
-void cpu_hotplug_done(void)
+void lockdep_assert_cpus_held(void)
{
- cpu_hotplug.active_writer = NULL;
- mutex_unlock(&cpu_hotplug.lock);
- cpuhp_lock_release();
+ percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
/*
@@ -877,6 +575,7 @@ static void cpuhp_thread_fun(unsigned in
st->should_run = false;
+ lock_map_acquire(&cpuhp_state_lock_map);
/* Single callback invocation for [un]install ? */
if (st->single) {
if (st->cb_state < CPUHP_AP_ONLINE) {
@@ -903,6 +602,7 @@ static void cpuhp_thread_fun(unsigned in
else if (st->state > st->target)
ret = cpuhp_ap_offline(cpu, st);
}
+ lock_map_release(&cpuhp_state_lock_map);
st->result = ret;
complete(&st->done);
}
@@ -917,6 +617,9 @@ cpuhp_invoke_ap_callback(int cpu, enum c
if (!cpu_online(cpu))
return 0;
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
+
/*
* If we are up and running, use the hotplug thread. For early calls
* we invoke the thread function directly.
@@ -960,6 +663,8 @@ static int cpuhp_kick_ap_work(unsigned i
enum cpuhp_state state = st->state;
trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
__cpuhp_kick_ap_work(st);
wait_for_completion(&st->done);
trace_cpuhp_exit(cpu, st->state, state, st->result);
@@ -1058,27 +763,33 @@ static int take_cpu_down(void *_param)
static int takedown_cpu(unsigned int cpu)
{
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu);
+#endif
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int err;
- __cpu_unplug_wait(cpu);
/* Park the smpboot threads */
kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
- /* Notifiers are done. Don't let any more tasks pin this CPU. */
- cpu_unplug_sync(cpu);
-
/*
* Prevent irq alloc/free while the dying cpu reorganizes the
* interrupt affinities.
*/
irq_lock_sparse();
+#ifdef CONFIG_PREEMPT_RT_FULL
+ __write_rt_lock(cpuhp_pin);
+#endif
+
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
- err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
+ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+ __write_rt_unlock(cpuhp_pin);
+#endif
/* CPU refused to die */
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
@@ -1097,6 +808,9 @@ static int takedown_cpu(unsigned int cpu
wait_for_completion(&st->done);
BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ __write_rt_unlock(cpuhp_pin);
+#endif
/* Interrupts are moved away from the dying cpu, reenable alloc/free */
irq_unlock_sparse();
@@ -1142,9 +856,6 @@ static int __ref _cpu_down(unsigned int
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int prev_state, ret = 0;
- int mycpu;
- cpumask_var_t cpumask;
- cpumask_var_t cpumask_org;
if (num_online_cpus() == 1)
return -EBUSY;
@@ -1152,34 +863,7 @@ static int __ref _cpu_down(unsigned int
if (!cpu_present(cpu))
return -EINVAL;
- /* Move the downtaker off the unplug cpu */
- if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
- return -ENOMEM;
- if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
- free_cpumask_var(cpumask);
- return -ENOMEM;
- }
-
- cpumask_copy(cpumask_org, &current->cpus_mask);
- cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
- set_cpus_allowed_ptr(current, cpumask);
- free_cpumask_var(cpumask);
- migrate_disable();
- mycpu = smp_processor_id();
- if (mycpu == cpu) {
- printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
- migrate_enable();
- ret = -EBUSY;
- goto restore_cpus;
- }
-
- migrate_enable();
- cpu_hotplug_begin();
- ret = cpu_unplug_begin(cpu);
- if (ret) {
- printk("cpu_unplug_begin(%d) failed\n", cpu);
- goto out_cancel;
- }
+ cpus_write_lock();
cpuhp_tasks_frozen = tasks_frozen;
@@ -1217,12 +901,7 @@ static int __ref _cpu_down(unsigned int
}
out:
- cpu_unplug_done(cpu);
-out_cancel:
- cpu_hotplug_done();
-restore_cpus:
- set_cpus_allowed_ptr(current, cpumask_org);
- free_cpumask_var(cpumask_org);
+ cpus_write_unlock();
return ret;
}
@@ -1293,7 +972,7 @@ static int _cpu_up(unsigned int cpu, int
struct task_struct *idle;
int ret = 0;
- cpu_hotplug_begin();
+ cpus_write_lock();
if (!cpu_present(cpu)) {
ret = -EINVAL;
@@ -1828,18 +1507,20 @@ static void cpuhp_rollback_install(int f
}
}
-int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
- bool invoke)
+int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
+ struct hlist_node *node,
+ bool invoke)
{
struct cpuhp_step *sp;
int cpu;
int ret;
+ lockdep_assert_cpus_held();
+
sp = cpuhp_get_step(state);
if (sp->multi_instance == false)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !sp->startup.multi)
@@ -1868,13 +1549,23 @@ add_node:
hlist_add_head(node, &sp->list);
unlock:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ return ret;
+}
+
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+ bool invoke)
+{
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
- * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * __cpuhp_setup_state_cpuslocked - Setup the callbacks for a hotplug machine state
* @state: The state to setup
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
@@ -1883,25 +1574,27 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_inst
* @multi_instance: State is set up for multiple instances which get
* added afterwards.
*
+ * The caller needs to hold cpus read locked while calling this function.
* Returns:
* On success:
* Positive state number if @state is CPUHP_AP_ONLINE_DYN
* 0 for all other states
* On failure: proper (negative) error code
*/
-int __cpuhp_setup_state(enum cpuhp_state state,
- const char *name, bool invoke,
- int (*startup)(unsigned int cpu),
- int (*teardown)(unsigned int cpu),
- bool multi_instance)
+int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
{
int cpu, ret = 0;
bool dynstate;
+ lockdep_assert_cpus_held();
+
if (cpuhp_cb_check(state) || !name)
return -EINVAL;
- get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
ret = cpuhp_store_callbacks(state, name, startup, teardown,
@@ -1937,7 +1630,6 @@ int __cpuhp_setup_state(enum cpuhp_state
}
out:
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
/*
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
* dynamically allocated state in case of success.
@@ -1946,6 +1638,22 @@ out:
return state;
return ret;
}
+EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
+
+int __cpuhp_setup_state(enum cpuhp_state state,
+ const char *name, bool invoke,
+ int (*startup)(unsigned int cpu),
+ int (*teardown)(unsigned int cpu),
+ bool multi_instance)
+{
+ int ret;
+
+ cpus_read_lock();
+ ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
+ teardown, multi_instance);
+ cpus_read_unlock();
+ return ret;
+}
EXPORT_SYMBOL(__cpuhp_setup_state);
int __cpuhp_state_remove_instance(enum cpuhp_state state,
@@ -1959,7 +1667,7 @@ int __cpuhp_state_remove_instance(enum c
if (!sp->multi_instance)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !cpuhp_get_teardown_cb(state))
@@ -1980,29 +1688,30 @@ int __cpuhp_state_remove_instance(enum c
remove:
hlist_del(node);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
/**
- * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
* @state: The state to remove
* @invoke: If true, the teardown function is invoked for cpus where
* cpu state >= @state
*
+ * The caller needs to hold cpus read locked while calling this function.
* The teardown callback is currently not allowed to fail. Think
* about module removal!
*/
-void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
struct cpuhp_step *sp = cpuhp_get_step(state);
int cpu;
BUG_ON(cpuhp_cb_check(state));
- get_online_cpus();
+ lockdep_assert_cpus_held();
mutex_lock(&cpuhp_state_mutex);
if (sp->multi_instance) {
@@ -2030,7 +1739,14 @@ void __cpuhp_remove_state(enum cpuhp_sta
remove:
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
mutex_unlock(&cpuhp_state_mutex);
- put_online_cpus();
+}
+EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+ cpus_read_lock();
+ __cpuhp_remove_state_cpuslocked(state, invoke);
+ cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
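The kernel/cpu.c rework above replaces the recursive hotplug mutex machinery with a percpu rwsem and splits each registration API into a plain and a _cpuslocked flavour. A minimal sketch of the intended calling convention, assuming a hypothetical my_online()/my_offline() callback pair and state name:

/*
 * Illustrative only: a subsystem init path that already needs the
 * hotplug lock uses the new _cpuslocked variant instead of the old
 * get_online_cpus() + __cpuhp_setup_state() combination.
 */
static int my_online(unsigned int cpu)
{
	return 0;	/* called on each CPU coming online */
}

static int my_offline(unsigned int cpu)
{
	return 0;	/* called on each CPU going down */
}

static int __init my_subsys_init(void)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
					     "subsys/my:online", true,
					     my_online, my_offline, false);
	cpus_read_unlock();

	/* CPUHP_AP_ONLINE_DYN returns a positive state number on success */
	return ret < 0 ? ret : 0;
}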
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
/*
* perf event paranoia level:
@@ -4055,14 +4056,6 @@ find_get_context(struct pmu *pmu, struct
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
- /*
- * We could be clever and allow to attach a event to an
- * offline CPU and activate it when the CPU comes up, but
- * that's for later.
- */
- if (!cpu_online(cpu))
- return ERR_PTR(-ENODEV);
-
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
@@ -8088,7 +8081,8 @@ static int swevent_hlist_get_cpu(int cpu
int err = 0;
mutex_lock(&swhash->hlist_mutex);
- if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+ if (!swevent_hlist_deref(swhash) &&
+ cpumask_test_cpu(cpu, perf_online_mask)) {
struct swevent_hlist *hlist;
hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -8109,7 +8103,7 @@ static int swevent_hlist_get(void)
{
int err, cpu, failed_cpu;
- get_online_cpus();
+ mutex_lock(&pmus_lock);
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(cpu);
if (err) {
@@ -8117,8 +8111,7 @@ static int swevent_hlist_get(void)
goto fail;
}
}
- put_online_cpus();
-
+ mutex_unlock(&pmus_lock);
return 0;
fail:
for_each_possible_cpu(cpu) {
@@ -8126,8 +8119,7 @@ fail:
break;
swevent_hlist_put_cpu(cpu);
}
-
- put_online_cpus();
+ mutex_unlock(&pmus_lock);
return err;
}
@@ -9453,7 +9445,7 @@ perf_event_mux_interval_ms_store(struct
pmu->hrtimer_interval_ms = timer;
/* update all cpuctx for this PMU */
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct perf_cpu_context *cpuctx;
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -9462,7 +9454,7 @@ perf_event_mux_interval_ms_store(struct
cpu_function_call(cpu,
(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
}
- put_online_cpus();
+ cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
return count;
@@ -9592,6 +9584,7 @@ skip_type:
lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
cpuctx->ctx.pmu = pmu;
+ cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
__perf_mux_hrtimer_init(cpuctx, cpu);
}
@@ -10457,12 +10450,10 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_task;
}
- get_online_cpus();
-
if (task) {
err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
if (err)
- goto err_cpus;
+ goto err_cred;
/*
* Reuse ptrace permission checks for now.
@@ -10655,6 +10646,23 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_locked;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_locked;
+ }
+ }
+
+
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
@@ -10739,8 +10747,6 @@ SYSCALL_DEFINE5(perf_event_open,
put_task_struct(task);
}
- put_online_cpus();
-
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(¤t->perf_event_mutex);
@@ -10774,8 +10780,6 @@ err_alloc:
err_cred:
if (task)
mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
- put_online_cpus();
err_task:
if (task)
put_task_struct(task);
@@ -10830,6 +10834,21 @@ perf_event_create_kernel_counter(struct
goto err_unlock;
}
+ if (!task) {
+ /*
+ * Check if the @cpu we're creating an event for is online.
+ *
+ * We use the perf_cpu_context::ctx::mutex to serialize against
+ * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+ */
+ struct perf_cpu_context *cpuctx =
+ container_of(ctx, struct perf_cpu_context, ctx);
+ if (!cpuctx->online) {
+ err = -ENODEV;
+ goto err_unlock;
+ }
+ }
+
if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
goto err_unlock;
@@ -11527,6 +11546,8 @@ static void __init perf_event_init_all_c
struct swevent_htable *swhash;
int cpu;
+ zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
@@ -11542,7 +11563,7 @@ static void __init perf_event_init_all_c
}
}
-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
@@ -11555,7 +11576,6 @@ int perf_event_init_cpu(unsigned int cpu
rcu_assign_pointer(swhash->swevent_hlist, hlist);
}
mutex_unlock(&swhash->hlist_mutex);
- return 0;
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -11573,19 +11593,22 @@ static void __perf_event_exit_context(vo
static void perf_event_exit_cpu_context(int cpu)
{
+ struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct pmu *pmu;
- int idx;
- idx = srcu_read_lock(&pmus_srcu);
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+ mutex_lock(&pmus_lock);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+ cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
}
- srcu_read_unlock(&pmus_srcu, idx);
+ cpumask_clear_cpu(cpu, perf_online_mask);
+ mutex_unlock(&pmus_lock);
}
#else
@@ -11593,6 +11616,29 @@ static void perf_event_exit_cpu_context(
#endif
+int perf_event_init_cpu(unsigned int cpu)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+
+ perf_swevent_init_cpu(cpu);
+
+ mutex_lock(&pmus_lock);
+ cpumask_set_cpu(cpu, perf_online_mask);
+ list_for_each_entry(pmu, &pmus, entry) {
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
+
+ mutex_lock(&ctx->mutex);
+ cpuctx->online = 1;
+ mutex_unlock(&ctx->mutex);
+ }
+ mutex_unlock(&pmus_lock);
+
+ return 0;
+}
+
int perf_event_exit_cpu(unsigned int cpu)
{
perf_event_exit_cpu_context(cpu);
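From user space the perf behaviour is meant to stay the same: with the online check now done under ctx::mutex against perf_online_mask instead of under get_online_cpus(), opening a CPU-bound event on an offline CPU still fails with ENODEV. A stand-alone illustration, assuming CPU 1 has been taken offline:

/* Hedged user-space illustration; the CPU number is arbitrary. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;

	/* pid = -1, cpu = 1: a per-CPU event on (assumed offline) CPU 1 */
	if (syscall(SYS_perf_event_open, &attr, -1, 1, -1, 0) < 0)
		printf("perf_event_open: %s\n", strerror(errno));
	return 0;
}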
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -483,11 +483,6 @@ static DECLARE_DELAYED_WORK(optimizing_w
*/
static void do_optimize_kprobes(void)
{
- /* Optimization never be done when disarmed */
- if (kprobes_all_disarmed || !kprobes_allow_optimization ||
- list_empty(&optimizing_list))
- return;
-
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
@@ -495,14 +490,19 @@ static void do_optimize_kprobes(void)
* This combination can cause a deadlock (cpu-hotplug try to lock
* text_mutex but stop_machine can not be done because online_cpus
* has been changed)
- * To avoid this deadlock, we need to call get_online_cpus()
+ * To avoid this deadlock, caller must have locked cpu hotplug
* for preventing cpu-hotplug outside of text_mutex locking.
*/
- get_online_cpus();
+ lockdep_assert_cpus_held();
+
+ /* Optimization is never done when disarmed */
+ if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+ list_empty(&optimizing_list))
+ return;
+
mutex_lock(&text_mutex);
arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/*
@@ -513,12 +513,13 @@ static void do_unoptimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;
+ /* See comment in do_optimize_kprobes() */
+ lockdep_assert_cpus_held();
+
/* Unoptimization must be done anytime */
if (list_empty(&unoptimizing_list))
return;
- /* Ditto to do_optimize_kprobes */
- get_online_cpus();
mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
@@ -537,7 +538,6 @@ static void do_unoptimize_kprobes(void)
list_del_init(&op->list);
}
mutex_unlock(&text_mutex);
- put_online_cpus();
}
/* Reclaim all kprobes on the free_list */
@@ -562,6 +562,7 @@ static void kick_kprobe_optimizer(void)
static void kprobe_optimizer(struct work_struct *work)
{
mutex_lock(&kprobe_mutex);
+ cpus_read_lock();
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
@@ -587,6 +588,7 @@ static void kprobe_optimizer(struct work
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
@@ -650,9 +652,8 @@ static void optimize_kprobe(struct kprob
/* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
- get_online_cpus();
+ lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
- put_online_cpus();
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
}
@@ -796,6 +797,7 @@ static void try_to_optimize_kprobe(struc
return;
/* For preparing optimization, jump_label_text_reserved() is called */
+ cpus_read_lock();
jump_label_lock();
mutex_lock(&text_mutex);
@@ -817,6 +819,7 @@ static void try_to_optimize_kprobe(struc
out:
mutex_unlock(&text_mutex);
jump_label_unlock();
+ cpus_read_unlock();
}
#ifdef CONFIG_SYSCTL
@@ -831,6 +834,7 @@ static void optimize_all_kprobes(void)
if (kprobes_allow_optimization)
goto out;
+ cpus_read_lock();
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
@@ -838,6 +842,7 @@ static void optimize_all_kprobes(void)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
+ cpus_read_unlock();
printk(KERN_INFO "Kprobes globally optimized\n");
out:
mutex_unlock(&kprobe_mutex);
@@ -856,6 +861,7 @@ static void unoptimize_all_kprobes(void)
return;
}
+ cpus_read_lock();
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
@@ -864,6 +870,7 @@ static void unoptimize_all_kprobes(void)
unoptimize_kprobe(p, false);
}
}
+ cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Wait for unoptimizing completion */
@@ -1020,14 +1027,11 @@ static void arm_kprobe(struct kprobe *kp
arm_kprobe_ftrace(kp);
return;
}
- /*
- * Here, since __arm_kprobe() doesn't use stop_machine(),
- * this doesn't cause deadlock on text_mutex. So, we don't
- * need get_online_cpus().
- */
+ cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/* Disarm a kprobe with text_mutex */
@@ -1037,10 +1041,12 @@ static void disarm_kprobe(struct kprobe
disarm_kprobe_ftrace(kp);
return;
}
- /* Ditto */
+
+ cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
}
/*
@@ -1308,13 +1314,10 @@ static int register_aggr_kprobe(struct k
int ret = 0;
struct kprobe *ap = orig_p;
+ cpus_read_lock();
+
/* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock();
- /*
- * Get online CPUs to avoid text_mutex deadlock.with stop machine,
- * which is invoked by unoptimize_kprobe() in add_new_kprobe()
- */
- get_online_cpus();
mutex_lock(&text_mutex);
if (!kprobe_aggrprobe(orig_p)) {
@@ -1365,8 +1368,8 @@ static int register_aggr_kprobe(struct k
out:
mutex_unlock(&text_mutex);
- put_online_cpus();
jump_label_unlock();
+ cpus_read_unlock();
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
@@ -1568,9 +1571,12 @@ int register_kprobe(struct kprobe *p)
goto out;
}
- mutex_lock(&text_mutex); /* Avoiding text modification */
+ cpus_read_lock();
+ /* Prevent text modification */
+ mutex_lock(&text_mutex);
ret = prepare_kprobe(p);
mutex_unlock(&text_mutex);
+ cpus_read_unlock();
if (ret)
goto out;
@@ -1583,7 +1589,6 @@ int register_kprobe(struct kprobe *p)
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
-
out:
mutex_unlock(&kprobe_mutex);
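The kprobes conversion hoists cpus_read_lock() out of the low-level helpers and into the entry points, documenting the invariant with lockdep_assert_cpus_held(). The resulting ordering, hotplug lock before text_mutex, is what prevents the stop_machine() vs. hotplug deadlock described in the comments above. A schematic sketch of that convention (not an actual kernel function):

/*
 * Schematic only: the lock ordering every text-patching path in
 * kernel/kprobes.c now follows, so that stop_machine() users and
 * CPU hotplug can no longer deadlock on text_mutex.
 */
static void text_patch_sketch(void)
{
	cpus_read_lock();		/* 1: block CPU hotplug        */
	mutex_lock(&text_mutex);	/* 2: serialize code patching  */
	/* ... __arm_kprobe()/arch_optimize_kprobes()-style work ... */
	mutex_unlock(&text_mutex);
	cpus_read_unlock();
}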
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -32,7 +32,7 @@ ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
endif
-obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o rwsem-rt.o
+obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o rwsem-rt.o rwlock-rt.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
--- a/kernel/locking/rt.c
+++ b/kernel/locking/rt.c
@@ -198,137 +198,6 @@ void __lockfunc _mutex_unlock(struct mut
}
EXPORT_SYMBOL(_mutex_unlock);
-/*
- * rwlock_t functions
- */
-int __lockfunc rt_write_trylock(rwlock_t *rwlock)
-{
- int ret;
-
- migrate_disable();
- ret = __rt_mutex_trylock(&rwlock->lock);
- if (ret)
- rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
- else
- migrate_enable();
-
- return ret;
-}
-EXPORT_SYMBOL(rt_write_trylock);
-
-int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
-{
- int ret;
-
- *flags = 0;
- ret = rt_write_trylock(rwlock);
- return ret;
-}
-EXPORT_SYMBOL(rt_write_trylock_irqsave);
-
-int __lockfunc rt_read_trylock(rwlock_t *rwlock)
-{
- struct rt_mutex *lock = &rwlock->lock;
- int ret = 1;
-
- /*
- * recursive read locks succeed when current owns the lock,
- * but not when read_depth == 0 which means that the lock is
- * write locked.
- */
- if (rt_mutex_owner(lock) != current) {
- migrate_disable();
- ret = __rt_mutex_trylock(lock);
- if (ret)
- rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
- else
- migrate_enable();
-
- } else if (!rwlock->read_depth) {
- ret = 0;
- }
-
- if (ret)
- rwlock->read_depth++;
-
- return ret;
-}
-EXPORT_SYMBOL(rt_read_trylock);
-
-void __lockfunc rt_write_lock(rwlock_t *rwlock)
-{
- rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- __rt_spin_lock(&rwlock->lock);
-}
-EXPORT_SYMBOL(rt_write_lock);
-
-void __lockfunc rt_read_lock(rwlock_t *rwlock)
-{
- struct rt_mutex *lock = &rwlock->lock;
-
-
- /*
- * recursive read locks succeed when current owns the lock
- */
- if (rt_mutex_owner(lock) != current) {
- rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- __rt_spin_lock(lock);
- }
- rwlock->read_depth++;
-}
-
-EXPORT_SYMBOL(rt_read_lock);
-
-void __lockfunc rt_write_unlock(rwlock_t *rwlock)
-{
- /* NOTE: we always pass in '1' for nested, for simplicity */
- rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
- migrate_enable();
-}
-EXPORT_SYMBOL(rt_write_unlock);
-
-void __lockfunc rt_read_unlock(rwlock_t *rwlock)
-{
- /* Release the lock only when read_depth is down to 0 */
- if (--rwlock->read_depth == 0) {
- rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
- migrate_enable();
- }
-}
-EXPORT_SYMBOL(rt_read_unlock);
-
-unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
-{
- rt_write_lock(rwlock);
-
- return 0;
-}
-EXPORT_SYMBOL(rt_write_lock_irqsave);
-
-unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
-{
- rt_read_lock(rwlock);
-
- return 0;
-}
-EXPORT_SYMBOL(rt_read_lock_irqsave);
-
-void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
- lockdep_init_map(&rwlock->dep_map, name, key, 0);
-#endif
- rwlock->lock.save_state = 1;
- rwlock->read_depth = 0;
-}
-EXPORT_SYMBOL(__rt_rwlock_init);
-
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
* @cnt: the atomic which we are to dec
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -968,19 +968,14 @@ takeit:
* preemptible spin_lock functions:
*/
static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
- void (*slowfn)(struct rt_mutex *lock,
- bool mg_off),
- bool do_mig_dis)
+ void (*slowfn)(struct rt_mutex *lock))
{
might_sleep_no_state_check();
- if (do_mig_dis)
- migrate_disable();
-
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return;
else
- slowfn(lock, do_mig_dis);
+ slowfn(lock);
}
static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
@@ -1038,22 +1033,16 @@ static int task_blocks_on_rt_mutex(struc
* We store the current state under p->pi_lock in p->saved_state and
* the try_to_wake_up() code handles this accordingly.
*/
-static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
- bool mg_off)
+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ unsigned long flags)
{
struct task_struct *lock_owner, *self = current;
- struct rt_mutex_waiter waiter, *top_waiter;
- unsigned long flags;
+ struct rt_mutex_waiter *top_waiter;
int ret;
- rt_mutex_init_waiter(&waiter, true);
-
- raw_spin_lock_irqsave(&lock->wait_lock, flags);
-
- if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL))
return;
- }
BUG_ON(rt_mutex_owner(lock) == self);
@@ -1068,12 +1057,12 @@ static void noinline __sched rt_spin_lo
__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
raw_spin_unlock(&self->pi_lock);
- ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
+ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK);
BUG_ON(ret);
for (;;) {
/* Try to acquire the lock again. */
- if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
+ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL))
break;
top_waiter = rt_mutex_top_waiter(lock);
@@ -1081,15 +1070,10 @@ static void noinline __sched rt_spin_lo
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- debug_rt_mutex_print_deadlock(&waiter);
+ debug_rt_mutex_print_deadlock(waiter);
- if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
- if (mg_off)
- migrate_enable();
+ if (top_waiter != waiter || adaptive_wait(lock, lock_owner))
schedule();
- if (mg_off)
- migrate_disable();
- }
raw_spin_lock_irqsave(&lock->wait_lock, flags);
@@ -1116,11 +1100,20 @@ static void noinline __sched rt_spin_lo
*/
fixup_rt_mutex_waiters(lock);
- BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
- BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
+ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock));
+ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry));
+}
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
+{
+ struct rt_mutex_waiter waiter;
+ unsigned long flags;
+ rt_mutex_init_waiter(&waiter, true);
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ rt_spin_lock_slowlock_locked(lock, &waiter, flags);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
debug_rt_mutex_free_waiter(&waiter);
}
@@ -1130,7 +1123,7 @@ static bool __sched __rt_mutex_unlock_co
/*
* Slow path to release a rt_mutex spin_lock style
*/
-static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
{
unsigned long flags;
DEFINE_WAKE_Q(wake_q);
@@ -1145,49 +1138,29 @@ static void noinline __sched rt_spin_lo
rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}
-void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
-{
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
- spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-}
-EXPORT_SYMBOL(rt_spin_lock__no_mg);
-
void __lockfunc rt_spin_lock(spinlock_t *lock)
{
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
+ migrate_disable();
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(rt_spin_lock);
void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
{
- rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
}
-EXPORT_SYMBOL(__rt_spin_lock);
-
-void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
-{
- rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
-}
-EXPORT_SYMBOL(__rt_spin_lock__no_mg);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
+ migrate_disable();
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
}
EXPORT_SYMBOL(rt_spin_lock_nested);
#endif
-void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
-{
- /* NOTE: we always pass in '1' for nested, for simplicity */
- spin_release(&lock->dep_map, 1, _RET_IP_);
- rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
-}
-EXPORT_SYMBOL(rt_spin_unlock__no_mg);
-
void __lockfunc rt_spin_unlock(spinlock_t *lock)
{
/* NOTE: we always pass in '1' for nested, for simplicity */
@@ -1215,17 +1188,6 @@ void __lockfunc rt_spin_unlock_wait(spin
}
EXPORT_SYMBOL(rt_spin_unlock_wait);
-int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
-{
- int ret;
-
- ret = __rt_mutex_trylock(&lock->lock);
- if (ret)
- spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
- return ret;
-}
-EXPORT_SYMBOL(rt_spin_trylock__no_mg);
-
int __lockfunc rt_spin_trylock(spinlock_t *lock)
{
int ret;
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -139,6 +139,10 @@ int __sched rt_mutex_slowlock_locked(str
enum rtmutex_chainwalk chwalk,
struct ww_acquire_ctx *ww_ctx,
struct rt_mutex_waiter *waiter);
+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ unsigned long flags);
+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock);
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
--- /dev/null
+++ b/kernel/locking/rwlock-rt.c
@@ -0,0 +1,400 @@
+/*
+ */
+#include <linux/sched/debug.h>
+#include <linux/export.h>
+
+#include "rtmutex_common.h"
+#include <linux/rwlock_types_rt.h>
+
+/*
+ * RT-specific reader/writer locks
+ *
+ * write_lock()
+ * 1) Lock lock->rtmutex
+ * 2) Remove the reader BIAS to force readers into the slow path
+ * 3) Wait until all readers have left the critical region
+ * 4) Mark it write locked
+ *
+ * write_unlock()
+ * 1) Remove the write locked marker
+ * 2) Set the reader BIAS so readers can use the fast path again
+ * 3) Unlock lock->rtmutex to release blocked readers
+ *
+ * read_lock()
+ * 1) Try fast path acquisition (reader BIAS is set)
+ * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag
+ * 3) If !writelocked, acquire it for read
+ * 4) If writelocked, block on lock->rtmutex
+ * 5) unlock lock->rtmutex, goto 1)
+ *
+ * read_unlock()
+ * 1) Try fast path release (reader count != 1)
+ * 2) Wake the writer waiting in write_lock()#3
+ *
+ * read_lock()#3 has the consequence that rw locks on RT are not writer
+ * fair. Writers, which should be avoided in RT tasks anyway (think
+ * tasklist lock), are at least subject to the rtmutex priority/DL
+ * inheritance mechanism.
+ *
+ * It's possible to make the rw locks writer fair by keeping a list of
+ * active readers. A blocked writer would force all newly incoming readers
+ * to block on the rtmutex, but the rtmutex would have to be proxy locked
+ * for one reader after the other. We can't use multi-reader inheritance
+ * because there is no way to support that with
+ * SCHED_DEADLINE. Implementing the one by one reader boosting/handover
+ * mechanism is a major surgery for a very dubious value.
+ *
+ * The risk of writer starvation is there, but the pathological use cases
+ * which trigger it are not necessarily the typical RT workloads.
+ */
+
+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+ atomic_set(&lock->readers, READER_BIAS);
+ rt_mutex_init(&lock->rtmutex);
+ lock->rtmutex.save_state = 1;
+}
+
+int __read_rt_trylock(struct rt_rw_lock *lock)
+{
+ int r, old;
+
+ /*
+ * Increment reader count, if lock->readers < 0, i.e. READER_BIAS is
+ * set.
+ */
+ for (r = atomic_read(&lock->readers); r < 0;) {
+ old = atomic_cmpxchg(&lock->readers, r, r + 1);
+ if (likely(old == r))
+ return 1;
+ r = old;
+ }
+ return 0;
+}
+
+void __sched __read_rt_lock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ struct rt_mutex_waiter waiter;
+ unsigned long flags;
+
+ if (__read_rt_trylock(lock))
+ return;
+
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+ /*
+ * Allow readers as long as the writer has not completely
+ * acquired the lock for write.
+ */
+ if (atomic_read(&lock->readers) != WRITER_BIAS) {
+ atomic_inc(&lock->readers);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ return;
+ }
+
+ /*
+ * Call into the slow lock path with the rtmutex->wait_lock
+ * held, so this can't result in the following race:
+ *
+ * Reader1 Reader2 Writer
+ * read_lock()
+ * write_lock()
+ * rtmutex_lock(m)
+ * swait()
+ * read_lock()
+ * unlock(m->wait_lock)
+ * read_unlock()
+ * swake()
+ * lock(m->wait_lock)
+ * lock->writelocked=true
+ * unlock(m->wait_lock)
+ *
+ * write_unlock()
+ * lock->writelocked=false
+ * rtmutex_unlock(m)
+ * read_lock()
+ * write_lock()
+ * rtmutex_lock(m)
+ * swait()
+ * rtmutex_lock(m)
+ *
+ * That would put Reader1 behind the writer waiting on
+ * Reader2 to call read_unlock() which might be unbound.
+ */
+ rt_mutex_init_waiter(&waiter, false);
+ rt_spin_lock_slowlock_locked(m, &waiter, flags);
+ /*
+ * The slowlock() above is guaranteed to return with the rtmutex
+ * now held, so there can't be a writer active. Increment the reader
+ * count and immediately drop the rtmutex again.
+ */
+ atomic_inc(&lock->readers);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ rt_spin_lock_slowunlock(m);
+
+ debug_rt_mutex_free_waiter(&waiter);
+}
+
+void __read_rt_unlock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ struct task_struct *tsk;
+
+ /*
+ * lock->readers can only hit 0 when a writer is waiting for the
+ * active readers to leave the critical region.
+ */
+ if (!atomic_dec_and_test(&lock->readers))
+ return;
+
+ raw_spin_lock_irq(&m->wait_lock);
+ /*
+ * Wake the writer, i.e. the rtmutex owner. It might release the
+ * rtmutex concurrently in the fast path, but to clean up the rw
+ * lock it needs to acquire m->wait_lock. The worst case which can
+ * happen is a spurious wakeup.
+ */
+ tsk = rt_mutex_owner(m);
+ if (tsk)
+ wake_up_process(tsk);
+
+ raw_spin_unlock_irq(&m->wait_lock);
+}
+
+static void __write_unlock_common(struct rt_rw_lock *lock, int bias,
+ unsigned long flags)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+
+ atomic_add(READER_BIAS - bias, &lock->readers);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ rt_spin_lock_slowunlock(m);
+}
+
+void __sched __write_rt_lock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ struct task_struct *self = current;
+ unsigned long flags;
+
+ /* Take the rtmutex as a first step */
+ __rt_spin_lock(m);
+
+ /* Force readers into slow path */
+ atomic_sub(READER_BIAS, &lock->readers);
+
+ for (;;) {
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+ raw_spin_lock(&self->pi_lock);
+ self->saved_state = self->state;
+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+ raw_spin_unlock(&self->pi_lock);
+
+ /* Have all readers left the critical region? */
+ if (!atomic_read(&lock->readers)) {
+ atomic_set(&lock->readers, WRITER_BIAS);
+ raw_spin_lock(&self->pi_lock);
+ __set_current_state_no_track(self->saved_state);
+ self->saved_state = TASK_RUNNING;
+ raw_spin_unlock(&self->pi_lock);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ return;
+ }
+
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+
+ if (atomic_read(&lock->readers) != 0)
+ schedule();
+ }
+}
+
+int __write_rt_trylock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ unsigned long flags;
+
+ if (!rt_mutex_trylock(m))
+ return 0;
+
+ atomic_sub(READER_BIAS, &lock->readers);
+
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+ if (!atomic_read(&lock->readers)) {
+ atomic_set(&lock->readers, WRITER_BIAS);
+ raw_spin_unlock_irqrestore(&m->wait_lock, flags);
+ return 1;
+ }
+ __write_unlock_common(lock, 0, flags);
+ return 0;
+}
+
+void __write_rt_unlock(struct rt_rw_lock *lock)
+{
+ struct rt_mutex *m = &lock->rtmutex;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&m->wait_lock, flags);
+ __write_unlock_common(lock, WRITER_BIAS, flags);
+}
+
+#ifndef CONFIG_RWLOCK_RT_READER_BIASED
+/* Map the single reader implementation */
+static inline int do_read_rt_trylock(rwlock_t *rwlock)
+{
+ return rt_mutex_trylock(&rwlock->lock);
+}
+
+static inline int do_write_rt_trylock(rwlock_t *rwlock)
+{
+ return rt_mutex_trylock(&rwlock->lock);
+}
+
+static inline void do_read_rt_lock(rwlock_t *rwlock)
+{
+ __rt_spin_lock(&rwlock->lock);
+}
+
+static inline void do_write_rt_lock(rwlock_t *rwlock)
+{
+ __rt_spin_lock(&rwlock->lock);
+}
+
+static inline void do_read_rt_unlock(rwlock_t *rwlock)
+{
+ __rt_spin_unlock(&rwlock->lock);
+}
+
+static inline void do_write_rt_unlock(rwlock_t *rwlock)
+{
+ __rt_spin_unlock(&rwlock->lock);
+}
+
+static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
+ lockdep_init_map(&rwlock->dep_map, name, key, 0);
+#endif
+ rt_mutex_init(&rwlock->lock);
+ rwlock->lock.save_state = 1;
+}
+
+#else
+/* Map the reader biased implementation */
+static inline int do_read_rt_trylock(rwlock_t *rwlock)
+{
+ return __read_rt_trylock(rwlock);
+}
+
+static inline int do_write_rt_trylock(rwlock_t *rwlock)
+{
+ return __write_rt_trylock(rwlock);
+}
+
+static inline void do_read_rt_lock(rwlock_t *rwlock)
+{
+ __read_rt_lock(rwlock);
+}
+
+static inline void do_write_rt_lock(rwlock_t *rwlock)
+{
+ __write_rt_lock(rwlock);
+}
+
+static inline void do_read_rt_unlock(rwlock_t *rwlock)
+{
+ __read_rt_unlock(rwlock);
+}
+
+static inline void do_write_rt_unlock(rwlock_t *rwlock)
+{
+ __write_rt_unlock(rwlock);
+}
+
+static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name,
+ struct lock_class_key *key)
+{
+ __rwlock_biased_rt_init(rwlock, name, key);
+}
+#endif
+
+/*
+ * The common functions which get wrapped into the rwlock API.
+ */
+int __lockfunc rt_read_trylock(rwlock_t *rwlock)
+{
+ int ret;
+
+ migrate_disable();
+ ret = do_read_rt_trylock(rwlock);
+ if (ret)
+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
+ else
+ migrate_enable();
+ return ret;
+}
+EXPORT_SYMBOL(rt_read_trylock);
+
+int __lockfunc rt_write_trylock(rwlock_t *rwlock)
+{
+ int ret;
+
+ migrate_disable();
+ ret = do_write_rt_trylock(rwlock);
+ if (ret)
+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
+ else
+ migrate_enable();
+ return ret;
+}
+EXPORT_SYMBOL(rt_write_trylock);
+
+void __lockfunc rt_read_lock(rwlock_t *rwlock)
+{
+ migrate_disable();
+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+ do_read_rt_lock(rwlock);
+}
+EXPORT_SYMBOL(rt_read_lock);
+
+void __lockfunc rt_write_lock(rwlock_t *rwlock)
+{
+ migrate_disable();
+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
+ do_write_rt_lock(rwlock);
+}
+EXPORT_SYMBOL(rt_write_lock);
+
+void __lockfunc rt_read_unlock(rwlock_t *rwlock)
+{
+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
+ do_read_rt_unlock(rwlock);
+ migrate_enable();
+}
+EXPORT_SYMBOL(rt_read_unlock);
+
+void __lockfunc rt_write_unlock(rwlock_t *rwlock)
+{
+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
+ do_write_rt_unlock(rwlock);
+ migrate_enable();
+}
+EXPORT_SYMBOL(rt_write_unlock);
+
+void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
+{
+ do_rwlock_rt_init(rwlock, name, key);
+}
+EXPORT_SYMBOL(__rt_rwlock_init);
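The whole fast path of the reader biased lock above hinges on the sign of lock->readers: while the counter is negative, READER_BIAS is intact and no writer holds the lock. A minimal user-space model of that counter protocol in C11 atomics; the bias value and names are illustrative, the kernel uses its own constants from rwlock_types_rt.h:

#include <stdatomic.h>
#include <limits.h>

#define MODEL_READER_BIAS	INT_MIN		/* illustrative bias value */

static atomic_int readers = MODEL_READER_BIAS;

/* Mirrors __read_rt_trylock(): enter while the counter is negative. */
static int model_read_trylock(void)
{
	int r = atomic_load(&readers);

	while (r < 0) {
		/* on failure, r is reloaded with the current value */
		if (atomic_compare_exchange_weak(&readers, &r, r + 1))
			return 1;	/* reader accounted, fast path */
	}
	return 0;	/* writer removed the bias: take the slow path */
}

/* Mirrors the __read_rt_unlock() fast path: just drop our count. */
static void model_read_unlock(void)
{
	atomic_fetch_sub(&readers, 1);
}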
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -966,29 +966,18 @@ static struct kobj_type padata_attr_type
};
/**
- * padata_alloc_possible - Allocate and initialize padata instance.
- * Use the cpu_possible_mask for serial and
- * parallel workers.
- *
- * @wq: workqueue to use for the allocated padata instance
- */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
-{
- return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
-}
-EXPORT_SYMBOL(padata_alloc_possible);
-
-/**
* padata_alloc - allocate and initialize a padata instance and specify
* cpumasks for serial and parallel workers.
*
* @wq: workqueue to use for the allocated padata instance
* @pcpumask: cpumask that will be used for padata parallelization
* @cbcpumask: cpumask that will be used for padata serialization
+ *
+ * Must be called from a cpus_read_lock() protected region
*/
-struct padata_instance *padata_alloc(struct workqueue_struct *wq,
- const struct cpumask *pcpumask,
- const struct cpumask *cbcpumask)
+static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask)
{
struct padata_instance *pinst;
struct parallel_data *pd = NULL;
@@ -997,7 +986,6 @@ struct padata_instance *padata_alloc(str
if (!pinst)
goto err;
- get_online_cpus();
if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
goto err_free_inst;
if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
@@ -1021,14 +1009,12 @@ struct padata_instance *padata_alloc(str
pinst->flags = 0;
- put_online_cpus();
-
BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
kobject_init(&pinst->kobj, &padata_attr_type);
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_add_instance_nocalls(hp_online, &pinst->node);
+ cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
#endif
return pinst;
@@ -1037,12 +1023,27 @@ err_free_masks:
free_cpumask_var(pinst->cpumask.cbcpu);
err_free_inst:
kfree(pinst);
- put_online_cpus();
err:
return NULL;
}
/**
+ * padata_alloc_possible - Allocate and initialize padata instance.
+ * Use the cpu_possible_mask for serial and
+ * parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ *
+ * Must be called from a cpus_read_lock() protected region
+ */
+struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+{
+ lockdep_assert_cpus_held();
+ return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc_possible);
+
+/**
* padata_free - free a padata instance
*
* @padata_inst: padata instance to free
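With padata_alloc() now static and asserting lockdep_assert_cpus_held(), allocation must happen inside a cpus_read_lock() region. A hedged caller sketch, assuming wq is an already-created workqueue:

/* Illustrative caller: allocate a padata instance under the new
 * locking convention. */
static struct padata_instance *my_padata_setup(struct workqueue_struct *wq)
{
	struct padata_instance *pinst;

	cpus_read_lock();
	pinst = padata_alloc_possible(wq);
	cpus_read_unlock();

	return pinst;	/* NULL on allocation failure */
}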
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1223,84 +1223,6 @@ void do_set_cpus_allowed(struct task_str
__do_set_cpus_allowed_tail(p, new_mask);
}
-static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
-static DEFINE_MUTEX(sched_down_mutex);
-static cpumask_t sched_down_cpumask;
-
-void tell_sched_cpu_down_begin(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_set_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
-
-void tell_sched_cpu_down_done(int cpu)
-{
- mutex_lock(&sched_down_mutex);
- cpumask_clear_cpu(cpu, &sched_down_cpumask);
- mutex_unlock(&sched_down_mutex);
-}
-
-/**
- * migrate_me - try to move the current task off this cpu
- *
- * Used by the pin_current_cpu() code to try to get tasks
- * to move off the current CPU as it is going down.
- * It will only move the task if the task isn't pinned to
- * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
- * and the task has to be in a RUNNING state. Otherwise the
- * movement of the task will wake it up (change its state
- * to running) when the task did not expect it.
- *
- * Returns 1 if it succeeded in moving the current task
- * 0 otherwise.
- */
-int migrate_me(void)
-{
- struct task_struct *p = current;
- struct migration_arg arg;
- struct cpumask *cpumask;
- const struct cpumask *mask;
- unsigned int dest_cpu;
- struct rq_flags rf;
- struct rq *rq;
-
- /*
- * We can not migrate tasks bounded to a CPU or tasks not
- * running. The movement of the task will wake it up.
- */
- if (p->flags & PF_NO_SETAFFINITY || p->state)
- return 0;
-
- mutex_lock(&sched_down_mutex);
- rq = task_rq_lock(p, &rf);
-
- cpumask = this_cpu_ptr(&sched_cpumasks);
- mask = p->cpus_ptr;
-
- cpumask_andnot(cpumask, mask, &sched_down_cpumask);
-
- if (!cpumask_weight(cpumask)) {
- /* It's only on this CPU? */
- task_rq_unlock(rq, p, &rf);
- mutex_unlock(&sched_down_mutex);
- return 0;
- }
-
- dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
-
- arg.task = p;
- arg.dest_cpu = dest_cpu;
-
- task_rq_unlock(rq, p, &rf);
-
- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
- tlb_migrate_finish(p->mm);
- mutex_unlock(&sched_down_mutex);
-
- return 1;
-}
-
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
@@ -7667,8 +7589,6 @@ void migrate_disable(void)
return;
}
- /* get_online_cpus(); */
-
preempt_disable();
preempt_lazy_disable();
pin_current_cpu();
@@ -7744,13 +7664,11 @@ void migrate_enable(void)
preempt_enable();
stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
- /* put_online_cpus(); */
return;
}
}
unpin_current_cpu();
- /* put_online_cpus(); */
preempt_lazy_enable();
preempt_enable();
}
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -577,7 +577,8 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
-static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
+int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+ const struct cpumask *cpus)
{
struct multi_stop_data msdata = {
.fn = fn,
@@ -586,6 +587,8 @@ static int __stop_machine(cpu_stop_fn_t
.active_cpus = cpus,
};
+ lockdep_assert_cpus_held();
+
if (!stop_machine_initialized) {
/*
* Handle the case where stop_machine() is called
@@ -615,9 +618,9 @@ int stop_machine(cpu_stop_fn_t fn, void
int ret;
/* No CPUs can come up or down during this. */
- get_online_cpus();
- ret = __stop_machine(fn, data, cpus);
- put_online_cpus();
+ cpus_read_lock();
+ ret = stop_machine_cpuslocked(fn, data, cpus);
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
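stop_machine() keeps its old semantics, while the new stop_machine_cpuslocked() serves callers which, like takedown_cpu() above, already hold the hotplug lock. A sketch of both call styles with a do-nothing callback; my_fn() is made up, and a NULL cpumask means the first online CPU runs it:

/* Illustrative only: my_fn() runs while every other CPU spins in its
 * stopper thread with interrupts disabled. */
static int my_fn(void *data)
{
	return 0;
}

static int run_both_ways(void)
{
	int ret;

	/* Unlocked caller: stop_machine() takes the hotplug lock itself. */
	ret = stop_machine(my_fn, NULL, NULL);
	if (ret)
		return ret;

	/* A caller already holding the lock uses the _cpuslocked variant. */
	cpus_read_lock();
	ret = stop_machine_cpuslocked(my_fn, NULL, NULL);
	cpus_read_unlock();

	return ret;
}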