From c35b1245712f30ff544bd6dc082efdff25ea9a17 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mar 02 2021 13:15:25 +0000 Subject: Merge branch 'users/jgross/SLE15-SP2/for-next' into users/mkubecek/SLE15-SP2/for-next Pull an smp fix from Jürgen Groß. --- diff --git a/config/arm64/default b/config/arm64/default index 1a006f5..0335f83 100644 --- a/config/arm64/default +++ b/config/arm64/default @@ -9306,6 +9306,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/ppc64le/default b/config/ppc64le/default index cb0d998..d704ede 100644 --- a/config/ppc64le/default +++ b/config/ppc64le/default @@ -6082,6 +6082,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/s390x/default b/config/s390x/default index 26b28dc..578e339 100644 --- a/config/s390x/default +++ b/config/s390x/default @@ -3881,6 +3881,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/s390x/zfcpdump b/config/s390x/zfcpdump index 7a5b7a6..a8cdf0b 100644 --- a/config/s390x/zfcpdump +++ b/config/s390x/zfcpdump @@ -1401,6 +1401,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/x86_64/default b/config/x86_64/default index 9b30399..a4bb961 100644 --- a/config/x86_64/default +++ b/config/x86_64/default @@ -8745,6 +8745,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +CONFIG_CSD_LOCK_WAIT_DEBUG=y # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/patches.kabi/fix-kabi-after-call_single_data-modification.patch b/patches.kabi/fix-kabi-after-call_single_data-modification.patch new file mode 100644 index 0000000..409dc5b --- /dev/null +++ b/patches.kabi/fix-kabi-after-call_single_data-modification.patch @@ -0,0 +1,35 @@ +From: Juergen Gross +Date: Thu, 2 Mar 2021 08:03:16 +0100 +Subject: [PATCH] kABI: Fix kABI after modifying struct __call_single_data +Patch-mainline: Never, kABI fix +References: bsc#1180846 + +Fix kABI of struct __call_single_data after adding src and dst +elements. + +This is trivial, as the struct is 8-byte aligned for 64-bit due to the +pointers in it and there are 4 bytes padding at the end of the struct +where src and dst have been added for 64-bit only. 
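+
+As an illustration only (not part of this patch, and using made-up
+userspace type names), the size argument above can be checked with a
+small LP64 sketch: the three pointer-sized members force 8-byte
+alignment, so the 4-byte flags field leaves 4 bytes of tail padding
+that the two new u16 fields reuse, keeping sizeof() unchanged:
+
+	#include <assert.h>
+	#include <stdint.h>
+
+	/* Hypothetical stand-ins for the kernel struct; LP64 assumed. */
+	struct csd_old { void *llist; void *func; void *info; uint32_t flags; };
+	struct csd_new { void *llist; void *func; void *info; uint32_t flags;
+			 uint16_t src, dst; };
+
+	int main(void)
+	{
+		/* Same size before and after adding src/dst, so kABI size is preserved. */
+		assert(sizeof(struct csd_old) == sizeof(struct csd_new));
+		return 0;
+	}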
+ +Signed-off-by: Juergen Gross +--- + include/linux/smp.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/linux/smp.h b/include/linux/smp.h +index 80d557ef8a11..9f13966d3d92 100644 +--- a/include/linux/smp.h ++++ b/include/linux/smp.h +@@ -21,7 +21,9 @@ struct __call_single_data { + void *info; + unsigned int flags; + #ifdef CONFIG_64BIT ++#ifndef __GENKSYMS__ + u16 src, dst; ++#endif + #endif + }; + +-- +2.25.0 + diff --git a/patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch b/patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch new file mode 100644 index 0000000..3441d18 --- /dev/null +++ b/patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch @@ -0,0 +1,240 @@ +Patch-mainline: v5.10-rc1 +Git-commit: 35feb60474bf4f7fa7840e14fc7fd344996b919d +References: bsc#1180846 +From: "Paul E. McKenney" +Date: Tue, 30 Jun 2020 13:22:54 -0700 +Subject: [PATCH] kernel/smp: Provide CSD lock timeout diagnostics + +This commit causes csd_lock_wait() to emit diagnostics when a CPU +fails to respond quickly enough to one of the smp_call_function() +family of function calls. These diagnostics are enabled by a new +CSD_LOCK_WAIT_DEBUG Kconfig option that depends on DEBUG_KERNEL. + +This commit was inspired by an earlier patch by Josef Bacik. + +[ paulmck: Fix for syzbot+0f719294463916a3fc0e@syzkaller.appspotmail.com ] +[ paulmck: Fix KASAN use-after-free issue reported by Qian Cai. ] +[ paulmck: Fix botched nr_cpu_ids comparison per Dan Carpenter. ] +[ paulmck: Apply Peter Zijlstra feedback. ] +Link: https://lore.kernel.org/lkml/00000000000042f21905a991ecea@google.com +Link: https://lore.kernel.org/lkml/0000000000002ef21705a9933cf3@google.com +Cc: Peter Zijlstra +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: Sebastian Andrzej Siewior +Signed-off-by: Paul E. McKenney +Signed-off-by: Juergen Gross +--- + kernel/smp.c | 132 +++++++++++++++++++++++++++++++++++++++++++++- + lib/Kconfig.debug | 11 ++++ + 2 files changed, 141 insertions(+), 2 deletions(-) + +diff --git a/kernel/smp.c b/kernel/smp.c +index 865a876f83ce..c5d31885bd30 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -20,6 +20,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include "smpboot.h" + +@@ -97,6 +100,90 @@ void __init call_function_init(void) + smpcfd_prepare_cpu(smp_processor_id()); + } + ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ ++static DEFINE_PER_CPU(call_single_data_t *, cur_csd); ++static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); ++static DEFINE_PER_CPU(void *, cur_csd_info); ++ ++#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) ++atomic_t csd_bug_count = ATOMIC_INIT(0); ++ ++/* Record current CSD work for current CPU, NULL to erase. */ ++static void csd_lock_record(call_single_data_t *csd) ++{ ++ if (!csd) { ++ smp_mb(); /* NULL cur_csd after unlock. */ ++ __this_cpu_write(cur_csd, NULL); ++ return; ++ } ++ __this_cpu_write(cur_csd_func, csd->func); ++ __this_cpu_write(cur_csd_info, csd->info); ++ smp_wmb(); /* func and info before csd. */ ++ __this_cpu_write(cur_csd, csd); ++ smp_mb(); /* Update cur_csd before function call. */ ++ /* Or before unlock, as the case may be. */ ++} ++ ++/* ++ * Complain if too much time spent waiting. 
++ */ ++static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) ++{ ++ int cpu = -1; ++ int cpux; ++ bool firsttime; ++ u64 ts2, ts_delta; ++ call_single_data_t *cpu_cur_csd; ++ unsigned int flags = READ_ONCE(csd->flags); ++ ++ if (!(flags & CSD_FLAG_LOCK)) { ++ if (!unlikely(*bug_id)) ++ return true; ++ pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n", ++ *bug_id, raw_smp_processor_id(), csd->dst); ++ return true; ++ } ++ ++ ts2 = sched_clock(); ++ ts_delta = ts2 - *ts1; ++ if (likely(ts_delta <= CSD_LOCK_TIMEOUT)) ++ return false; ++ ++ firsttime = !*bug_id; ++ if (firsttime) ++ *bug_id = atomic_inc_return(&csd_bug_count); ++ cpu = csd->dst; ++ if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu)) ++ cpux = 0; ++ else ++ cpux = cpu; ++ cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */ ++ pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n", ++ firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0, ++ cpu, csd->func, csd->info); ++ if (cpu_cur_csd && csd != cpu_cur_csd) { ++ pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n", ++ *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)), ++ READ_ONCE(per_cpu(cur_csd_info, cpux))); ++ } else { ++ pr_alert("\tcsd: CSD lock (#%d) %s.\n", ++ *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); ++ } ++ if (cpu >= 0) { ++ if (!trigger_single_cpu_backtrace(cpu)) ++ dump_cpu_task(cpu); ++ if (!cpu_cur_csd) { ++ pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); ++ arch_send_call_function_single_ipi(cpu); ++ } ++ } ++ dump_stack(); ++ *ts1 = ts2; ++ ++ return false; ++} ++ + /* + * csd_lock/csd_unlock used to serialize access to per-cpu csd resources + * +@@ -104,10 +191,30 @@ void __init call_function_init(void) + * previous function call. For multi-cpu calls its even more interesting + * as we'll have to ensure no other cpu is observing our csd. + */ ++static __always_inline void csd_lock_wait(call_single_data_t *csd) ++{ ++ int bug_id = 0; ++ u64 ts0, ts1; ++ ++ ts1 = ts0 = sched_clock(); ++ for (;;) { ++ if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id)) ++ break; ++ cpu_relax(); ++ } ++ smp_acquire__after_ctrl_dep(); ++} ++ ++#else ++static void csd_lock_record(call_single_data_t *csd) ++{ ++} ++ + static __always_inline void csd_lock_wait(call_single_data_t *csd) + { + smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); + } ++#endif + + static __always_inline void csd_lock(call_single_data_t *csd) + { +@@ -149,9 +256,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) + * We can unlock early even for the synchronous on-stack case, + * since we're doing this from the same CPU.. + */ ++ csd_lock_record(csd); + csd_unlock(csd); + local_irq_save(flags); + func(info); ++ csd_lock_record(NULL); + local_irq_restore(flags); + return 0; + } +@@ -164,7 +277,8 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, + + csd->func = func; + csd->info = info; +-#ifdef CONFIG_64BIT ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ csd->src = smp_processor_id(); + csd->dst = cpu; + #endif + +@@ -242,6 +351,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + smp_call_func_t func = csd->func; + void *info = csd->info; + ++ csd_lock_record(csd); + /* Do we wait until *after* callback? 
*/ + if (csd->flags & CSD_FLAG_SYNCHRONOUS) { + func(info); +@@ -250,6 +360,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + csd_unlock(csd); + func(info); + } ++ csd_lock_record(NULL); + } + + /* +@@ -543,7 +657,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask, + csd->flags |= CSD_FLAG_SYNCHRONOUS; + csd->func = func; + csd->info = info; +-#ifdef CONFIG_64BIT ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ csd->src = smp_processor_id(); + csd->dst = cpu; + #endif + if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) +diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug +index e068c3c7189a..86a35fdfe021 100644 +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -1367,6 +1367,17 @@ config WW_MUTEX_SELFTEST + Say M if you want these self tests to build as a module. + Say N if you are unsure. + ++config CSD_LOCK_WAIT_DEBUG ++ bool "Debugging for csd_lock_wait(), called from smp_call_function*()" ++ depends on DEBUG_KERNEL ++ depends on 64BIT ++ default n ++ help ++ This option enables debug prints when CPUs are slow to respond ++ to the smp_call_function*() IPI wrappers. These debug prints ++ include the IPI handler function currently executing (if any) ++ and relevant stack traces. ++ + endmenu # lock debugging + + config TRACE_IRQFLAGS +-- +2.26.2 + diff --git a/patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch b/patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch new file mode 100644 index 0000000..77f1638 --- /dev/null +++ b/patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch @@ -0,0 +1,132 @@ +Patch-mainline: Submitted, 1 Mar 2021 14:17:33, lkml +References: bsc#1180846 +From: Juergen Gross +Date: Mon, 1 Mar 2021 14:17:33 +0100 +Subject: [PATCH 1/4] kernel/smp: add boot parameter for controlling CSD + lock debugging + +Currently CSD lock debugging can be switched on and off via a kernel +config option only. Unfortunately there is at least one problem with +CSD lock handling pending for about 2 years now, which has been seen +in different environments (mostly when running virtualized under KVM +or Xen, at least once on bare metal). Multiple attempts to catch this +issue have finally led to introduction of CSD lock debug code, but +this code is not in use in most distros as it has some impact on +performance. + +In order to be able to ship kernels with CONFIG_CSD_LOCK_WAIT_DEBUG +enabled even for production use, add a boot parameter for switching +the debug functionality on. This will reduce any performance impact +of the debug coding to a bare minimum when not being used. + +Signed-off-by: Juergen Gross +--- + .../admin-guide/kernel-parameters.txt | 6 +++ + kernel/smp.c | 38 +++++++++++++++++-- + 2 files changed, 40 insertions(+), 4 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 04545725f187..31dbf7b2f0e8 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -784,6 +784,12 @@ + cs89x0_media= [HW,NET] + Format: { rj45 | aui | bnc } + ++ csdlock_debug= [KNL] Enable debug add-ons of cross-cpu function call ++ handling. When switched on additional debug data is ++ printed to the console in case a hanging cpu is ++ detected and that cpu is pinged again in order to try ++ to resolve the hang situation. ++ + dasd= [HW,NET] + See header of drivers/s390/block/dasd_devmap.c. 
+ +diff --git a/kernel/smp.c b/kernel/smp.c +index aeb0adfa0606..d5f0b21ab55e 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include "smpboot.h" + +@@ -102,6 +103,20 @@ void __init call_function_init(void) + + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + ++static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled); ++ ++static int __init csdlock_debug(char *str) ++{ ++ unsigned int val = 0; ++ ++ get_option(&str, &val); ++ if (val) ++ static_branch_enable(&csdlock_debug_enabled); ++ ++ return 0; ++} ++early_param("csdlock_debug", csdlock_debug); ++ + static DEFINE_PER_CPU(call_single_data_t *, cur_csd); + static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); + static DEFINE_PER_CPU(void *, cur_csd_info); +@@ -110,7 +125,7 @@ static DEFINE_PER_CPU(void *, cur_csd_info); + atomic_t csd_bug_count = ATOMIC_INIT(0); + + /* Record current CSD work for current CPU, NULL to erase. */ +-static void csd_lock_record(call_single_data_t *csd) ++static void __csd_lock_record(call_single_data_t *csd) + { + if (!csd) { + smp_mb(); /* NULL cur_csd after unlock. */ +@@ -125,10 +140,16 @@ static void csd_lock_record(call_single_data_t *csd) + /* Or before unlock, as the case may be. */ + } + ++static __always_inline void csd_lock_record(call_single_data_t *csd) ++{ ++ if (static_branch_unlikely(&csdlock_debug_enabled)) ++ __csd_lock_record(csd); ++} ++ + /* + * Complain if too much time spent waiting. + */ +-static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) ++static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) + { + int cpu = -1; + int cpux; +@@ -204,7 +225,7 @@ static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 t + * previous function call. For multi-cpu calls its even more interesting + * as we'll have to ensure no other cpu is observing our csd. + */ +-static __always_inline void csd_lock_wait(call_single_data_t *csd) ++static void __csd_lock_wait(call_single_data_t *csd) + { + int bug_id = 0; + u64 ts0, ts1; +@@ -218,6 +239,15 @@ static __always_inline void csd_lock_wait(call_single_data_t *csd) + smp_acquire__after_ctrl_dep(); + } + ++static __always_inline void csd_lock_wait(call_single_data_t *csd) ++{ ++ if (static_branch_unlikely(&csdlock_debug_enabled)) { ++ __csd_lock_wait(csd); ++ return; ++ } ++ ++ smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); ++} + #else + static void csd_lock_record(call_single_data_t *csd) + { +-- +2.26.2 + diff --git a/patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch b/patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch new file mode 100644 index 0000000..a0bde90 --- /dev/null +++ b/patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch @@ -0,0 +1,431 @@ +Patch-mainline: Submitted, 1 Mar 2021 14:17:33, lkml +References: bsc#1180846 +From: Juergen Gross +Date: Mon, 1 Mar 2021 14:17:33 +0100 +Subject: [PATCH 3/4] kernel/smp: add more data to CSD lock debugging + +In order to help identifying problems with IPI handling and remote +function execution add some more data to IPI debugging code. + +There have been multiple reports of cpus looping long times (many +seconds) in smp_call_function_many() waiting for another cpu executing +a function like tlb flushing. Most of these reports have been for +cases where the kernel was running as a guest on top of KVM or Xen +(there are rumours of that happening under VMWare, too, and even on +bare metal). 
+ +Finding the root cause hasn't been successful yet, even after more than +2 years of chasing this bug by different developers. + +Commit 35feb60474bf4f7 ("kernel/smp: Provide CSD lock timeout +diagnostics") tried to address this by adding some debug code and by +issuing another IPI when a hang was detected. This helped mitigating +the problem (the repeated IPI unlocks the hang), but the root cause is +still unknown. + +Current available data suggests that either an IPI wasn't sent when it +should have been, or that the IPI didn't result in the target cpu +executing the queued function (due to the IPI not reaching the cpu, +the IPI handler not being called, or the handler not seeing the queued +request). + +Try to add more diagnostic data by introducing a global atomic counter +which is being incremented when doing critical operations (before and +after queueing a new request, when sending an IPI, and when dequeueing +a request). The counter value is stored in percpu variables which can +be printed out when a hang is detected. + +The data of the last event (consisting of sequence counter, source +cpu, target cpu, and event type) is stored in a global variable. When +a new event is to be traced, the data of the last event is stored in +the event related percpu location and the global data is updated with +the new event's data. This allows to track two events in one data +location: one by the value of the event data (the event before the +current one), and one by the location itself (the current event). + +A typical printout with a detected hang will look like this: + +csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 5000000003 ns for CPU#06 scf_handler_1+0x0/0x50(0xffffa2a881bb1410). + csd: CSD lock (#1) handling prior scf_handler_1+0x0/0x50(0xffffa2a8813823c0) request. + csd: cnt(00008cc): ffff->0000 dequeue (src cpu 0 == empty) + csd: cnt(00008cd): ffff->0006 idle + csd: cnt(0003668): 0001->0006 queue + csd: cnt(0003669): 0001->0006 ipi + csd: cnt(0003e0f): 0007->000a queue + csd: cnt(0003e10): 0001->ffff ping + csd: cnt(0003e71): 0003->0000 ping + csd: cnt(0003e72): ffff->0006 gotipi + csd: cnt(0003e73): ffff->0006 handle + csd: cnt(0003e74): ffff->0006 dequeue (src cpu 0 == empty) + csd: cnt(0003e7f): 0004->0006 ping + csd: cnt(0003e80): 0001->ffff pinged + csd: cnt(0003eb2): 0005->0001 noipi + csd: cnt(0003eb3): 0001->0006 queue + csd: cnt(0003eb4): 0001->0006 noipi + csd: cnt now: 0003f00 + +This example (being an artificial one, produced with a previous version +of this patch without the "hdlend" event), shows that cpu#6 started to +handle an IPI (cnt 3e72-3e74), bit didn't start to handle another IPI +(sent by cpu#4, cnt 3e7f). The next request from cpu#1 for cpu#6 was +queued (3eb3), but no IPI was needed (cnt 3eb4, there was the event +from cpu#4 in the queue already). + +The idea is to print only relevant entries. Those are all events which +are associated with the hang (so sender side events for the source cpu +of the hanging request, and receiver side events for the target cpu), +and the related events just before those (for adding data needed to +identify a possible race). Printing all available data would be +possible, but this would add large amounts of data printed on larger +configurations. + +Signed-off-by: Juergen Gross +Tested-by: Paul E. 
McKenney +--- + .../admin-guide/kernel-parameters.txt | 4 + + kernel/smp.c | 228 +++++++++++++++++- + 2 files changed, 228 insertions(+), 4 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 31dbf7b2f0e8..80c72f8e780d 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -789,6 +789,10 @@ + printed to the console in case a hanging cpu is + detected and that cpu is pinged again in order to try + to resolve the hang situation. ++ 0: disable csdlock debugging (default) ++ 1: enable basic csdlock debugging (minor impact) ++ ext: enable extended csdlock debugging (more impact, ++ but more data) + + dasd= [HW,NET] + See header of drivers/s390/block/dasd_devmap.c. +diff --git a/kernel/smp.c b/kernel/smp.c +index 6d7e6dbe33dc..1a96691dbf7f 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -31,8 +31,59 @@ + CSD_FLAG_SYNCHRONOUS = 0x02, + }; + ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++union cfd_seq_cnt { ++ u64 val; ++ struct { ++ u64 src:16; ++ u64 dst:16; ++#define CFD_SEQ_NOCPU 0xffff ++ u64 type:4; ++#define CFD_SEQ_QUEUE 0 ++#define CFD_SEQ_IPI 1 ++#define CFD_SEQ_NOIPI 2 ++#define CFD_SEQ_PING 3 ++#define CFD_SEQ_PINGED 4 ++#define CFD_SEQ_HANDLE 5 ++#define CFD_SEQ_DEQUEUE 6 ++#define CFD_SEQ_IDLE 7 ++#define CFD_SEQ_GOTIPI 8 ++#define CFD_SEQ_HDLEND 9 ++ u64 cnt:28; ++ } u; ++}; ++ ++static char *seq_type[] = { ++ [CFD_SEQ_QUEUE] = "queue", ++ [CFD_SEQ_IPI] = "ipi", ++ [CFD_SEQ_NOIPI] = "noipi", ++ [CFD_SEQ_PING] = "ping", ++ [CFD_SEQ_PINGED] = "pinged", ++ [CFD_SEQ_HANDLE] = "handle", ++ [CFD_SEQ_DEQUEUE] = "dequeue (src cpu 0 == empty)", ++ [CFD_SEQ_IDLE] = "idle", ++ [CFD_SEQ_GOTIPI] = "gotipi", ++ [CFD_SEQ_HDLEND] = "hdlend (src cpu 0 == early)", ++}; ++ ++struct cfd_seq_local { ++ u64 ping; ++ u64 pinged; ++ u64 handle; ++ u64 dequeue; ++ u64 idle; ++ u64 gotipi; ++ u64 hdlend; ++}; ++#endif ++ + struct cfd_percpu { + call_single_data_t csd; ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ u64 seq_queue; ++ u64 seq_ipi; ++ u64 seq_noipi; ++#endif + }; + + struct call_function_data { +@@ -108,12 +159,18 @@ void __init call_function_init(void) + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + + static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled); ++static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended); + + static int __init csdlock_debug(char *str) + { + unsigned int val = 0; + +- get_option(&str, &val); ++ if (str && !strcmp(str, "ext")) { ++ val = 1; ++ static_branch_enable(&csdlock_debug_extended); ++ } else ++ get_option(&str, &val); ++ + if (val) + static_branch_enable(&csdlock_debug_enabled); + +@@ -124,9 +181,34 @@ early_param("csdlock_debug", csdlock_debug); + static DEFINE_PER_CPU(call_single_data_t *, cur_csd); + static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); + static DEFINE_PER_CPU(void *, cur_csd_info); ++static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local); + + #define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) + atomic_t csd_bug_count = ATOMIC_INIT(0); ++static u64 cfd_seq; ++ ++#define CFD_SEQ(s, d, t, c) \ ++ (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c } ++ ++static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type) ++{ ++ union cfd_seq_cnt new, old; ++ ++ new = CFD_SEQ(src, dst, type, 0); ++ ++ do { ++ old.val = READ_ONCE(cfd_seq); ++ new.u.cnt = old.u.cnt + 1; ++ } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val); ++ ++ return old.val; ++} ++ ++#define cfd_seq_store(var, src, dst, type) 
\ ++ do { \ ++ if (static_branch_unlikely(&csdlock_debug_extended)) \ ++ var = cfd_seq_inc(src, dst, type); \ ++ } while (0) + + /* Record current CSD work for current CPU, NULL to erase. */ + static void __csd_lock_record(call_single_data_t *csd) +@@ -160,6 +242,88 @@ static __always_inline void csd_lock_record(call_single_data_t *csd) + __csd_lock_record(csd); + } + ++static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst, ++ unsigned int type, union cfd_seq_cnt *data, ++ unsigned int *n_data, unsigned int now) ++{ ++ union cfd_seq_cnt new[2]; ++ unsigned int i, j, k; ++ ++ new[0].val = val; ++ new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1); ++ ++ for (i = 0; i < 2; i++) { ++ if (new[i].u.cnt <= now) ++ new[i].u.cnt |= 0x80000000U; ++ for (j = 0; j < *n_data; j++) { ++ if (new[i].u.cnt == data[j].u.cnt) { ++ /* Direct read value trumps generated one. */ ++ if (i == 0) ++ data[j].val = new[i].val; ++ break; ++ } ++ if (new[i].u.cnt < data[j].u.cnt) { ++ for (k = *n_data; k > j; k--) ++ data[k].val = data[k - 1].val; ++ data[j].val = new[i].val; ++ (*n_data)++; ++ break; ++ } ++ } ++ if (j == *n_data) { ++ data[j].val = new[i].val; ++ (*n_data)++; ++ } ++ } ++} ++ ++static const char *csd_lock_get_type(unsigned int type) ++{ ++ return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type]; ++} ++ ++static void csd_lock_print_extended(call_single_data_t *csd, int cpu) ++{ ++ struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu); ++ unsigned int srccpu = csd->src; ++ struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu); ++ struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); ++ unsigned int now; ++ union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)]; ++ unsigned int n_data = 0, i; ++ ++ data[0].val = READ_ONCE(cfd_seq); ++ now = data[0].u.cnt; ++ ++ cfd_seq_data_add(pcpu->seq_queue, srccpu, cpu, ++ CFD_SEQ_QUEUE, data, &n_data, now); ++ cfd_seq_data_add(pcpu->seq_ipi, srccpu, cpu, ++ CFD_SEQ_IPI, data, &n_data, now); ++ cfd_seq_data_add(pcpu->seq_noipi, srccpu, cpu, ++ CFD_SEQ_NOIPI, data, &n_data, now); ++ cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu), srccpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PING, data, &n_data, now); ++ cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now); ++ cfd_seq_data_add(seq->idle, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_IDLE, data, &n_data, now); ++ cfd_seq_data_add(seq->gotipi, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_GOTIPI, data, &n_data, now); ++ cfd_seq_data_add(seq->handle, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_HANDLE, data, &n_data, now); ++ cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_DEQUEUE, data, &n_data, now); ++ cfd_seq_data_add(seq->hdlend, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_HDLEND, data, &n_data, now); ++ ++ for (i = 0; i < n_data; i++) { ++ pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n", ++ data[i].u.cnt & ~0x80000000U, data[i].u.src, ++ data[i].u.dst, csd_lock_get_type(data[i].u.type)); ++ } ++ pr_alert("\tcsd: cnt now: %07x\n", now); ++} ++ + /* + * Complain if too much time spent waiting. + */ +@@ -209,6 +373,8 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in + *bug_id, !cpu_cur_csd ? 
"unresponsive" : "handling this request"); + } + if (cpu >= 0) { ++ if (static_branch_unlikely(&csdlock_debug_extended)) ++ csd_lock_print_extended(csd, cpu); + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + if (!cpu_cur_csd) { +@@ -252,7 +418,27 @@ static __always_inline void csd_lock_wait(call_single_data_t *csd) + + smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); + } ++ ++static void __smp_call_single_queue_debug(int cpu, struct llist_node *node) ++{ ++ unsigned int this_cpu = smp_processor_id(); ++ struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local); ++ struct call_function_data *cfd = this_cpu_ptr(&cfd_data); ++ struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); ++ ++ cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); ++ if (llist_add(node, &per_cpu(call_single_queue, cpu))) { ++ cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); ++ cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING); ++ arch_send_call_function_single_ipi(cpu); ++ cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED); ++ } else { ++ cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); ++ } ++} + #else ++#define cfd_seq_store(var, src, dst, type) ++ + static void csd_lock_record(call_single_data_t *csd) + { + } +@@ -335,6 +521,13 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) + csd->dst = cpu; + #endif + ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ if (static_branch_unlikely(&csdlock_debug_extended)) { ++ __smp_call_single_queue_debug(cpu, &csd->llist); ++ return 0; ++ } ++#endif ++ + /* + * The list addition should be visible before sending the IPI + * handler locks the list to pull the entry off it because of +@@ -348,6 +541,8 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) + */ + void generic_smp_call_function_single_interrupt(void) + { ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU, ++ smp_processor_id(), CFD_SEQ_GOTIPI); + flush_smp_call_function_queue(true); + } + +@@ -375,7 +570,13 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + lockdep_assert_irqs_disabled(); + + head = this_cpu_ptr(&call_single_queue); ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU, ++ smp_processor_id(), CFD_SEQ_HANDLE); + entry = llist_del_all(head); ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue, ++ /* Special meaning of source cpu: 0 == queue empty */ ++ entry ? CFD_SEQ_NOCPU : 0, ++ smp_processor_id(), CFD_SEQ_DEQUEUE); + entry = llist_reverse_order(entry); + + /* There shouldn't be any pending callbacks on an offline CPU. */ +@@ -473,6 +678,9 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + * for them. 
+ */ + irq_work_run(); ++ ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU, ++ smp_processor_id(), CFD_SEQ_HDLEND); + } + + /* +@@ -698,7 +908,8 @@ static void smp_call_function_many(const struct cpumask *mask, + + cpumask_clear(cfd->cpumask_ipi); + for_each_cpu(cpu, cfd->cpumask) { +- call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; ++ struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); ++ call_single_data_t *csd = &pcpu->csd; + + csd_lock(csd); + if (wait) +@@ -712,12 +923,21 @@ static void smp_call_function_many_cond(const struct cpumask *mask, + csd->src = smp_processor_id(); + csd->dst = cpu; + #endif +- if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) ++ cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); ++ if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) { + __cpumask_set_cpu(cpu, cfd->cpumask_ipi); ++ cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); ++ } else { ++ cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); ++ } + } + + /* Send a message to all CPUs in the map */ ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PING); + arch_send_call_function_ipi_mask(cfd->cpumask_ipi); ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PINGED); + + if (wait) { + for_each_cpu(cpu, cfd->cpumask) { +-- +2.26.2 + diff --git a/patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch b/patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch new file mode 100644 index 0000000..8d072ca --- /dev/null +++ b/patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch @@ -0,0 +1,73 @@ +Patch-mainline: Submitted, 1 Mar 2021 14:17:33, lkml +References: bsc#1180846 +From: Juergen Gross +Subject: [PATCH 2/4] kernel/smp: prepare more CSD lock debugging + +In order to be able to easily add more CSD lock debugging data to +struct call_function_data->csd move the call_single_data_t element +into a sub-structure. 
+ +Signed-off-by: Juergen Gross +--- + kernel/smp.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/kernel/smp.c b/kernel/smp.c +index d5f0b21ab55e..6d7e6dbe33dc 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -31,8 +31,12 @@ + CSD_FLAG_SYNCHRONOUS = 0x02, + }; + ++struct cfd_percpu { ++ call_single_data_t csd; ++}; ++ + struct call_function_data { +- call_single_data_t __percpu *csd; ++ struct cfd_percpu __percpu *pcpu; + cpumask_var_t cpumask; + cpumask_var_t cpumask_ipi; + }; +@@ -55,8 +59,8 @@ int smpcfd_prepare_cpu(unsigned int cpu) + free_cpumask_var(cfd->cpumask); + return -ENOMEM; + } +- cfd->csd = alloc_percpu(call_single_data_t); +- if (!cfd->csd) { ++ cfd->pcpu = alloc_percpu(struct cfd_percpu); ++ if (!cfd->pcpu) { + free_cpumask_var(cfd->cpumask); + free_cpumask_var(cfd->cpumask_ipi); + return -ENOMEM; +@@ -71,7 +75,7 @@ int smpcfd_dead_cpu(unsigned int cpu) + + free_cpumask_var(cfd->cpumask); + free_cpumask_var(cfd->cpumask_ipi); +- free_percpu(cfd->csd); ++ free_percpu(cfd->pcpu); + return 0; + } + +@@ -694,7 +698,7 @@ static void smp_call_function_many(const struct cpumask *mask, + + cpumask_clear(cfd->cpumask_ipi); + for_each_cpu(cpu, cfd->cpumask) { +- call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); ++ call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; + + csd_lock(csd); + if (wait) +@@ -719,7 +723,7 @@ static void smp_call_function_many(const struct cpumask *mask, + for_each_cpu(cpu, cfd->cpumask) { + call_single_data_t *csd; + +- csd = per_cpu_ptr(cfd->csd, cpu); ++ csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; + csd_lock_wait(csd); + } + } +-- +2.26.2 + diff --git a/patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch b/patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch new file mode 100644 index 0000000..7fb5824 --- /dev/null +++ b/patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch @@ -0,0 +1,74 @@ +Patch-mainline: v5.10-rc1 +Git-commit: e48c15b796d412ede883bb2ef7779b2a142f7962 +References: bsc#1180846 +From: "Paul E. McKenney" +Date: Mon, 29 Jun 2020 17:21:32 -0700 +Subject: [PATCH] smp: Add source and destination CPUs to __call_single_data + +This commit adds a destination CPU to __call_single_data, and is inspired +by an earlier commit by Peter Zijlstra. This version adds #ifdef to +permit use by 32-bit systems and supplying the destination CPU for all +smp_call_function*() requests, not just smp_call_function_single(). + +If need be, 32-bit systems could be accommodated by shrinking the flags +field to 16 bits (the atomic_t variant is currently unused) and by +providing only eight bits for CPU on such systems. + +It is not clear that the addition of the fields to __call_single_node +are really needed. + +[ paulmck: Apply Boqun Feng feedback on 32-bit builds. ] +Link: https://lore.kernel.org/lkml/20200615164048.GC2531@hirez.programming.kicks-ass.net/ +Cc: Peter Zijlstra +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: Sebastian Andrzej Siewior +Cc: Frederic Weisbecker +Signed-off-by: Paul E. 
McKenney +Signed-off-by: Juergen Gross +--- + include/linux/smp.h | 3 +++ + kernel/smp.c | 6 ++++++ + 2 files changed, 9 insertions(+) + +diff --git a/include/linux/smp.h b/include/linux/smp.h +index 80d557ef8a11..9f13966d3d92 100644 +--- a/include/linux/smp.h ++++ b/include/linux/smp.h +@@ -20,6 +20,9 @@ struct __call_single_data { + smp_call_func_t func; + void *info; + unsigned int flags; ++#ifdef CONFIG_64BIT ++ u16 src, dst; ++#endif + }; + + /* Use __aligned() to avoid to use 2 cache lines for 1 csd */ +diff --git a/kernel/smp.c b/kernel/smp.c +index d0ae8eb6bf8b..865a876f83ce 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -164,6 +164,9 @@ int generic_exec_single(int cpu, call_single_data_t *csd, + + csd->func = func; + csd->info = info; ++#ifdef CONFIG_64BIT ++ csd->dst = cpu; ++#endif + + /* + * The list addition should be visible before sending the IPI +@@ -470,6 +473,9 @@ static void smp_call_function_many(const struct cpumask *mask, + csd->flags |= CSD_FLAG_SYNCHRONOUS; + csd->func = func; + csd->info = info; ++#ifdef CONFIG_64BIT ++ csd->dst = cpu; ++#endif + if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) + __cpumask_set_cpu(cpu, cfd->cpumask_ipi); + } +-- +2.26.2 + diff --git a/series.conf b/series.conf index ade425b..e202f1c 100644 --- a/series.conf +++ b/series.conf @@ -16412,6 +16412,8 @@ patches.suse/ubifs-dent-Fix-some-potential-memory-leaks-while-ite.patch patches.suse/ubifs-journal-Make-sure-to-not-dirty-twice-for-auth-.patch patches.suse/mailbox-avoid-timer-start-from-callback.patch + patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch + patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch patches.suse/xfs-limit-entries-returned-when-counting-fsmap-recor.patch patches.suse/xfs-fix-high-key-handling-in-the-rt-allocator-s-quer.patch patches.suse/xen-events-avoid-removing-an-event-channel-while-han.patch @@ -18038,6 +18040,9 @@ patches.suse/btrfs-remove-btrfs_inode-from-btrfs_delayed_inode_reserve_metadata.patch patches.suse/btrfs-simplify-code-flow-in-btrfs_delayed_inode_reserve_metadata.patch patches.suse/btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-errors.patch + patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch + patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch + patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch ######################################################## # kbuild/module infrastructure fixes @@ -18539,6 +18544,7 @@ patches.kabi/powerpc-kABI-add-back-suspend_disable_cpu-in-machdep.patch patches.kabi/mm-thp-kABI-move-the-added-flag-to-the-end-of-enum.patch patches.kabi/kabi-repair-after-nvmx-emulate-mtf-when-performing-instruction-emulation + patches.kabi/fix-kabi-after-call_single_data-modification.patch ######################################################## # You'd better have a good reason for adding a patch