From c35b1245712f30ff544bd6dc082efdff25ea9a17 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mar 02 2021 13:15:25 +0000 Subject: Merge branch 'users/jgross/SLE15-SP2/for-next' into users/mkubecek/SLE15-SP2/for-next Pull an smp fix from Jürgen Groß. --- diff --git a/config/arm64/default b/config/arm64/default index 1a006f5..0335f83 100644 --- a/config/arm64/default +++ b/config/arm64/default @@ -9306,6 +9306,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/ppc64le/default b/config/ppc64le/default index cb0d998..d704ede 100644 --- a/config/ppc64le/default +++ b/config/ppc64le/default @@ -6082,6 +6082,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/s390x/default b/config/s390x/default index 26b28dc..578e339 100644 --- a/config/s390x/default +++ b/config/s390x/default @@ -3881,6 +3881,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/s390x/zfcpdump b/config/s390x/zfcpdump index 7a5b7a6..a8cdf0b 100644 --- a/config/s390x/zfcpdump +++ b/config/s390x/zfcpdump @@ -1401,6 +1401,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/config/x86_64/default b/config/x86_64/default index 9b30399..a4bb961 100644 --- a/config/x86_64/default +++ b/config/x86_64/default @@ -8745,6 +8745,7 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +CONFIG_CSD_LOCK_WAIT_DEBUG=y # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y diff --git a/patches.kabi/fix-kabi-after-call_single_data-modification.patch b/patches.kabi/fix-kabi-after-call_single_data-modification.patch new file mode 100644 index 0000000..409dc5b --- /dev/null +++ b/patches.kabi/fix-kabi-after-call_single_data-modification.patch @@ -0,0 +1,35 @@ +From: Juergen Gross +Date: Thu, 2 Mar 2021 08:03:16 +0100 +Subject: [PATCH] kABI: Fix kABI after modifying struct __call_single_data +Patch-mainline: Never, kABI fix +References: bsc#1180846 + +Fix kABI of struct __call_single_data after adding src and dst +elements. + +This is trivial, as the struct is 8-byte aligned for 64-bit due to the +pointers in it and there are 4 bytes padding at the end of the struct +where src and dst have been added for 64-bit only. 
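+
+As an illustration only (not part of this patch, and using made-up
+userspace type names), the size argument above can be checked with a
+small LP64 sketch: the three pointer-sized members force 8-byte
+alignment, so the 4-byte flags field leaves 4 bytes of tail padding
+that the two new u16 fields reuse, keeping sizeof() unchanged:
+
+	#include <assert.h>
+	#include <stdint.h>
+
+	/* Hypothetical stand-ins for the kernel struct; LP64 assumed. */
+	struct csd_old { void *llist; void *func; void *info; uint32_t flags; };
+	struct csd_new { void *llist; void *func; void *info; uint32_t flags;
+			 uint16_t src, dst; };
+
+	int main(void)
+	{
+		/* Same size before and after adding src/dst, so kABI size is preserved. */
+		assert(sizeof(struct csd_old) == sizeof(struct csd_new));
+		return 0;
+	}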
+ +Signed-off-by: Juergen Gross +--- + include/linux/smp.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/linux/smp.h b/include/linux/smp.h +index 80d557ef8a11..9f13966d3d92 100644 +--- a/include/linux/smp.h ++++ b/include/linux/smp.h +@@ -21,7 +21,9 @@ struct __call_single_data { + void *info; + unsigned int flags; + #ifdef CONFIG_64BIT ++#ifndef __GENKSYMS__ + u16 src, dst; ++#endif + #endif + }; + +-- +2.25.0 + diff --git a/patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch b/patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch new file mode 100644 index 0000000..3441d18 --- /dev/null +++ b/patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch @@ -0,0 +1,240 @@ +Patch-mainline: v5.10-rc1 +Git-commit: 35feb60474bf4f7fa7840e14fc7fd344996b919d +References: bsc#1180846 +From: "Paul E. McKenney" +Date: Tue, 30 Jun 2020 13:22:54 -0700 +Subject: [PATCH] kernel/smp: Provide CSD lock timeout diagnostics + +This commit causes csd_lock_wait() to emit diagnostics when a CPU +fails to respond quickly enough to one of the smp_call_function() +family of function calls. These diagnostics are enabled by a new +CSD_LOCK_WAIT_DEBUG Kconfig option that depends on DEBUG_KERNEL. + +This commit was inspired by an earlier patch by Josef Bacik. + +[ paulmck: Fix for syzbot+0f719294463916a3fc0e@syzkaller.appspotmail.com ] +[ paulmck: Fix KASAN use-after-free issue reported by Qian Cai. ] +[ paulmck: Fix botched nr_cpu_ids comparison per Dan Carpenter. ] +[ paulmck: Apply Peter Zijlstra feedback. ] +Link: https://lore.kernel.org/lkml/00000000000042f21905a991ecea@google.com +Link: https://lore.kernel.org/lkml/0000000000002ef21705a9933cf3@google.com +Cc: Peter Zijlstra +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: Sebastian Andrzej Siewior +Signed-off-by: Paul E. McKenney +Signed-off-by: Juergen Gross +--- + kernel/smp.c | 132 +++++++++++++++++++++++++++++++++++++++++++++- + lib/Kconfig.debug | 11 ++++ + 2 files changed, 141 insertions(+), 2 deletions(-) + +diff --git a/kernel/smp.c b/kernel/smp.c +index 865a876f83ce..c5d31885bd30 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -20,6 +20,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include "smpboot.h" + +@@ -97,6 +100,90 @@ void __init call_function_init(void) + smpcfd_prepare_cpu(smp_processor_id()); + } + ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ ++static DEFINE_PER_CPU(call_single_data_t *, cur_csd); ++static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); ++static DEFINE_PER_CPU(void *, cur_csd_info); ++ ++#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) ++atomic_t csd_bug_count = ATOMIC_INIT(0); ++ ++/* Record current CSD work for current CPU, NULL to erase. */ ++static void csd_lock_record(call_single_data_t *csd) ++{ ++ if (!csd) { ++ smp_mb(); /* NULL cur_csd after unlock. */ ++ __this_cpu_write(cur_csd, NULL); ++ return; ++ } ++ __this_cpu_write(cur_csd_func, csd->func); ++ __this_cpu_write(cur_csd_info, csd->info); ++ smp_wmb(); /* func and info before csd. */ ++ __this_cpu_write(cur_csd, csd); ++ smp_mb(); /* Update cur_csd before function call. */ ++ /* Or before unlock, as the case may be. */ ++} ++ ++/* ++ * Complain if too much time spent waiting. 
++ */ ++static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) ++{ ++ int cpu = -1; ++ int cpux; ++ bool firsttime; ++ u64 ts2, ts_delta; ++ call_single_data_t *cpu_cur_csd; ++ unsigned int flags = READ_ONCE(csd->flags); ++ ++ if (!(flags & CSD_FLAG_LOCK)) { ++ if (!unlikely(*bug_id)) ++ return true; ++ pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n", ++ *bug_id, raw_smp_processor_id(), csd->dst); ++ return true; ++ } ++ ++ ts2 = sched_clock(); ++ ts_delta = ts2 - *ts1; ++ if (likely(ts_delta <= CSD_LOCK_TIMEOUT)) ++ return false; ++ ++ firsttime = !*bug_id; ++ if (firsttime) ++ *bug_id = atomic_inc_return(&csd_bug_count); ++ cpu = csd->dst; ++ if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu)) ++ cpux = 0; ++ else ++ cpux = cpu; ++ cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */ ++ pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n", ++ firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0, ++ cpu, csd->func, csd->info); ++ if (cpu_cur_csd && csd != cpu_cur_csd) { ++ pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n", ++ *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)), ++ READ_ONCE(per_cpu(cur_csd_info, cpux))); ++ } else { ++ pr_alert("\tcsd: CSD lock (#%d) %s.\n", ++ *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); ++ } ++ if (cpu >= 0) { ++ if (!trigger_single_cpu_backtrace(cpu)) ++ dump_cpu_task(cpu); ++ if (!cpu_cur_csd) { ++ pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); ++ arch_send_call_function_single_ipi(cpu); ++ } ++ } ++ dump_stack(); ++ *ts1 = ts2; ++ ++ return false; ++} ++ + /* + * csd_lock/csd_unlock used to serialize access to per-cpu csd resources + * +@@ -104,10 +191,30 @@ void __init call_function_init(void) + * previous function call. For multi-cpu calls its even more interesting + * as we'll have to ensure no other cpu is observing our csd. + */ ++static __always_inline void csd_lock_wait(call_single_data_t *csd) ++{ ++ int bug_id = 0; ++ u64 ts0, ts1; ++ ++ ts1 = ts0 = sched_clock(); ++ for (;;) { ++ if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id)) ++ break; ++ cpu_relax(); ++ } ++ smp_acquire__after_ctrl_dep(); ++} ++ ++#else ++static void csd_lock_record(call_single_data_t *csd) ++{ ++} ++ + static __always_inline void csd_lock_wait(call_single_data_t *csd) + { + smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); + } ++#endif + + static __always_inline void csd_lock(call_single_data_t *csd) + { +@@ -149,9 +256,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) + * We can unlock early even for the synchronous on-stack case, + * since we're doing this from the same CPU.. + */ ++ csd_lock_record(csd); + csd_unlock(csd); + local_irq_save(flags); + func(info); ++ csd_lock_record(NULL); + local_irq_restore(flags); + return 0; + } +@@ -164,7 +277,8 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, + + csd->func = func; + csd->info = info; +-#ifdef CONFIG_64BIT ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ csd->src = smp_processor_id(); + csd->dst = cpu; + #endif + +@@ -242,6 +351,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + smp_call_func_t func = csd->func; + void *info = csd->info; + ++ csd_lock_record(csd); + /* Do we wait until *after* callback? 
*/ + if (csd->flags & CSD_FLAG_SYNCHRONOUS) { + func(info); +@@ -250,6 +360,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + csd_unlock(csd); + func(info); + } ++ csd_lock_record(NULL); + } + + /* +@@ -543,7 +657,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask, + csd->flags |= CSD_FLAG_SYNCHRONOUS; + csd->func = func; + csd->info = info; +-#ifdef CONFIG_64BIT ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ csd->src = smp_processor_id(); + csd->dst = cpu; + #endif + if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) +diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug +index e068c3c7189a..86a35fdfe021 100644 +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -1367,6 +1367,17 @@ config WW_MUTEX_SELFTEST + Say M if you want these self tests to build as a module. + Say N if you are unsure. + ++config CSD_LOCK_WAIT_DEBUG ++ bool "Debugging for csd_lock_wait(), called from smp_call_function*()" ++ depends on DEBUG_KERNEL ++ depends on 64BIT ++ default n ++ help ++ This option enables debug prints when CPUs are slow to respond ++ to the smp_call_function*() IPI wrappers. These debug prints ++ include the IPI handler function currently executing (if any) ++ and relevant stack traces. ++ + endmenu # lock debugging + + config TRACE_IRQFLAGS +-- +2.26.2 + diff --git a/patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch b/patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch new file mode 100644 index 0000000..77f1638 --- /dev/null +++ b/patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch @@ -0,0 +1,132 @@ +Patch-mainline: Submitted, 1 Mar 2021 14:17:33, lkml +References: bsc#1180846 +From: Juergen Gross +Date: Mon, 1 Mar 2021 14:17:33 +0100 +Subject: [PATCH 1/4] kernel/smp: add boot parameter for controlling CSD + lock debugging + +Currently CSD lock debugging can be switched on and off via a kernel +config option only. Unfortunately there is at least one problem with +CSD lock handling pending for about 2 years now, which has been seen +in different environments (mostly when running virtualized under KVM +or Xen, at least once on bare metal). Multiple attempts to catch this +issue have finally led to introduction of CSD lock debug code, but +this code is not in use in most distros as it has some impact on +performance. + +In order to be able to ship kernels with CONFIG_CSD_LOCK_WAIT_DEBUG +enabled even for production use, add a boot parameter for switching +the debug functionality on. This will reduce any performance impact +of the debug coding to a bare minimum when not being used. + +Signed-off-by: Juergen Gross +--- + .../admin-guide/kernel-parameters.txt | 6 +++ + kernel/smp.c | 38 +++++++++++++++++-- + 2 files changed, 40 insertions(+), 4 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 04545725f187..31dbf7b2f0e8 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -784,6 +784,12 @@ + cs89x0_media= [HW,NET] + Format: { rj45 | aui | bnc } + ++ csdlock_debug= [KNL] Enable debug add-ons of cross-cpu function call ++ handling. When switched on additional debug data is ++ printed to the console in case a hanging cpu is ++ detected and that cpu is pinged again in order to try ++ to resolve the hang situation. ++ + dasd= [HW,NET] + See header of drivers/s390/block/dasd_devmap.c. 
+ +diff --git a/kernel/smp.c b/kernel/smp.c +index aeb0adfa0606..d5f0b21ab55e 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include "smpboot.h" + +@@ -102,6 +103,20 @@ void __init call_function_init(void) + + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + ++static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled); ++ ++static int __init csdlock_debug(char *str) ++{ ++ unsigned int val = 0; ++ ++ get_option(&str, &val); ++ if (val) ++ static_branch_enable(&csdlock_debug_enabled); ++ ++ return 0; ++} ++early_param("csdlock_debug", csdlock_debug); ++ + static DEFINE_PER_CPU(call_single_data_t *, cur_csd); + static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); + static DEFINE_PER_CPU(void *, cur_csd_info); +@@ -110,7 +125,7 @@ static DEFINE_PER_CPU(void *, cur_csd_info); + atomic_t csd_bug_count = ATOMIC_INIT(0); + + /* Record current CSD work for current CPU, NULL to erase. */ +-static void csd_lock_record(call_single_data_t *csd) ++static void __csd_lock_record(call_single_data_t *csd) + { + if (!csd) { + smp_mb(); /* NULL cur_csd after unlock. */ +@@ -125,10 +140,16 @@ static void csd_lock_record(call_single_data_t *csd) + /* Or before unlock, as the case may be. */ + } + ++static __always_inline void csd_lock_record(call_single_data_t *csd) ++{ ++ if (static_branch_unlikely(&csdlock_debug_enabled)) ++ __csd_lock_record(csd); ++} ++ + /* + * Complain if too much time spent waiting. + */ +-static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) ++static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) + { + int cpu = -1; + int cpux; +@@ -204,7 +225,7 @@ static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 t + * previous function call. For multi-cpu calls its even more interesting + * as we'll have to ensure no other cpu is observing our csd. + */ +-static __always_inline void csd_lock_wait(call_single_data_t *csd) ++static void __csd_lock_wait(call_single_data_t *csd) + { + int bug_id = 0; + u64 ts0, ts1; +@@ -218,6 +239,15 @@ static __always_inline void csd_lock_wait(call_single_data_t *csd) + smp_acquire__after_ctrl_dep(); + } + ++static __always_inline void csd_lock_wait(call_single_data_t *csd) ++{ ++ if (static_branch_unlikely(&csdlock_debug_enabled)) { ++ __csd_lock_wait(csd); ++ return; ++ } ++ ++ smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); ++} + #else + static void csd_lock_record(call_single_data_t *csd) + { +-- +2.26.2 + diff --git a/patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch b/patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch new file mode 100644 index 0000000..a0bde90 --- /dev/null +++ b/patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch @@ -0,0 +1,431 @@ +Patch-mainline: Submitted, 1 Mar 2021 14:17:33, lkml +References: bsc#1180846 +From: Juergen Gross +Date: Mon, 1 Mar 2021 14:17:33 +0100 +Subject: [PATCH 3/4] kernel/smp: add more data to CSD lock debugging + +In order to help identifying problems with IPI handling and remote +function execution add some more data to IPI debugging code. + +There have been multiple reports of cpus looping long times (many +seconds) in smp_call_function_many() waiting for another cpu executing +a function like tlb flushing. Most of these reports have been for +cases where the kernel was running as a guest on top of KVM or Xen +(there are rumours of that happening under VMWare, too, and even on +bare metal). 
+ +Finding the root cause hasn't been successful yet, even after more than +2 years of chasing this bug by different developers. + +Commit 35feb60474bf4f7 ("kernel/smp: Provide CSD lock timeout +diagnostics") tried to address this by adding some debug code and by +issuing another IPI when a hang was detected. This helped mitigating +the problem (the repeated IPI unlocks the hang), but the root cause is +still unknown. + +Current available data suggests that either an IPI wasn't sent when it +should have been, or that the IPI didn't result in the target cpu +executing the queued function (due to the IPI not reaching the cpu, +the IPI handler not being called, or the handler not seeing the queued +request). + +Try to add more diagnostic data by introducing a global atomic counter +which is being incremented when doing critical operations (before and +after queueing a new request, when sending an IPI, and when dequeueing +a request). The counter value is stored in percpu variables which can +be printed out when a hang is detected. + +The data of the last event (consisting of sequence counter, source +cpu, target cpu, and event type) is stored in a global variable. When +a new event is to be traced, the data of the last event is stored in +the event related percpu location and the global data is updated with +the new event's data. This allows to track two events in one data +location: one by the value of the event data (the event before the +current one), and one by the location itself (the current event). + +A typical printout with a detected hang will look like this: + +csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 5000000003 ns for CPU#06 scf_handler_1+0x0/0x50(0xffffa2a881bb1410). + csd: CSD lock (#1) handling prior scf_handler_1+0x0/0x50(0xffffa2a8813823c0) request. + csd: cnt(00008cc): ffff->0000 dequeue (src cpu 0 == empty) + csd: cnt(00008cd): ffff->0006 idle + csd: cnt(0003668): 0001->0006 queue + csd: cnt(0003669): 0001->0006 ipi + csd: cnt(0003e0f): 0007->000a queue + csd: cnt(0003e10): 0001->ffff ping + csd: cnt(0003e71): 0003->0000 ping + csd: cnt(0003e72): ffff->0006 gotipi + csd: cnt(0003e73): ffff->0006 handle + csd: cnt(0003e74): ffff->0006 dequeue (src cpu 0 == empty) + csd: cnt(0003e7f): 0004->0006 ping + csd: cnt(0003e80): 0001->ffff pinged + csd: cnt(0003eb2): 0005->0001 noipi + csd: cnt(0003eb3): 0001->0006 queue + csd: cnt(0003eb4): 0001->0006 noipi + csd: cnt now: 0003f00 + +This example (being an artificial one, produced with a previous version +of this patch without the "hdlend" event), shows that cpu#6 started to +handle an IPI (cnt 3e72-3e74), bit didn't start to handle another IPI +(sent by cpu#4, cnt 3e7f). The next request from cpu#1 for cpu#6 was +queued (3eb3), but no IPI was needed (cnt 3eb4, there was the event +from cpu#4 in the queue already). + +The idea is to print only relevant entries. Those are all events which +are associated with the hang (so sender side events for the source cpu +of the hanging request, and receiver side events for the target cpu), +and the related events just before those (for adding data needed to +identify a possible race). Printing all available data would be +possible, but this would add large amounts of data printed on larger +configurations. + +Signed-off-by: Juergen Gross +Tested-by: Paul E. 
McKenney +--- + .../admin-guide/kernel-parameters.txt | 4 + + kernel/smp.c | 228 +++++++++++++++++- + 2 files changed, 228 insertions(+), 4 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 31dbf7b2f0e8..80c72f8e780d 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -789,6 +789,10 @@ + printed to the console in case a hanging cpu is + detected and that cpu is pinged again in order to try + to resolve the hang situation. ++ 0: disable csdlock debugging (default) ++ 1: enable basic csdlock debugging (minor impact) ++ ext: enable extended csdlock debugging (more impact, ++ but more data) + + dasd= [HW,NET] + See header of drivers/s390/block/dasd_devmap.c. +diff --git a/kernel/smp.c b/kernel/smp.c +index 6d7e6dbe33dc..1a96691dbf7f 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -31,8 +31,59 @@ + CSD_FLAG_SYNCHRONOUS = 0x02, + }; + ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++union cfd_seq_cnt { ++ u64 val; ++ struct { ++ u64 src:16; ++ u64 dst:16; ++#define CFD_SEQ_NOCPU 0xffff ++ u64 type:4; ++#define CFD_SEQ_QUEUE 0 ++#define CFD_SEQ_IPI 1 ++#define CFD_SEQ_NOIPI 2 ++#define CFD_SEQ_PING 3 ++#define CFD_SEQ_PINGED 4 ++#define CFD_SEQ_HANDLE 5 ++#define CFD_SEQ_DEQUEUE 6 ++#define CFD_SEQ_IDLE 7 ++#define CFD_SEQ_GOTIPI 8 ++#define CFD_SEQ_HDLEND 9 ++ u64 cnt:28; ++ } u; ++}; ++ ++static char *seq_type[] = { ++ [CFD_SEQ_QUEUE] = "queue", ++ [CFD_SEQ_IPI] = "ipi", ++ [CFD_SEQ_NOIPI] = "noipi", ++ [CFD_SEQ_PING] = "ping", ++ [CFD_SEQ_PINGED] = "pinged", ++ [CFD_SEQ_HANDLE] = "handle", ++ [CFD_SEQ_DEQUEUE] = "dequeue (src cpu 0 == empty)", ++ [CFD_SEQ_IDLE] = "idle", ++ [CFD_SEQ_GOTIPI] = "gotipi", ++ [CFD_SEQ_HDLEND] = "hdlend (src cpu 0 == early)", ++}; ++ ++struct cfd_seq_local { ++ u64 ping; ++ u64 pinged; ++ u64 handle; ++ u64 dequeue; ++ u64 idle; ++ u64 gotipi; ++ u64 hdlend; ++}; ++#endif ++ + struct cfd_percpu { + call_single_data_t csd; ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ u64 seq_queue; ++ u64 seq_ipi; ++ u64 seq_noipi; ++#endif + }; + + struct call_function_data { +@@ -108,12 +159,18 @@ void __init call_function_init(void) + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + + static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled); ++static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended); + + static int __init csdlock_debug(char *str) + { + unsigned int val = 0; + +- get_option(&str, &val); ++ if (str && !strcmp(str, "ext")) { ++ val = 1; ++ static_branch_enable(&csdlock_debug_extended); ++ } else ++ get_option(&str, &val); ++ + if (val) + static_branch_enable(&csdlock_debug_enabled); + +@@ -124,9 +181,34 @@ early_param("csdlock_debug", csdlock_debug); + static DEFINE_PER_CPU(call_single_data_t *, cur_csd); + static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); + static DEFINE_PER_CPU(void *, cur_csd_info); ++static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local); + + #define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) + atomic_t csd_bug_count = ATOMIC_INIT(0); ++static u64 cfd_seq; ++ ++#define CFD_SEQ(s, d, t, c) \ ++ (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c } ++ ++static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type) ++{ ++ union cfd_seq_cnt new, old; ++ ++ new = CFD_SEQ(src, dst, type, 0); ++ ++ do { ++ old.val = READ_ONCE(cfd_seq); ++ new.u.cnt = old.u.cnt + 1; ++ } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val); ++ ++ return old.val; ++} ++ ++#define cfd_seq_store(var, src, dst, type) 
\ ++ do { \ ++ if (static_branch_unlikely(&csdlock_debug_extended)) \ ++ var = cfd_seq_inc(src, dst, type); \ ++ } while (0) + + /* Record current CSD work for current CPU, NULL to erase. */ + static void __csd_lock_record(call_single_data_t *csd) +@@ -160,6 +242,88 @@ static __always_inline void csd_lock_record(call_single_data_t *csd) + __csd_lock_record(csd); + } + ++static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst, ++ unsigned int type, union cfd_seq_cnt *data, ++ unsigned int *n_data, unsigned int now) ++{ ++ union cfd_seq_cnt new[2]; ++ unsigned int i, j, k; ++ ++ new[0].val = val; ++ new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1); ++ ++ for (i = 0; i < 2; i++) { ++ if (new[i].u.cnt <= now) ++ new[i].u.cnt |= 0x80000000U; ++ for (j = 0; j < *n_data; j++) { ++ if (new[i].u.cnt == data[j].u.cnt) { ++ /* Direct read value trumps generated one. */ ++ if (i == 0) ++ data[j].val = new[i].val; ++ break; ++ } ++ if (new[i].u.cnt < data[j].u.cnt) { ++ for (k = *n_data; k > j; k--) ++ data[k].val = data[k - 1].val; ++ data[j].val = new[i].val; ++ (*n_data)++; ++ break; ++ } ++ } ++ if (j == *n_data) { ++ data[j].val = new[i].val; ++ (*n_data)++; ++ } ++ } ++} ++ ++static const char *csd_lock_get_type(unsigned int type) ++{ ++ return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type]; ++} ++ ++static void csd_lock_print_extended(call_single_data_t *csd, int cpu) ++{ ++ struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu); ++ unsigned int srccpu = csd->src; ++ struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu); ++ struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); ++ unsigned int now; ++ union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)]; ++ unsigned int n_data = 0, i; ++ ++ data[0].val = READ_ONCE(cfd_seq); ++ now = data[0].u.cnt; ++ ++ cfd_seq_data_add(pcpu->seq_queue, srccpu, cpu, ++ CFD_SEQ_QUEUE, data, &n_data, now); ++ cfd_seq_data_add(pcpu->seq_ipi, srccpu, cpu, ++ CFD_SEQ_IPI, data, &n_data, now); ++ cfd_seq_data_add(pcpu->seq_noipi, srccpu, cpu, ++ CFD_SEQ_NOIPI, data, &n_data, now); ++ cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu), srccpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PING, data, &n_data, now); ++ cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now); ++ cfd_seq_data_add(seq->idle, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_IDLE, data, &n_data, now); ++ cfd_seq_data_add(seq->gotipi, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_GOTIPI, data, &n_data, now); ++ cfd_seq_data_add(seq->handle, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_HANDLE, data, &n_data, now); ++ cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_DEQUEUE, data, &n_data, now); ++ cfd_seq_data_add(seq->hdlend, CFD_SEQ_NOCPU, cpu, ++ CFD_SEQ_HDLEND, data, &n_data, now); ++ ++ for (i = 0; i < n_data; i++) { ++ pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n", ++ data[i].u.cnt & ~0x80000000U, data[i].u.src, ++ data[i].u.dst, csd_lock_get_type(data[i].u.type)); ++ } ++ pr_alert("\tcsd: cnt now: %07x\n", now); ++} ++ + /* + * Complain if too much time spent waiting. + */ +@@ -209,6 +373,8 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in + *bug_id, !cpu_cur_csd ? 
"unresponsive" : "handling this request"); + } + if (cpu >= 0) { ++ if (static_branch_unlikely(&csdlock_debug_extended)) ++ csd_lock_print_extended(csd, cpu); + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + if (!cpu_cur_csd) { +@@ -252,7 +418,27 @@ static __always_inline void csd_lock_wait(call_single_data_t *csd) + + smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); + } ++ ++static void __smp_call_single_queue_debug(int cpu, struct llist_node *node) ++{ ++ unsigned int this_cpu = smp_processor_id(); ++ struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local); ++ struct call_function_data *cfd = this_cpu_ptr(&cfd_data); ++ struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); ++ ++ cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); ++ if (llist_add(node, &per_cpu(call_single_queue, cpu))) { ++ cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); ++ cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING); ++ arch_send_call_function_single_ipi(cpu); ++ cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED); ++ } else { ++ cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); ++ } ++} + #else ++#define cfd_seq_store(var, src, dst, type) ++ + static void csd_lock_record(call_single_data_t *csd) + { + } +@@ -335,6 +521,13 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) + csd->dst = cpu; + #endif + ++#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG ++ if (static_branch_unlikely(&csdlock_debug_extended)) { ++ __smp_call_single_queue_debug(cpu, &csd->llist); ++ return 0; ++ } ++#endif ++ + /* + * The list addition should be visible before sending the IPI + * handler locks the list to pull the entry off it because of +@@ -348,6 +541,8 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) + */ + void generic_smp_call_function_single_interrupt(void) + { ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU, ++ smp_processor_id(), CFD_SEQ_GOTIPI); + flush_smp_call_function_queue(true); + } + +@@ -375,7 +570,13 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + lockdep_assert_irqs_disabled(); + + head = this_cpu_ptr(&call_single_queue); ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU, ++ smp_processor_id(), CFD_SEQ_HANDLE); + entry = llist_del_all(head); ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue, ++ /* Special meaning of source cpu: 0 == queue empty */ ++ entry ? CFD_SEQ_NOCPU : 0, ++ smp_processor_id(), CFD_SEQ_DEQUEUE); + entry = llist_reverse_order(entry); + + /* There shouldn't be any pending callbacks on an offline CPU. */ +@@ -473,6 +678,9 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) + * for them. 
+ */ + irq_work_run(); ++ ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU, ++ smp_processor_id(), CFD_SEQ_HDLEND); + } + + /* +@@ -698,7 +908,8 @@ static void smp_call_function_many(const struct cpumask *mask, + + cpumask_clear(cfd->cpumask_ipi); + for_each_cpu(cpu, cfd->cpumask) { +- call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; ++ struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); ++ call_single_data_t *csd = &pcpu->csd; + + csd_lock(csd); + if (wait) +@@ -712,12 +923,21 @@ static void smp_call_function_many_cond(const struct cpumask *mask, + csd->src = smp_processor_id(); + csd->dst = cpu; + #endif +- if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) ++ cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); ++ if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) { + __cpumask_set_cpu(cpu, cfd->cpumask_ipi); ++ cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); ++ } else { ++ cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); ++ } + } + + /* Send a message to all CPUs in the map */ ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PING); + arch_send_call_function_ipi_mask(cfd->cpumask_ipi); ++ cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, ++ CFD_SEQ_NOCPU, CFD_SEQ_PINGED); + + if (wait) { + for_each_cpu(cpu, cfd->cpumask) { +-- +2.26.2 + diff --git a/patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch b/patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch new file mode 100644 index 0000000..8d072ca --- /dev/null +++ b/patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch @@ -0,0 +1,73 @@ +Patch-mainline: Submitted, 1 Mar 2021 14:17:33, lkml +References: bsc#1180846 +From: Juergen Gross +Subject: [PATCH 2/4] kernel/smp: prepare more CSD lock debugging + +In order to be able to easily add more CSD lock debugging data to +struct call_function_data->csd move the call_single_data_t element +into a sub-structure. 
+ +Signed-off-by: Juergen Gross +--- + kernel/smp.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/kernel/smp.c b/kernel/smp.c +index d5f0b21ab55e..6d7e6dbe33dc 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -31,8 +31,12 @@ + CSD_FLAG_SYNCHRONOUS = 0x02, + }; + ++struct cfd_percpu { ++ call_single_data_t csd; ++}; ++ + struct call_function_data { +- call_single_data_t __percpu *csd; ++ struct cfd_percpu __percpu *pcpu; + cpumask_var_t cpumask; + cpumask_var_t cpumask_ipi; + }; +@@ -55,8 +59,8 @@ int smpcfd_prepare_cpu(unsigned int cpu) + free_cpumask_var(cfd->cpumask); + return -ENOMEM; + } +- cfd->csd = alloc_percpu(call_single_data_t); +- if (!cfd->csd) { ++ cfd->pcpu = alloc_percpu(struct cfd_percpu); ++ if (!cfd->pcpu) { + free_cpumask_var(cfd->cpumask); + free_cpumask_var(cfd->cpumask_ipi); + return -ENOMEM; +@@ -71,7 +75,7 @@ int smpcfd_dead_cpu(unsigned int cpu) + + free_cpumask_var(cfd->cpumask); + free_cpumask_var(cfd->cpumask_ipi); +- free_percpu(cfd->csd); ++ free_percpu(cfd->pcpu); + return 0; + } + +@@ -694,7 +698,7 @@ static void smp_call_function_many(const struct cpumask *mask, + + cpumask_clear(cfd->cpumask_ipi); + for_each_cpu(cpu, cfd->cpumask) { +- call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); ++ call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; + + csd_lock(csd); + if (wait) +@@ -719,7 +723,7 @@ static void smp_call_function_many(const struct cpumask *mask, + for_each_cpu(cpu, cfd->cpumask) { + call_single_data_t *csd; + +- csd = per_cpu_ptr(cfd->csd, cpu); ++ csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; + csd_lock_wait(csd); + } + } +-- +2.26.2 + diff --git a/patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch b/patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch new file mode 100644 index 0000000..7fb5824 --- /dev/null +++ b/patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch @@ -0,0 +1,74 @@ +Patch-mainline: v5.10-rc1 +Git-commit: e48c15b796d412ede883bb2ef7779b2a142f7962 +References: bsc#1180846 +From: "Paul E. McKenney" +Date: Mon, 29 Jun 2020 17:21:32 -0700 +Subject: [PATCH] smp: Add source and destination CPUs to __call_single_data + +This commit adds a destination CPU to __call_single_data, and is inspired +by an earlier commit by Peter Zijlstra. This version adds #ifdef to +permit use by 32-bit systems and supplying the destination CPU for all +smp_call_function*() requests, not just smp_call_function_single(). + +If need be, 32-bit systems could be accommodated by shrinking the flags +field to 16 bits (the atomic_t variant is currently unused) and by +providing only eight bits for CPU on such systems. + +It is not clear that the addition of the fields to __call_single_node +are really needed. + +[ paulmck: Apply Boqun Feng feedback on 32-bit builds. ] +Link: https://lore.kernel.org/lkml/20200615164048.GC2531@hirez.programming.kicks-ass.net/ +Cc: Peter Zijlstra +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: Sebastian Andrzej Siewior +Cc: Frederic Weisbecker +Signed-off-by: Paul E. 
McKenney +Signed-off-by: Juergen Gross +--- + include/linux/smp.h | 3 +++ + kernel/smp.c | 6 ++++++ + 2 files changed, 9 insertions(+) + +diff --git a/include/linux/smp.h b/include/linux/smp.h +index 80d557ef8a11..9f13966d3d92 100644 +--- a/include/linux/smp.h ++++ b/include/linux/smp.h +@@ -20,6 +20,9 @@ struct __call_single_data { + smp_call_func_t func; + void *info; + unsigned int flags; ++#ifdef CONFIG_64BIT ++ u16 src, dst; ++#endif + }; + + /* Use __aligned() to avoid to use 2 cache lines for 1 csd */ +diff --git a/kernel/smp.c b/kernel/smp.c +index d0ae8eb6bf8b..865a876f83ce 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -164,6 +164,9 @@ int generic_exec_single(int cpu, call_single_data_t *csd, + + csd->func = func; + csd->info = info; ++#ifdef CONFIG_64BIT ++ csd->dst = cpu; ++#endif + + /* + * The list addition should be visible before sending the IPI +@@ -470,6 +473,9 @@ static void smp_call_function_many(const struct cpumask *mask, + csd->flags |= CSD_FLAG_SYNCHRONOUS; + csd->func = func; + csd->info = info; ++#ifdef CONFIG_64BIT ++ csd->dst = cpu; ++#endif + if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) + __cpumask_set_cpu(cpu, cfd->cpumask_ipi); + } +-- +2.26.2 + diff --git a/series.conf b/series.conf index ade425b..e202f1c 100644 --- a/series.conf +++ b/series.conf @@ -16412,6 +16412,8 @@ patches.suse/ubifs-dent-Fix-some-potential-memory-leaks-while-ite.patch patches.suse/ubifs-journal-Make-sure-to-not-dirty-twice-for-auth-.patch patches.suse/mailbox-avoid-timer-start-from-callback.patch + patches.suse/smp-Add-source-and-destination-CPUs-to-__call_single.patch + patches.suse/kernel-smp-Provide-CSD-lock-timeout-diagnostics.patch patches.suse/xfs-limit-entries-returned-when-counting-fsmap-recor.patch patches.suse/xfs-fix-high-key-handling-in-the-rt-allocator-s-quer.patch patches.suse/xen-events-avoid-removing-an-event-channel-while-han.patch @@ -18038,6 +18040,9 @@ patches.suse/btrfs-remove-btrfs_inode-from-btrfs_delayed_inode_reserve_metadata.patch patches.suse/btrfs-simplify-code-flow-in-btrfs_delayed_inode_reserve_metadata.patch patches.suse/btrfs-unlock-extents-in-btrfs_zero_range-in-case-of-errors.patch + patches.suse/kernel-smp-add-boot-parameter-for-controlling-CSD.patch + patches.suse/kernel-smp-prepare-more-CSD-lock-debugging.patch + patches.suse/kernel-smp-add-more-data-to-CSD-lock-debugging.patch ######################################################## # kbuild/module infrastructure fixes @@ -18539,6 +18544,7 @@ patches.kabi/powerpc-kABI-add-back-suspend_disable_cpu-in-machdep.patch patches.kabi/mm-thp-kABI-move-the-added-flag-to-the-end-of-enum.patch patches.kabi/kabi-repair-after-nvmx-emulate-mtf-when-performing-instruction-emulation + patches.kabi/fix-kabi-after-call_single_data-modification.patch ######################################################## # You'd better have a good reason for adding a patch