Blob Blame History Raw
From 4a19197da8a27b85fa4133a0af4f6f5b5f4e7f62 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Wed, 4 Nov 2020 16:00:20 +0100
Subject: [PATCH] kexec: Do not take mutex when crashing kernel
Patch-mainline: Never, RT specific
References: bsc#1177698

If the current context is an interrupt context, it is not possible
to take a mutex. Instead, rely on cmpxchg to avoid the situation where
two CPUs panic at the same time.

Link: https://lore.kernel.org/lkml/51f3b288-260b-a800-6a47-51d93f892c3d@jv-coder.de/
Patch-by: Joerg Vehlow <lkml@jv-coder.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 kernel/kexec.c          |  8 ++--
 kernel/kexec_core.c     | 84 +++++++++++++++++++++++++++--------------
 kernel/kexec_file.c     |  4 +-
 kernel/kexec_internal.h |  5 ++-
 4 files changed, 66 insertions(+), 35 deletions(-)

--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -251,12 +251,12 @@ SYSCALL_DEFINE4(kexec_load, unsigned lon
 	 *
 	 * KISS: always take the mutex.
 	 */
-	if (!mutex_trylock(&kexec_mutex))
+	if (!kexec_trylock())
 		return -EBUSY;
 
 	result = do_kexec_load(entry, nr_segments, segments, flags);
 
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 
 	return result;
 }
@@ -305,12 +305,12 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compa
 	 *
 	 * KISS: always take the mutex.
 	 */
-	if (!mutex_trylock(&kexec_mutex))
+	if (!kexec_trylock())
 		return -EBUSY;
 
 	result = do_kexec_load(entry, nr_segments, ksegments, flags);
 
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 
 	return result;
 }
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -44,7 +44,7 @@
 #include <crypto/hash.h>
 #include "kexec_internal.h"
 
-DEFINE_MUTEX(kexec_mutex);
+static DEFINE_MUTEX(kexec_mutex);
 
 /* Per cpu memory for storing cpu states in case of system crash. */
 note_buf_t __percpu *crash_notes;
@@ -69,6 +69,43 @@ struct resource crashk_low_res = {
 	.desc  = IORES_DESC_CRASH_KERNEL
 };
 
+void kexec_lock(void)
+{
+	/*
+	 * LOCK kexec_mutex            cmpxchg(&panic_cpu, INVALID, cpu)
+	 * MB                          MB
+	 * panic_cpu == INVALID        kexec_mutex == LOCKED
+	 *
+	 * Ensures either we observe the cmpxchg, or crash_kexec() observes
+	 * our lock acquisition.
+	 */
+	mutex_lock(&kexec_mutex);
+	smp_mb();
+	atomic_cond_read_acquire(&panic_cpu, VAL == PANIC_CPU_INVALID);
+}
+
+int kexec_trylock(void)
+{
+	if (!mutex_trylock(&kexec_mutex))
+		return 0;
+	smp_mb();	/* pairs with cmpxchg(&panic_cpu) in crash_kexec() */
+	if (atomic_read(&panic_cpu) != PANIC_CPU_INVALID) {
+		mutex_unlock(&kexec_mutex);	/* panic in progress */
+		return 0;
+	}
+	return 1;
+}
+
+void kexec_unlock(void)
+{
+	mutex_unlock(&kexec_mutex);
+}
+
+int kexec_is_locked(void)
+{
+	return mutex_is_locked(&kexec_mutex);
+}
+
 int kexec_should_crash(struct task_struct *p)
 {
 	/*
@@ -936,24 +973,13 @@ int kexec_load_disabled;
  */
 void __noclone __crash_kexec(struct pt_regs *regs)
 {
-	/* Take the kexec_mutex here to prevent sys_kexec_load
-	 * running on one cpu from replacing the crash kernel
-	 * we are using after a panic on a different cpu.
-	 *
-	 * If the crash kernel was not located in a fixed area
-	 * of memory the xchg(&kexec_crash_image) would be
-	 * sufficient.  But since I reuse the memory...
-	 */
-	if (mutex_trylock(&kexec_mutex)) {
-		if (kexec_crash_image) {
-			struct pt_regs fixed_regs;
-
-			crash_setup_regs(&fixed_regs, regs);
-			crash_save_vmcoreinfo();
-			machine_crash_shutdown(&fixed_regs);
-			machine_kexec(kexec_crash_image);
-		}
-		mutex_unlock(&kexec_mutex);
+	if (!kexec_is_locked() && kexec_crash_image) {
+		struct pt_regs fixed_regs;
+
+		crash_setup_regs(&fixed_regs, regs);
+		crash_save_vmcoreinfo();
+		machine_crash_shutdown(&fixed_regs);
+		machine_kexec(kexec_crash_image);
 	}
 }
 STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -970,9 +996,11 @@ void crash_kexec(struct pt_regs *regs)
 	this_cpu = raw_smp_processor_id();
 	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
 	if (old_cpu == PANIC_CPU_INVALID) {
-		/* This is the 1st CPU which comes here, so go ahead. */
-		printk_safe_flush_on_panic();
-		__crash_kexec(regs);
+		if (!kexec_is_locked()) {
+			/* This is the 1st CPU which comes here, so go ahead. */
+			printk_safe_flush_on_panic();
+			__crash_kexec(regs);
+		}
 
 		/*
 		 * Reset panic_cpu to allow another panic()/crash_kexec()
@@ -986,10 +1014,10 @@ size_t crash_get_memory_size(void)
 {
 	size_t size = 0;
 
-	mutex_lock(&kexec_mutex);
+	kexec_lock();
 	if (crashk_res.end != crashk_res.start)
 		size = resource_size(&crashk_res);
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 	return size;
 }
 
@@ -1009,7 +1037,7 @@ int crash_shrink_memory(unsigned long ne
 	unsigned long old_size;
 	struct resource *ram_res;
 
-	mutex_lock(&kexec_mutex);
+	kexec_lock();
 
 	if (kexec_crash_image) {
 		ret = -ENOENT;
@@ -1047,7 +1075,7 @@ int crash_shrink_memory(unsigned long ne
 	insert_resource(&iomem_resource, ram_res);
 
 unlock:
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 	return ret;
 }
 
@@ -1119,7 +1147,7 @@ int kernel_kexec(void)
 {
 	int error = 0;
 
-	if (!mutex_trylock(&kexec_mutex))
+	if (!kexec_trylock())
 		return -EBUSY;
 	if (!kexec_image) {
 		error = -EINVAL;
@@ -1196,7 +1224,7 @@ int kernel_kexec(void)
 #endif
 
  Unlock:
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 	return error;
 }
 
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -384,7 +384,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, ke
 
 	image = NULL;
 
-	if (!mutex_trylock(&kexec_mutex))
+	if (!kexec_trylock())
 		return -EBUSY;
 
 	dest_image = &kexec_image;
@@ -452,7 +452,7 @@ out:
 	if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
 		arch_kexec_protect_crashkres();
 
-	mutex_unlock(&kexec_mutex);
+	kexec_unlock();
 	kimage_free(image);
 	return ret;
 }
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -13,7 +13,10 @@ void kimage_terminate(struct kimage *ima
 int kimage_is_destination_range(struct kimage *image,
 				unsigned long start, unsigned long end);
 
-extern struct mutex kexec_mutex;
+void kexec_lock(void);
+int kexec_trylock(void);
+void kexec_unlock(void);
+int kexec_is_locked(void);
 
 #ifdef CONFIG_KEXEC_FILE
 #include <linux/purgatory.h>