From 8d713b699e84aade6b64e241a35f22e166fc8174 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 23 Sep 2021 18:54:41 +0200
Subject: [PATCH] sched: Make might_sleep() output less confusing
Git-commit: 8d713b699e84aade6b64e241a35f22e166fc8174
Patch-mainline: v5.16-rc1
References: bsc#1190137 bsc#1189998

might_sleep() output is pretty informative, but it can be confusing at times,
especially with PREEMPT_RCU when the check triggers due to a voluntary sleep
inside a RCU read side critical section:

 BUG: sleeping function called from invalid context at kernel/test.c:110
 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52
 Preemption disabled at: migrate_disable+0x33/0xa0

in_atomic() is 0, but the report still claims that preemption was disabled
at migrate_disable(), which is completely useless because preemption is not
disabled. The information that would actually help to decode the above,
i.e. the RCU nesting depth, is not printed.
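
A minimal, hypothetical reproducer for this case (a sketch only; the
kernel/test.c referenced above is not part of this patch) is an explicit
might_sleep() annotation inside a preemptible RCU read side critical
section:

 /* Hypothetical sketch, not the actual kernel/test.c code */
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>

 static void test_rcu_sleep(void)
 {
 	rcu_read_lock();	/* RCU nest depth becomes 1, preempt_count() stays 0 */
 	might_sleep();		/* debug check fires here under PREEMPT_RCU */
 	rcu_read_unlock();
 }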

That becomes even more confusing when might_sleep() is invoked from
cond_resched_lock() within a RCU read side critical section. Here the
expected preemption count is 1 and not 0.

 BUG: sleeping function called from invalid context at kernel/test.c:131
 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52
 Preemption disabled at: test_cond_lock+0xf3/0x1c0

So in_atomic() is set, which is expected as the caller holds a spinlock, but
it is unclear why the report triggers at all, and the preempt disable IP just
points at the place where preemption was legitimately disabled, i.e.
spin_lock(), which is obviously not helpful either.
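
A hypothetical sketch of such a caller (again only a reconstruction of the
pattern described above; the body of the actual test_cond_lock() is not part
of this patch) could look like:

 /* Hypothetical sketch, reconstructing the pattern described above */
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>

 static DEFINE_SPINLOCK(test_lock);

 static void test_cond_lock(void)
 {
 	spin_lock(&test_lock);		/* preempt_count() becomes 1 */
 	rcu_read_lock();		/* RCU nest depth becomes 1 */
 	cond_resched_lock(&test_lock);	/* might_sleep() check with expected count 1 */
 	rcu_read_unlock();
 	spin_unlock(&test_lock);
 }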

Make that more useful in general:

 - Print preempt_count() and the expected value

and for the CONFIG_PREEMPT_RCU case:

 - Print the RCU read side critical section nesting depth

 - Print the preempt disable IP only when preempt count
   does not have the expected value.

So the might_sleep() dump from within a preemptible RCU read side
critical section becomes:

 BUG: sleeping function called from invalid context at kernel/test.c:110
 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52
 preempt_count: 0, expected: 0
 RCU nest depth: 1, expected: 0

and the cond_resched_lock() case becomes:

 BUG: sleeping function called from invalid context at kernel/test.c:141
 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52
 preempt_count: 1, expected: 1
 RCU nest depth: 1, expected: 0

which makes it pretty obvious what's going on. For all other cases the
preempt disable IP is still printed as before:

 BUG: sleeping function called from invalid context at kernel/test.c:156
 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
 preempt_count: 1, expected: 0
 RCU nest depth: 0, expected: 0
 Preemption disabled at:
 [<ffffffff82b48326>] test_might_sleep+0xbe/0xf8

 BUG: sleeping function called from invalid context at kernel/test.c:163
 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
 preempt_count: 1, expected: 0
 RCU nest depth: 1, expected: 0
 Preemption disabled at:
 [<ffffffff82b48326>] test_might_sleep+0x1e4/0x280

This also prepares for providing better debugging output for RT enabled
kernels and their spinlock substitutions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210923165358.181022656@linutronix.de
Signed-off-by: Davidlohr Bueso <dbueso@suse.de> 

---
 kernel/sched/core.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a7c6069ddf9d..0a27cb8f72a9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9493,6 +9493,18 @@ void __might_sleep(const char *file, int line)
 }
 EXPORT_SYMBOL(__might_sleep);
 
+static void print_preempt_disable_ip(int preempt_offset, unsigned long ip)
+{
+	if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT))
+		return;
+
+	if (preempt_count() == preempt_offset)
+		return;
+
+	pr_err("Preemption disabled at:");
+	print_ip_sym(KERN_ERR, ip);
+}
+
 void __might_resched(const char *file, int line, int preempt_offset)
 {
 	/* Ratelimiting timestamp: */
@@ -9521,6 +9533,13 @@ void __might_resched(const char *file, int line, int preempt_offset)
 	pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
 	       in_atomic(), irqs_disabled(), current->non_block_count,
 	       current->pid, current->comm);
+	pr_err("preempt_count: %x, expected: %x\n", preempt_count(),
+	       preempt_offset);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
+		pr_err("RCU nest depth: %d, expected: 0\n",
+		       rcu_preempt_depth());
+	}
 
 	if (task_stack_end_corrupted(current))
 		pr_emerg("Thread overran stack, or stack corrupted\n");
@@ -9528,11 +9547,9 @@ void __might_resched(const char *file, int line, int preempt_offset)
 	debug_show_held_locks(current);
 	if (irqs_disabled())
 		print_irqtrace_events(current);
-	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
-	    && !preempt_count_equals(preempt_offset)) {
-		pr_err("Preemption disabled at:");
-		print_ip_sym(KERN_ERR, preempt_disable_ip);
-	}
+
+	print_preempt_disable_ip(preempt_offset, preempt_disable_ip);
+
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
-- 
2.26.2