From cd2440d66fec7d1bdb4f605b64c27c63c9141989 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Tue, 7 Mar 2023 13:53:35 +0100
Subject: [PATCH] workqueue: Print backtraces from CPUs with hung CPU bound
 workqueues
Git-commit: cd2440d66fec7d1bdb4f605b64c27c63c9141989
Patch-mainline: v6.4-rc1
References: bsc#1211044

The workqueue watchdog reports a lockup when there was not any progress
in the worker pool for a long time. The progress means that a pending
work item starts being proceed.

Worker pools for unbound workqueues always wake up an idle worker and
try to process the work immediately. The last idle worker has to create
new worker first. The stall might happen only when a new worker could
not be created in which case an error should get printed. Another problem
might be too high load. In this case, workers are victims of a global
system problem.

Worker pools for CPU bound workqueues are designed for lightweight
work items that do not need much CPU time. They are proceed one by
one on a single worker. New worker is used only when a work is sleeping.
It creates one additional scenario. The stall might happen when
the CPU-bound workqueue is used for CPU-intensive work.

More precisely, the stall is detected when a CPU-bound worker is in
the TASK_RUNNING state for too long. In this case, it might be useful
to see the backtrace from the problematic worker.

The information how long a worker is in the running state is not available.
But the CPU-bound worker pools do not have many workers in the running
state by definition. And only few pools are typically blocked.

It should be acceptable to print backtraces from all workers in
TASK_RUNNING state in the stalled worker pools. The number of false
positives should be very low.

Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

---
 kernel/workqueue.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -48,6 +48,7 @@
 #include <linux/nodemask.h>
 #include <linux/moduleparam.h>
 #include <linux/uaccess.h>
+#include <linux/sched/debug.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
 
@@ -140,6 +141,8 @@ enum {
  * WR: wq->mutex protected for writes.  Sched-RCU protected for reads.
  *
  * MD: wq_mayday_lock protected.
+ *
+ * WD: Used internally by the watchdog.
  */
 
 /* struct worker is defined in workqueue_internal.h */
@@ -152,6 +155,7 @@ struct worker_pool {
 	unsigned int		flags;		/* X: flags */
 
 	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
+	bool			cpu_stall;	/* WD: stalled cpu bound pool */
 
 	struct list_head	worklist;	/* L: list of pending works */
 	int			nr_workers;	/* L: total number of workers */
@@ -5493,6 +5497,48 @@ static struct timer_list wq_watchdog_tim
 static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
 
+/*
+ * Show workers that might prevent the processing of pending work items.
+ * The only candidates are CPU-bound workers in the running state.
+ * Pending work items should be handled by another idle worker
+ * in all other situations.
+ */
+static void show_cpu_pool_hog(struct worker_pool *pool)
+{
+	struct worker *worker;
+	unsigned long flags;
+	int bkt;
+
+	spin_lock_irqsave(&pool->lock, flags);
+
+	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+		if (worker->task->state == TASK_RUNNING) {
+			pr_info("pool %d:\n", pool->id);
+			sched_show_task(worker->task);
+		}
+	}
+
+	spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static void show_cpu_pools_hogs(void)
+{
+	struct worker_pool *pool;
+	int pi;
+
+	pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n");
+
+	rcu_read_lock();
+
+	for_each_pool(pool, pi) {
+		if (pool->cpu_stall)
+			show_cpu_pool_hog(pool);
+
+	}
+
+	rcu_read_unlock();
+}
+
 static void wq_watchdog_reset_touched(void)
 {
 	int cpu;
@@ -5506,6 +5552,7 @@ static void wq_watchdog_timer_fn(unsigne
 {
 	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
 	bool lockup_detected = false;
+	bool cpu_pool_stall = false;
 	unsigned long now = jiffies;
 	struct worker_pool *pool;
 	int pi;
@@ -5518,6 +5565,7 @@ static void wq_watchdog_timer_fn(unsigne
 	for_each_pool(pool, pi) {
 		unsigned long pool_ts, touched, ts;
 
+		pool->cpu_stall = false;
 		if (list_empty(&pool->worklist))
 			continue;
 
@@ -5547,11 +5595,17 @@ static void wq_watchdog_timer_fn(unsigne
 		/* did we stall? */
 		if (time_after(now, ts + thresh)) {
 			lockup_detected = true;
+			if (pool->cpu >= 0) {
+				pool->cpu_stall = true;
+				cpu_pool_stall = true;
+			}
 			pr_emerg("BUG: workqueue lockup - pool");
 			pr_cont_pool_info(pool);
 			pr_cont(" stuck for %us!\n",
 				jiffies_to_msecs(now - pool_ts) / 1000);
 		}
+
+
 	}
 
 	rcu_read_unlock();
@@ -5559,6 +5613,9 @@ static void wq_watchdog_timer_fn(unsigne
 	if (lockup_detected)
 		show_workqueue_state();
 
+	if (cpu_pool_stall)
+		show_cpu_pools_hogs();
+
 	wq_watchdog_reset_touched();
 	mod_timer(&wq_watchdog_timer, jiffies + thresh);
 }