From fe6c860b744c9bb5423095aa569928c61229e353 Mon Sep 17 00:00:00 2001 From: Ivan T. Ivanov Date: May 11 2023 10:02:00 +0000 Subject: Merge branch 'users/pmladek/SLE12-SP5/for-next' into SLE12-SP5 --- diff --git a/blacklist.conf b/blacklist.conf index 8d8160d..2c00547 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -577,6 +577,7 @@ b60706644282af04e4aa57da5af57470d453cd1f # vsprintf: cosmetic 1cf12e08bc4d50a76b80c42a3109c53d8794a0c9 # sched/hotplug: added here just to make sure that it will not be backported without followup fixes, e.g. ac687e6e8c26181a33 ac687e6e8c26181a33270efd1a2e2241377924b0 # kthread: not needed; part of a regression fix for the commit 1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug"); the regression commit is blacklisted as well 01341fbd0d8d4e717fc1231cdffe00343088ce0b # workqueue: Non-trivial reasoning why the change is correct. Fixing a corner case. Workqueues are typically allocated only once during boot so that the problem should not happen at runtime. +342ed2400b78072cc01c0130ce41240dec60d56d # workqueue: Cosmetic change. Not worth backporting. 
bsc#1211275 4950276672fce5c241857540f8561c440663673d # kmemcheck removal; not for released products d8be75663cec0069b85f80191abd2682ce4a512f # related to kmemcheck removal; not for released products a6da0024ffc19e0d47712bb5ca4fd083f76b07df # blktrace: fix unlocked registration of tracepoints; racy for ages; found by syzcaller; not worth it diff --git a/patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch b/patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch new file mode 100644 index 0000000..af14bb1 --- /dev/null +++ b/patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch @@ -0,0 +1,60 @@ +From 335a42ebb0ca8ee9997a1731aaaae6dcd704c113 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Tue, 7 Mar 2023 13:53:31 +0100 +Subject: [PATCH] workqueue: Fix hung time report of worker pools +Git-commit: 335a42ebb0ca8ee9997a1731aaaae6dcd704c113 +Patch-mainline: v6.4-rc1 +References: bsc#1211044 + +The workqueue watchdog prints a warning when there is no progress in +a worker pool. Where the progress means that the pool started processing +a pending work item. + +Note that it is perfectly fine to process work items much longer. +The progress should be guaranteed by waking up or creating idle +workers. + +show_one_worker_pool() prints state of non-idle worker pool. It shows +a delay since the last pool->watchdog_ts. + +The timestamp is updated when a first pending work is queued in +__queue_work(). Also it is updated when a work is dequeued for +processing in worker_thread() and rescuer_thread(). + +The delay is misleading when there is no pending work item. In this +case it shows how long the last work item is being processed. Show +zero instead. There is no stall if there is no pending work. 
+ +Fixes: 82607adcf9cdf40fb7b ("workqueue: implement lockup detector") +Signed-off-by: Petr Mladek +Signed-off-by: Tejun Heo + +--- + kernel/workqueue.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -4879,16 +4879,19 @@ void show_workqueue_state(void) + for_each_pool(pool, pi) { + struct worker *worker; + bool first = true; ++ unsigned long hung = 0; + + spin_lock_irqsave(&pool->lock, flags); + if (pool->nr_workers == pool->nr_idle) + goto next_pool; + ++ /* How long the first pending work is waiting for a worker. */ ++ if (!list_empty(&pool->worklist)) ++ hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; ++ + pr_info("pool %d:", pool->id); + pr_cont_pool_info(pool); +- pr_cont(" hung=%us workers=%d", +- jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000, +- pool->nr_workers); ++ pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers); + if (pool->manager) + pr_cont(" manager: %d", + task_pid_nr(pool->manager->task)); diff --git a/patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch b/patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch new file mode 100644 index 0000000..074efe2 --- /dev/null +++ b/patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch @@ -0,0 +1,48 @@ +From 60f540389a5d2df25ddc7ad511b4fa2880dea521 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Tue, 7 Mar 2023 13:53:33 +0100 +Subject: [PATCH] workqueue: Interrupted create_worker() is not a repeated + event +Git-commit: 60f540389a5d2df25ddc7ad511b4fa2880dea521 +Patch-mainline: v6.4-rc1 +References: bsc#1211044 + +kthread_create_on_node() might get interrupted. It is rare but realistic. +For example, when an unbound workqueue is allocated in module_init() +callback. It is done in the context of the "modprobe" process. And, +for example, systemd might kill pending processes when switching root +from initrd to the booted system. 
+ +The interrupt is a one-off event and the race might be hard to reproduce. +It is always worth printing. + +Signed-off-by: Petr Mladek +Signed-off-by: Tejun Heo + +--- + kernel/workqueue.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index 9760f0fca82d..5f0ecaaaf997 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -1959,8 +1959,13 @@ static struct worker *create_worker(struct worker_pool *pool) + worker->task = kthread_create_on_node(worker_thread, worker, pool->node, + "kworker/%s", id_buf); + if (IS_ERR(worker->task)) { +- pr_err_once("workqueue: Failed to create a worker thread: %ld", +- PTR_ERR(worker->task)); ++ if (PTR_ERR(worker->task) == -EINTR) { ++ pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n", ++ id_buf); ++ } else { ++ pr_err_once("workqueue: Failed to create a worker thread: %ld", ++ PTR_ERR(worker->task)); ++ } + goto fail; + } + +-- +2.35.3 + diff --git a/patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch b/patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch new file mode 100644 index 0000000..7d72e05 --- /dev/null +++ b/patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch @@ -0,0 +1,165 @@ +From cd2440d66fec7d1bdb4f605b64c27c63c9141989 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Tue, 7 Mar 2023 13:53:35 +0100 +Subject: [PATCH] workqueue: Print backtraces from CPUs with hung CPU bound + workqueues +Git-commit: cd2440d66fec7d1bdb4f605b64c27c63c9141989 +Patch-mainline: v6.4-rc1 +References: bsc#1211044 + +The workqueue watchdog reports a lockup when there was not any progress +in the worker pool for a long time. The progress means that a pending +work item starts being processed. + +Worker pools for unbound workqueues always wake up an idle worker and +try to process the work immediately. The last idle worker has to create +a new worker first. 
The stall might happen only when a new worker could +not be created in which case an error should get printed. Another problem +might be too high load. In this case, workers are victims of a global +system problem. + +Worker pools for CPU bound workqueues are designed for lightweight +work items that do not need much CPU time. They are processed one by +one on a single worker. A new worker is used only when a work is sleeping. +It creates one additional scenario. The stall might happen when +the CPU-bound workqueue is used for CPU-intensive work. + +More precisely, the stall is detected when a CPU-bound worker is in +the TASK_RUNNING state for too long. In this case, it might be useful +to see the backtrace from the problematic worker. + +The information how long a worker is in the running state is not available. +But the CPU-bound worker pools do not have many workers in the running +state by definition. And only a few pools are typically blocked. + +It should be acceptable to print backtraces from all workers in +TASK_RUNNING state in the stalled worker pools. The number of false +positives should be very low. + +Signed-off-by: Petr Mladek +Signed-off-by: Tejun Heo + +--- + kernel/workqueue.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 57 insertions(+) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -140,6 +141,8 @@ enum { + * WR: wq->mutex protected for writes. Sched-RCU protected for reads. + * + * MD: wq_mayday_lock protected. ++ * ++ * WD: Used internally by the watchdog. 
+ */ + + /* struct worker is defined in workqueue_internal.h */ +@@ -152,6 +155,7 @@ struct worker_pool { + unsigned int flags; /* X: flags */ + + unsigned long watchdog_ts; /* L: watchdog timestamp */ ++ bool cpu_stall; /* WD: stalled cpu bound pool */ + + struct list_head worklist; /* L: list of pending works */ + int nr_workers; /* L: total number of workers */ +@@ -5493,6 +5497,48 @@ static struct timer_list wq_watchdog_tim + static unsigned long wq_watchdog_touched = INITIAL_JIFFIES; + static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES; + ++/* ++ * Show workers that might prevent the processing of pending work items. ++ * The only candidates are CPU-bound workers in the running state. ++ * Pending work items should be handled by another idle worker ++ * in all other situations. ++ */ ++static void show_cpu_pool_hog(struct worker_pool *pool) ++{ ++ struct worker *worker; ++ unsigned long flags; ++ int bkt; ++ ++ spin_lock_irqsave(&pool->lock, flags); ++ ++ hash_for_each(pool->busy_hash, bkt, worker, hentry) { ++ if (worker->task->state == TASK_RUNNING) { ++ pr_info("pool %d:\n", pool->id); ++ sched_show_task(worker->task); ++ } ++ } ++ ++ spin_unlock_irqrestore(&pool->lock, flags); ++} ++ ++static void show_cpu_pools_hogs(void) ++{ ++ struct worker_pool *pool; ++ int pi; ++ ++ pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); ++ ++ rcu_read_lock(); ++ ++ for_each_pool(pool, pi) { ++ if (pool->cpu_stall) ++ show_cpu_pool_hog(pool); ++ ++ } ++ ++ rcu_read_unlock(); ++} ++ + static void wq_watchdog_reset_touched(void) + { + int cpu; +@@ -5506,6 +5552,7 @@ static void wq_watchdog_timer_fn(unsigne + { + unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; + bool lockup_detected = false; ++ bool cpu_pool_stall = false; + unsigned long now = jiffies; + struct worker_pool *pool; + int pi; +@@ -5518,6 +5565,7 @@ static void wq_watchdog_timer_fn(unsigne + for_each_pool(pool, pi) { + unsigned long 
pool_ts, touched, ts; + ++ pool->cpu_stall = false; + if (list_empty(&pool->worklist)) + continue; + +@@ -5547,11 +5595,17 @@ static void wq_watchdog_timer_fn(unsigne + /* did we stall? */ + if (time_after(now, ts + thresh)) { + lockup_detected = true; ++ if (pool->cpu >= 0) { ++ pool->cpu_stall = true; ++ cpu_pool_stall = true; ++ } + pr_emerg("BUG: workqueue lockup - pool"); + pr_cont_pool_info(pool); + pr_cont(" stuck for %us!\n", + jiffies_to_msecs(now - pool_ts) / 1000); + } ++ ++ + } + + rcu_read_unlock(); +@@ -5559,6 +5613,9 @@ static void wq_watchdog_timer_fn(unsigne + if (lockup_detected) + show_workqueue_state(); + ++ if (cpu_pool_stall) ++ show_cpu_pools_hogs(); ++ + wq_watchdog_reset_touched(); + mod_timer(&wq_watchdog_timer, jiffies + thresh); + } diff --git a/patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch b/patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch new file mode 100644 index 0000000..548c900 --- /dev/null +++ b/patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch @@ -0,0 +1,98 @@ +From 3f0ea0b864562c6bd1cee892026067eaea7be242 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Tue, 7 Mar 2023 13:53:32 +0100 +Subject: [PATCH] workqueue: Warn when a new worker could not be created +Git-commit: 3f0ea0b864562c6bd1cee892026067eaea7be242 +Patch-mainline: v6.4-rc1 +References: bsc#1211044 + +The workqueue watchdog reports a lockup when there was not any progress +in the worker pool for a long time. The progress means that a pending +work item starts being processed. + +The progress is guaranteed by using idle workers or creating new workers +for pending work items. + +There are several reasons why a new worker could not be created: + + + there is not enough memory + + + there is no free pool ID (IDR API) + + + the system reached PID limit + + + the process creating the new worker was interrupted + + + the last idle worker (manager) has not been scheduled for a long + time. 
It was not able to even start creating the kthread. + +None of these failures is reported at the moment. The only clue is that +show_one_worker_pool() prints that there is a manager. It is the last +idle worker that is responsible for creating a new one. But it is not +clear if create_worker() is failing and why. + +Make the debugging easier by printing errors in create_worker(). + +The error code is important, especially from kthread_create_on_node(). +It helps to distinguish the various reasons. For example, reaching +memory limit (-ENOMEM), other system limits (-EAGAIN), or process +interrupted (-EINTR). + +Use pr_err_once() to avoid repeating the same error every CREATE_COOLDOWN +for each stuck worker pool. + +Ratelimited printk() might be better. It would help to know if the problem +remains. It would be more clear if the create_worker() errors and workqueue +stalls are related. Also old messages might get lost when the internal log +buffer is full. The problem is that printk() might touch the watchdog. +For example, see touch_nmi_watchdog() in serial8250_console_write(). +It would require synchronization of the begin and length of the ratelimit +interval with the workqueue watchdog. Otherwise, the error messages +might break the watchdog. This does not look worth the complexity. 
+ +Signed-off-by: Petr Mladek +Signed-off-by: Tejun Heo + +--- + kernel/workqueue.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index 8c0ec21a86a2..9760f0fca82d 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -1936,12 +1936,16 @@ static struct worker *create_worker(struct worker_pool *pool) + + /* ID is needed to determine kthread name */ + id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL); +- if (id < 0) ++ if (id < 0) { ++ pr_err_once("workqueue: Failed to allocate a worker ID: %d\n", id); + goto fail; ++ } + + worker = alloc_worker(pool->node); +- if (!worker) ++ if (!worker) { ++ pr_err_once("workqueue: Failed to allocate a worker\n"); + goto fail; ++ } + + worker->pool = pool; + worker->id = id; +@@ -1953,8 +1958,11 @@ static struct worker *create_worker(struct worker_pool *pool) + + worker->task = kthread_create_on_node(worker_thread, worker, pool->node, + "kworker/%s", id_buf); +- if (IS_ERR(worker->task)) ++ if (IS_ERR(worker->task)) { ++ pr_err_once("workqueue: Failed to create a worker thread: %ld", ++ PTR_ERR(worker->task)); + goto fail; ++ } + + set_user_nice(worker->task, pool->attrs->nice); + kthread_bind_mask(worker->task, pool->attrs->cpumask); +-- +2.35.3 + diff --git a/patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch b/patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch new file mode 100644 index 0000000..86e7e53 --- /dev/null +++ b/patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch @@ -0,0 +1,48 @@ +From 4c0736a76a186e5df2cd2afda3e7a04d2a427d1b Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Tue, 7 Mar 2023 13:53:34 +0100 +Subject: [PATCH] workqueue: Warn when a rescuer could not be created +Git-commit: 4c0736a76a186e5df2cd2afda3e7a04d2a427d1b +Patch-mainline: v6.4-rc1 +References: bsc#1211044 + +Rescuers are created when a workqueue with WQ_MEM_RECLAIM is allocated. 
+It typically happens during the system boot. + +systemd switches the root filesystem from initrd to the booted system +during boot. It kills processes that block the switch for too long. +One of the processes might be modprobe that tries to create a workqueue. + +These problems are hard to reproduce. Also alloc_workqueue() does not +pass the error code. Make the debugging easier by printing an error, +similar to create_worker(). + +Signed-off-by: Petr Mladek +Signed-off-by: Tejun Heo + +--- + kernel/workqueue.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -4074,13 +4074,18 @@ struct workqueue_struct *__alloc_workque + struct worker *rescuer; + + rescuer = alloc_worker(NUMA_NO_NODE); +- if (!rescuer) ++ if (!rescuer) { ++ pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n", ++ wq->name); + goto err_destroy; ++ } + + rescuer->rescue_wq = wq; + rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", + wq->name); + if (IS_ERR(rescuer->task)) { ++ pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %ld", ++ wq->name, PTR_ERR(rescuer->task)); + kfree(rescuer); + goto err_destroy; + } diff --git a/series.conf b/series.conf index 175d0a7..a2e8c96 100644 --- a/series.conf +++ b/series.conf @@ -63326,6 +63326,11 @@ patches.suse/USB-dwc3-fix-runtime-pm-imbalance-on-probe-errors.patch patches.suse/USB-dwc3-fix-runtime-pm-imbalance-on-unbind.patch patches.suse/usb-chipidea-fix-missing-goto-in-ci_hdrc_probe.patch + patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch + patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch + patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch + patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch + patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch patches.suse/xfs-verify-buffer-contents-when-we-skip-log-replay.patch # dhowells/linux-fs keys-uefi