From fe6c860b744c9bb5423095aa569928c61229e353 Mon Sep 17 00:00:00 2001
From: Ivan T. Ivanov <iivanov@suse.de>
Date: May 11 2023 10:02:00 +0000
Subject: Merge branch 'users/pmladek/SLE12-SP5/for-next' into SLE12-SP5


---

diff --git a/blacklist.conf b/blacklist.conf
index 8d8160d..2c00547 100644
--- a/blacklist.conf
+++ b/blacklist.conf
@@ -577,6 +577,7 @@ b60706644282af04e4aa57da5af57470d453cd1f # vsprintf: cosmetic
 1cf12e08bc4d50a76b80c42a3109c53d8794a0c9 # sched/hotplug: added here just to make sure that it will not be backported without followup fixes, e.g. ac687e6e8c26181a33
 ac687e6e8c26181a33270efd1a2e2241377924b0 # kthread: not needed; part of a regression fix for the commit 1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug"); the regression commit is blacklisted as well
 01341fbd0d8d4e717fc1231cdffe00343088ce0b # workqueue: Non-trivial reasoning why the change is correct. Fixing a corner case. Workqueues are typically allocated only once during boot so that the problem should not happen at runtime.
+342ed2400b78072cc01c0130ce41240dec60d56d # workqueue: Cosmetic change. Not worth backporting. bsc#1211275
 4950276672fce5c241857540f8561c440663673d # kmemcheck removal; not for released products
 d8be75663cec0069b85f80191abd2682ce4a512f # related to kmemcheck removal; not for released products
 a6da0024ffc19e0d47712bb5ca4fd083f76b07df # blktrace: fix unlocked registration of tracepoints; racy for ages; found by syzcaller; not worth it
diff --git a/patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch b/patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch
new file mode 100644
index 0000000..af14bb1
--- /dev/null
+++ b/patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch
@@ -0,0 +1,60 @@
+From 335a42ebb0ca8ee9997a1731aaaae6dcd704c113 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Tue, 7 Mar 2023 13:53:31 +0100
+Subject: [PATCH] workqueue: Fix hung time report of worker pools
+Git-commit: 335a42ebb0ca8ee9997a1731aaaae6dcd704c113
+Patch-mainline: v6.4-rc1
+References: bsc#1211044
+
+The workqueue watchdog prints a warning when there is no progress in
+a worker pool, where progress means that the pool started processing
+a pending work item.
+
+Note that it is perfectly fine to process work items much longer.
+The progress should be guaranteed by waking up or creating idle
+workers.
+
+show_one_worker_pool() prints state of non-idle worker pool. It shows
+a delay since the last pool->watchdog_ts.
+
+The timestamp is updated when a first pending work is queued in
+__queue_work(). Also it is updated when a work is dequeued for
+processing in worker_thread() and rescuer_thread().
+
+The delay is misleading when there is no pending work item. In this
+case it shows how long the last work item is being processed. Show
+zero instead. There is no stall if there is no pending work.
+
+Fixes: 82607adcf9cdf40fb7b ("workqueue: implement lockup detector")
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+
+---
+ kernel/workqueue.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -4879,16 +4879,19 @@ void show_workqueue_state(void)
+ 	for_each_pool(pool, pi) {
+ 		struct worker *worker;
+ 		bool first = true;
++		unsigned long hung = 0;
+ 
+ 		spin_lock_irqsave(&pool->lock, flags);
+ 		if (pool->nr_workers == pool->nr_idle)
+ 			goto next_pool;
+ 
++		/* How long the first pending work is waiting for a worker. */
++		if (!list_empty(&pool->worklist))
++			hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000;
++
+ 		pr_info("pool %d:", pool->id);
+ 		pr_cont_pool_info(pool);
+-		pr_cont(" hung=%us workers=%d",
+-			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+-			pool->nr_workers);
++		pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers);
+ 		if (pool->manager)
+ 			pr_cont(" manager: %d",
+ 				task_pid_nr(pool->manager->task));
diff --git a/patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch b/patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch
new file mode 100644
index 0000000..074efe2
--- /dev/null
+++ b/patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch
@@ -0,0 +1,48 @@
+From 60f540389a5d2df25ddc7ad511b4fa2880dea521 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Tue, 7 Mar 2023 13:53:33 +0100
+Subject: [PATCH] workqueue: Interrupted create_worker() is not a repeated
+ event
+Git-commit: 60f540389a5d2df25ddc7ad511b4fa2880dea521
+Patch-mainline: v6.4-rc1
+References: bsc#1211044
+
+kthread_create_on_node() might get interrupted. It is rare but realistic.
+For example, when an unbound workqueue is allocated in module_init()
+callback. It is done in the context of the "modprobe" process. And,
+for example, systemd might kill pending processes when switching root
+from initrd to the booted system.
+
+The interrupt is a one-off event and the race might be hard to reproduce.
+It is always worth printing.
+
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+
+---
+ kernel/workqueue.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index 9760f0fca82d..5f0ecaaaf997 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1959,8 +1959,13 @@ static struct worker *create_worker(struct worker_pool *pool)
+ 	worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
+ 					      "kworker/%s", id_buf);
+ 	if (IS_ERR(worker->task)) {
+-		pr_err_once("workqueue: Failed to create a worker thread: %ld",
+-			    PTR_ERR(worker->task));
++		if (PTR_ERR(worker->task) == -EINTR) {
++			pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
++			       id_buf);
++		} else {
++			pr_err_once("workqueue: Failed to create a worker thread: %ld",
++				    PTR_ERR(worker->task));
++		}
+ 		goto fail;
+ 	}
+ 
+-- 
+2.35.3
+
diff --git a/patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch b/patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch
new file mode 100644
index 0000000..7d72e05
--- /dev/null
+++ b/patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch
@@ -0,0 +1,165 @@
+From cd2440d66fec7d1bdb4f605b64c27c63c9141989 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Tue, 7 Mar 2023 13:53:35 +0100
+Subject: [PATCH] workqueue: Print backtraces from CPUs with hung CPU bound
+ workqueues
+Git-commit: cd2440d66fec7d1bdb4f605b64c27c63c9141989
+Patch-mainline: v6.4-rc1
+References: bsc#1211044
+
+The workqueue watchdog reports a lockup when there was not any progress
+in the worker pool for a long time. The progress means that a pending
+work item starts being processed.
+
+Worker pools for unbound workqueues always wake up an idle worker and
+try to process the work immediately. The last idle worker has to create
+new worker first. The stall might happen only when a new worker could
+not be created in which case an error should get printed. Another problem
+might be too high load. In this case, workers are victims of a global
+system problem.
+
+Worker pools for CPU bound workqueues are designed for lightweight
+work items that do not need much CPU time. They are processed one by
+one on a single worker. A new worker is used only when a work is sleeping.
+It creates one additional scenario. The stall might happen when
+the CPU-bound workqueue is used for CPU-intensive work.
+
+More precisely, the stall is detected when a CPU-bound worker is in
+the TASK_RUNNING state for too long. In this case, it might be useful
+to see the backtrace from the problematic worker.
+
+The information about how long a worker is in the running state is not
+available. But the CPU-bound worker pools do not have many workers in the
+running state by definition. And only a few pools are typically blocked.
+
+It should be acceptable to print backtraces from all workers in
+TASK_RUNNING state in the stalled worker pools. The number of false
+positives should be very low.
+
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+
+---
+ kernel/workqueue.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 57 insertions(+)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -48,6 +48,7 @@
+ #include <linux/nodemask.h>
+ #include <linux/moduleparam.h>
+ #include <linux/uaccess.h>
++#include <linux/sched/debug.h>
+ #include <linux/nmi.h>
+ #include <linux/kvm_para.h>
+ 
+@@ -140,6 +141,8 @@ enum {
+  * WR: wq->mutex protected for writes.  Sched-RCU protected for reads.
+  *
+  * MD: wq_mayday_lock protected.
++ *
++ * WD: Used internally by the watchdog.
+  */
+ 
+ /* struct worker is defined in workqueue_internal.h */
+@@ -152,6 +155,7 @@ struct worker_pool {
+ 	unsigned int		flags;		/* X: flags */
+ 
+ 	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
++	bool			cpu_stall;	/* WD: stalled cpu bound pool */
+ 
+ 	struct list_head	worklist;	/* L: list of pending works */
+ 	int			nr_workers;	/* L: total number of workers */
+@@ -5493,6 +5497,48 @@ static struct timer_list wq_watchdog_tim
+ static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
+ static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+ 
++/*
++ * Show workers that might prevent the processing of pending work items.
++ * The only candidates are CPU-bound workers in the running state.
++ * Pending work items should be handled by another idle worker
++ * in all other situations.
++ */
++static void show_cpu_pool_hog(struct worker_pool *pool)
++{
++	struct worker *worker;
++	unsigned long flags;
++	int bkt;
++
++	spin_lock_irqsave(&pool->lock, flags);
++
++	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
++		if (worker->task->state == TASK_RUNNING) {
++			pr_info("pool %d:\n", pool->id);
++			sched_show_task(worker->task);
++		}
++	}
++
++	spin_unlock_irqrestore(&pool->lock, flags);
++}
++
++static void show_cpu_pools_hogs(void)
++{
++	struct worker_pool *pool;
++	int pi;
++
++	pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n");
++
++	rcu_read_lock();
++
++	for_each_pool(pool, pi) {
++		if (pool->cpu_stall)
++			show_cpu_pool_hog(pool);
++
++	}
++
++	rcu_read_unlock();
++}
++
+ static void wq_watchdog_reset_touched(void)
+ {
+ 	int cpu;
+@@ -5506,6 +5552,7 @@ static void wq_watchdog_timer_fn(unsigne
+ {
+ 	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+ 	bool lockup_detected = false;
++	bool cpu_pool_stall = false;
+ 	unsigned long now = jiffies;
+ 	struct worker_pool *pool;
+ 	int pi;
+@@ -5518,6 +5565,7 @@ static void wq_watchdog_timer_fn(unsigne
+ 	for_each_pool(pool, pi) {
+ 		unsigned long pool_ts, touched, ts;
+ 
++		pool->cpu_stall = false;
+ 		if (list_empty(&pool->worklist))
+ 			continue;
+ 
+@@ -5547,11 +5595,17 @@ static void wq_watchdog_timer_fn(unsigne
+ 		/* did we stall? */
+ 		if (time_after(now, ts + thresh)) {
+ 			lockup_detected = true;
++			if (pool->cpu >= 0) {
++				pool->cpu_stall = true;
++				cpu_pool_stall = true;
++			}
+ 			pr_emerg("BUG: workqueue lockup - pool");
+ 			pr_cont_pool_info(pool);
+ 			pr_cont(" stuck for %us!\n",
+ 				jiffies_to_msecs(now - pool_ts) / 1000);
+ 		}
++
++
+ 	}
+ 
+ 	rcu_read_unlock();
+@@ -5559,6 +5613,9 @@ static void wq_watchdog_timer_fn(unsigne
+ 	if (lockup_detected)
+ 		show_workqueue_state();
+ 
++	if (cpu_pool_stall)
++		show_cpu_pools_hogs();
++
+ 	wq_watchdog_reset_touched();
+ 	mod_timer(&wq_watchdog_timer, jiffies + thresh);
+ }
diff --git a/patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch b/patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch
new file mode 100644
index 0000000..548c900
--- /dev/null
+++ b/patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch
@@ -0,0 +1,98 @@
+From 3f0ea0b864562c6bd1cee892026067eaea7be242 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Tue, 7 Mar 2023 13:53:32 +0100
+Subject: [PATCH] workqueue: Warn when a new worker could not be created
+Git-commit: 3f0ea0b864562c6bd1cee892026067eaea7be242
+Patch-mainline: v6.4-rc1
+References: bsc#1211044
+
+The workqueue watchdog reports a lockup when there was not any progress
+in the worker pool for a long time. The progress means that a pending
+work item starts being processed.
+
+The progress is guaranteed by using idle workers or creating new workers
+for pending work items.
+
+There are several reasons why a new worker could not be created:
+
+   + there is not enough memory
+
+   + there is no free pool ID (IDR API)
+
+   + the system reached PID limit
+
+   + the process creating the new worker was interrupted
+
+   + the last idle worker (manager) has not been scheduled for a long
+     time. It was not able to even start creating the kthread.
+
+None of these failures is reported at the moment. The only clue is that
+show_one_worker_pool() prints that there is a manager. It is the last
+idle worker that is responsible for creating a new one. But it is not
+clear if create_worker() is failing and why.
+
+Make the debugging easier by printing errors in create_worker().
+
+The error code is important, especially from kthread_create_on_node().
+It helps to distinguish the various reasons. For example, reaching
+memory limit (-ENOMEM), other system limits (-EAGAIN), or process
+interrupted (-EINTR).
+
+Use pr_err_once() to avoid repeating the same error every CREATE_COOLDOWN
+for each stuck worker pool.
+
+Ratelimited printk() might be better. It would help to know if the problem
+remains. It would be more clear if the create_worker() errors and workqueue
+stalls are related. Also old messages might get lost when the internal log
+buffer is full. The problem is that printk() might touch the watchdog.
+For example, see touch_nmi_watchdog() in serial8250_console_write().
+It would require synchronization of the begin and length of the ratelimit
+interval with the workqueue watchdog. Otherwise, the error messages
+might break the watchdog. This does not look worth the complexity.
+
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+
+---
+ kernel/workqueue.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index 8c0ec21a86a2..9760f0fca82d 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1936,12 +1936,16 @@ static struct worker *create_worker(struct worker_pool *pool)
+ 
+ 	/* ID is needed to determine kthread name */
+ 	id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
+-	if (id < 0)
++	if (id < 0) {
++		pr_err_once("workqueue: Failed to allocate a worker ID: %d\n", id);
+ 		goto fail;
++	}
+ 
+ 	worker = alloc_worker(pool->node);
+-	if (!worker)
++	if (!worker) {
++		pr_err_once("workqueue: Failed to allocate a worker\n");
+ 		goto fail;
++	}
+ 
+ 	worker->pool = pool;
+ 	worker->id = id;
+@@ -1953,8 +1958,11 @@ static struct worker *create_worker(struct worker_pool *pool)
+ 
+ 	worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
+ 					      "kworker/%s", id_buf);
+-	if (IS_ERR(worker->task))
++	if (IS_ERR(worker->task)) {
++		pr_err_once("workqueue: Failed to create a worker thread: %ld",
++			    PTR_ERR(worker->task));
+ 		goto fail;
++	}
+ 
+ 	set_user_nice(worker->task, pool->attrs->nice);
+ 	kthread_bind_mask(worker->task, pool->attrs->cpumask);
+-- 
+2.35.3
+
diff --git a/patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch b/patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch
new file mode 100644
index 0000000..86e7e53
--- /dev/null
+++ b/patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch
@@ -0,0 +1,48 @@
+From 4c0736a76a186e5df2cd2afda3e7a04d2a427d1b Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Tue, 7 Mar 2023 13:53:34 +0100
+Subject: [PATCH] workqueue: Warn when a rescuer could not be created
+Git-commit: 4c0736a76a186e5df2cd2afda3e7a04d2a427d1b
+Patch-mainline: v6.4-rc1
+References: bsc#1211044
+
+Rescuers are created when a workqueue with WQ_MEM_RECLAIM is allocated.
+It typically happens during the system boot.
+
+systemd switches the root filesystem from initrd to the booted system
+during boot. It kills processes that block the switch for too long.
+One of the processes might be modprobe that tries to create a workqueue.
+
+These problems are hard to reproduce. Also alloc_workqueue() does not
+pass the error code. Make the debugging easier by printing an error,
+similar to create_worker().
+
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+
+---
+ kernel/workqueue.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -4074,13 +4074,18 @@ struct workqueue_struct *__alloc_workque
+ 		struct worker *rescuer;
+ 
+ 		rescuer = alloc_worker(NUMA_NO_NODE);
+-		if (!rescuer)
++		if (!rescuer) {
++			pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n",
++			       wq->name);
+ 			goto err_destroy;
++		}
+ 
+ 		rescuer->rescue_wq = wq;
+ 		rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
+ 					       wq->name);
+ 		if (IS_ERR(rescuer->task)) {
++			pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %ld",
++			       wq->name, PTR_ERR(rescuer->task));
+ 			kfree(rescuer);
+ 			goto err_destroy;
+ 		}
diff --git a/series.conf b/series.conf
index 175d0a7..a2e8c96 100644
--- a/series.conf
+++ b/series.conf
@@ -63326,6 +63326,11 @@
 	patches.suse/USB-dwc3-fix-runtime-pm-imbalance-on-probe-errors.patch
 	patches.suse/USB-dwc3-fix-runtime-pm-imbalance-on-unbind.patch
 	patches.suse/usb-chipidea-fix-missing-goto-in-ci_hdrc_probe.patch
+	patches.suse/workqueue-Fix-hung-time-report-of-worker-pools.patch
+	patches.suse/workqueue-Warn-when-a-new-worker-could-not-be-create.patch
+	patches.suse/workqueue-Interrupted-create_worker-is-not-a-repeate.patch
+	patches.suse/workqueue-Warn-when-a-rescuer-could-not-be-created.patch
+	patches.suse/workqueue-Print-backtraces-from-CPUs-with-hung-CPU-b.patch
 	patches.suse/xfs-verify-buffer-contents-when-we-skip-log-replay.patch
 
 	# dhowells/linux-fs keys-uefi