diff --git a/blacklist.conf b/blacklist.conf index 095402a..8bedd97 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -549,7 +549,6 @@ d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 # Breaks build 9aee5f8a7e30330d0a8f4c626dc924ca5590aba5 # Doesn't fix a bug, breaks kABI 45b575c00d8e72d69d75dd8c112f044b7b01b069 # Doesn't fix a bug, breaks kABI 78ce241099bb363b19dbd0245442e66c8de8f567 # Not relevant -def98c84b6cdf2eeea19ec5736e90e316df5206b # workqueue: Too intrusive. Could the rescuer be needed to drain the queue? Anyway, the most likely reason for sanity check failure was hopefully fixed by commit e66b39af00f426b3356b ("workqueue: Fix pwq ref leak in rescuer_thread()") 65099ea85e885c3ea1272eca8774b771419d8ce8 # iio: revert: not applicable 12f92866f13f9ca12e158c07978246ed83d52ed0 # media: revert: not applicable 99fb0f25c448ab72481bd700b66e0e48c583ef5a # mfd: cros_ec: not applicable @@ -564,7 +563,9 @@ c3fee60908db4a8594f2e4a2131998384b8fa006 # printk: cosmetic; anyway, it fixes a 0f7636e1654338c34e3c220c02b2ffad78b6ccc0 # printk: cosmetic; documentation 89ccf18f032f26946e2ea6258120472eec6aa745 # printk: not critical; allow to use the full buffer when using log dumpers b665eae7a788c5e2bc10f9ac3c0137aa0ad1fc97 # printk: cosmetic problem +d2130e82e9454304e9b91ba9da551b5989af8c27 # printk: cosmetic problem; wrong value shown in log 57116ce17b04fde2fe30f0859df69d8dbe5809f6 # printk/workqueue: very hard to hit; works well with lockless ringuffer; but it might cause wrong timestamps or even lost messages on 4.12 where per-CPU buffers are used +900fdc4573766dd43b847b4f54bd4a1ee2bc7360 # vsprintf: non-trivial change that modifies the behavior a bit; it should be safe because it is in the mainline for a long time without regression reports; but who knows; it is rather a corner case; it does not look worth the risk 075e1a0c50f59ea210561d0d0fedbd945615df78 # sysrq: prehistoric bug, non-critical, found by code review b642e44e8ab335868b549fe5753b783ca47bf3a3 # kstrto*: comment fix 
ef0f2685336bbc334e8b6997ce9b155e5f7edd31 # kstrto*: comment fix @@ -573,6 +574,9 @@ b60706644282af04e4aa57da5af57470d453cd1f # vsprintf: cosmetic 741a76b350897604c48fb12beff1c9b77724dc96 # kthread: fixes rather rare races in CPU hotplug; there are several followup fixes on top of it to get it actually right; does not worth the risk 4ca1085c9573ea08767521dabce62456e3fc2fd0 # kthread: comment fix 0687c66b5f666b5ad433f4e94251590d9bc9d10e # kthread: Fixes debugging of the life cycle of work struct. Broken for ages. Disabled in our configuration. +1cf12e08bc4d50a76b80c42a3109c53d8794a0c9 # sched/hotplug: added here just to make sure that it will not be backported without followup fixes, e.g. ac687e6e8c26181a33 +ac687e6e8c26181a33270efd1a2e2241377924b0 # kthread: not needed; part of a regression fix for the commit 1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug"); the regression commit is blacklisted as well +01341fbd0d8d4e717fc1231cdffe00343088ce0b # workqueue: Non-trivial reasoning why the change is correct. Fixing a corner case. Workqueues are typically allocated only once during boot so that the problem should not happen at runtime. 
4950276672fce5c241857540f8561c440663673d # kmemcheck removal; not for released products d8be75663cec0069b85f80191abd2682ce4a512f # related to kmemcheck removal; not for released products a6da0024ffc19e0d47712bb5ca4fd083f76b07df # blktrace: fix unlocked registration of tracepoints; racy for ages; found by syzcaller; not worth it diff --git a/patches.suse/printk-Give-error-on-attempt-to-set-log-buffer-lengt.patch b/patches.suse/printk-Give-error-on-attempt-to-set-log-buffer-lengt.patch new file mode 100644 index 0000000..4222a6f --- /dev/null +++ b/patches.suse/printk-Give-error-on-attempt-to-set-log-buffer-lengt.patch @@ -0,0 +1,89 @@ +From e6fe3e5b7d16e8f146a4ae7fe481bc6e97acde1e Mon Sep 17 00:00:00 2001 +From: He Zhe +Date: Sun, 30 Sep 2018 00:45:53 +0800 +Subject: [PATCH] printk: Give error on attempt to set log buffer length to + over 2G +Git-commit: e6fe3e5b7d16e8f146a4ae7fe481bc6e97acde1e +Patch-mainline: v4.20-rc1 +References: bsc#1210534 + +The current printk() is ready to handle log buffer size up to 2G. +Give an explicit error for users who want to use larger log buffer. + +Also fix printk formatting to show the 2G as a positive number. + +Link: http://lkml.kernel.org/r/20181008135916.gg4kkmoki5bgtco5@pathway.suse.cz +Cc: rostedt@goodmis.org +Cc: linux-kernel@vger.kernel.org +Suggested-by: Sergey Senozhatsky +Signed-off-by: He Zhe +Reviewed-by: Sergey Senozhatsky +[pmladek: Fixed to the really safe limit 2GB.] 
+Signed-off-by: Petr Mladek + +--- + kernel/printk/printk.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 15f3e70be448..fce696d80e09 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -440,6 +440,7 @@ static u32 clear_idx; + /* record buffer */ + #define LOG_ALIGN __alignof__(struct printk_log) + #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) ++#define LOG_BUF_LEN_MAX (u32)(1 << 31) + static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); + static char *log_buf = __log_buf; + static u32 log_buf_len = __LOG_BUF_LEN; +@@ -1040,18 +1041,23 @@ void log_buf_vmcoreinfo_setup(void) + static unsigned long __initdata new_log_buf_len; + + /* we practice scaling the ring buffer by powers of 2 */ +-static void __init log_buf_len_update(unsigned size) ++static void __init log_buf_len_update(u64 size) + { ++ if (size > (u64)LOG_BUF_LEN_MAX) { ++ size = (u64)LOG_BUF_LEN_MAX; ++ pr_err("log_buf over 2G is not supported.\n"); ++ } ++ + if (size) + size = roundup_pow_of_two(size); + if (size > log_buf_len) +- new_log_buf_len = size; ++ new_log_buf_len = (unsigned long)size; + } + + /* save requested log_buf_len since it's too early to process it */ + static int __init log_buf_len_setup(char *str) + { +- unsigned int size; ++ u64 size; + + if (!str) + return -EINVAL; +@@ -1121,7 +1127,7 @@ void __init setup_log_buf(int early) + } + + if (unlikely(!new_log_buf)) { +- pr_err("log_buf_len: %ld bytes not available\n", ++ pr_err("log_buf_len: %lu bytes not available\n", + new_log_buf_len); + return; + } +@@ -1134,8 +1140,8 @@ void __init setup_log_buf(int early) + memcpy(log_buf, __log_buf, __LOG_BUF_LEN); + logbuf_unlock_irqrestore(flags); + +- pr_info("log_buf_len: %d bytes\n", log_buf_len); +- pr_info("early log buf free: %d(%d%%)\n", ++ pr_info("log_buf_len: %u bytes\n", log_buf_len); ++ pr_info("early log buf free: %u(%u%%)\n", + free, (free * 100) / 
__LOG_BUF_LEN); + } + +-- +2.35.3 + diff --git a/patches.suse/workqueue-Fix-missing-kfree-rescuer-in-destroy_workq.patch b/patches.suse/workqueue-Fix-missing-kfree-rescuer-in-destroy_workq.patch new file mode 100644 index 0000000..7bed0ba --- /dev/null +++ b/patches.suse/workqueue-Fix-missing-kfree-rescuer-in-destroy_workq.patch @@ -0,0 +1,32 @@ +From 8efe1223d73c218ce7e8b2e0e9aadb974b582d7f Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Fri, 20 Sep 2019 13:39:57 -0700 +Subject: [PATCH] workqueue: Fix missing kfree(rescuer) in destroy_workqueue() +Git-commit: 8efe1223d73c218ce7e8b2e0e9aadb974b582d7f +Patch-mainline: v5.5-rc1 +References: bsc#1210460 + +Signed-off-by: Tejun Heo +Reported-by: Qian Cai +Fixes: def98c84b6cd ("workqueue: Fix spurious sanity check failures in destroy_workqueue()") +Acked-by: Petr Mladek + +--- + kernel/workqueue.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index 93e20f5330fc..3f067f1d72e3 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -4345,6 +4345,7 @@ void destroy_workqueue(struct workqueue_struct *wq) + + /* rescuer will empty maydays list before exiting */ + kthread_stop(rescuer->task); ++ kfree(rescuer); + } + + /* sanity checks */ +-- +2.35.3 + diff --git a/patches.suse/workqueue-Fix-spurious-sanity-check-failures-in-dest.patch b/patches.suse/workqueue-Fix-spurious-sanity-check-failures-in-dest.patch new file mode 100644 index 0000000..294a6e1 --- /dev/null +++ b/patches.suse/workqueue-Fix-spurious-sanity-check-failures-in-dest.patch @@ -0,0 +1,88 @@ +From def98c84b6cdf2eeea19ec5736e90e316df5206b Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 18 Sep 2019 18:43:40 -0700 +Subject: [PATCH] workqueue: Fix spurious sanity check failures in + destroy_workqueue() +Git-commit: def98c84b6cdf2eeea19ec5736e90e316df5206b +Patch-mainline: v5.5-rc1 +References: bsc#1210460 + +Before actually destrying a workqueue, destroy_workqueue() checks +whether it's actually idle. 
If it isn't, it prints out a bunch of +warning messages and leaves the workqueue dangling. It unfortunately +has a couple issues. + +* Mayday list queueing increments pwq's refcnts which gets detected as + busy and fails the sanity checks. However, because mayday list + queueing is asynchronous, this condition can happen without any + actual work items left in the workqueue. + +* Sanity check failure leaves the sysfs interface behind too which can + lead to init failure of newer instances of the workqueue. + +This patch fixes the above two by + +* If a workqueue has a rescuer, disable and kill the rescuer before + sanity checks. Disabling and killing is guaranteed to flush the + existing mayday list. + +* Remove sysfs interface before sanity checks. + +Signed-off-by: Tejun Heo +Reported-by: Marcin Pawlowski +Reported-by: "Williams, Gerald S" +Cc: stable@vger.kernel.org +Acked-by: Petr Mladek + +--- + kernel/workqueue.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index bc2e09a8ea61..93e20f5330fc 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -4325,9 +4325,28 @@ void destroy_workqueue(struct workqueue_struct *wq) + struct pool_workqueue *pwq; + int node; + ++ /* ++ * Remove it from sysfs first so that sanity check failure doesn't ++ * lead to sysfs name conflicts. 
++ */ ++ workqueue_sysfs_unregister(wq); ++ + /* drain it before proceeding with destruction */ + drain_workqueue(wq); + ++ /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ ++ if (wq->rescuer) { ++ struct worker *rescuer = wq->rescuer; ++ ++ /* this prevents new queueing */ ++ spin_lock_irq(&wq_mayday_lock); ++ wq->rescuer = NULL; ++ spin_unlock_irq(&wq_mayday_lock); ++ ++ /* rescuer will empty maydays list before exiting */ ++ kthread_stop(rescuer->task); ++ } ++ + /* sanity checks */ + mutex_lock(&wq->mutex); + for_each_pwq(pwq, wq) { +@@ -4359,11 +4378,6 @@ void destroy_workqueue(struct workqueue_struct *wq) + list_del_rcu(&wq->list); + mutex_unlock(&wq_pool_mutex); + +- workqueue_sysfs_unregister(wq); +- +- if (wq->rescuer) +- kthread_stop(wq->rescuer->task); +- + if (!(wq->flags & WQ_UNBOUND)) { + /* + * The base ref is never dropped on per-cpu pwqs. Directly +-- +2.35.3 + diff --git a/patches.suse/wq-handle-VM-suspension-in-stall-detection.patch b/patches.suse/wq-handle-VM-suspension-in-stall-detection.patch new file mode 100644 index 0000000..e461fa7 --- /dev/null +++ b/patches.suse/wq-handle-VM-suspension-in-stall-detection.patch @@ -0,0 +1,89 @@ +From 940d71c6462e8151c78f28e4919aa8882ff2054e Mon Sep 17 00:00:00 2001 +From: Sergey Senozhatsky +Date: Thu, 20 May 2021 19:14:22 +0900 +Subject: [PATCH] wq: handle VM suspension in stall detection +Git-commit: 940d71c6462e8151c78f28e4919aa8882ff2054e +Patch-mainline: v5.13-rc4 +References: bsc#1210466 + +If VCPU is suspended (VM suspend) in wq_watchdog_timer_fn() then +once this VCPU resumes it will see the new jiffies value, while it +may take a while before IRQ detects PVCLOCK_GUEST_STOPPED on this +VCPU and updates all the watchdogs via pvclock_touch_watchdogs(). +There is a small chance of misreported WQ stalls in the meantime, +because new jiffies is time_after() old 'ts + thresh'. 
+ +wq_watchdog_timer_fn() +{ + for_each_pool(pool, pi) { + if (time_after(jiffies, ts + thresh)) { + pr_emerg("BUG: workqueue lockup - pool"); + } + } +} + +Save jiffies at the beginning of this function and use that value +for stall detection. If VM gets suspended then we continue using +"old" jiffies value and old WQ touch timestamps. If IRQ at some +point restarts the stall detection cycle (pvclock_touch_watchdogs()) +then old jiffies will always be before new 'ts + thresh'. + +Signed-off-by: Sergey Senozhatsky +Signed-off-by: Tejun Heo +Acked-by: Petr Mladek + +--- + kernel/workqueue.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index b19d759e55a5..50142fc08902 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -50,6 +50,7 @@ + #include <linux/uaccess.h> + #include <linux/sched/isolation.h> + #include <linux/nmi.h> ++#include <linux/kvm_para.h> + + #include "workqueue_internal.h" + +@@ -5772,6 +5773,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) + { + unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; + bool lockup_detected = false; ++ unsigned long now = jiffies; + struct worker_pool *pool; + int pi; + +@@ -5786,6 +5788,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) + if (list_empty(&pool->worklist)) + continue; + ++ /* ++ * If a virtual machine is stopped by the host it can look to ++ * the watchdog like a stall. ++ */ ++ kvm_check_and_clear_guest_paused(); ++ + /* get the latest of pool and touched timestamps */ + pool_ts = READ_ONCE(pool->watchdog_ts); + touched = READ_ONCE(wq_watchdog_touched); +@@ -5799,12 +5807,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) + } + + /* did we stall? 
*/ +- if (time_after(jiffies, ts + thresh)) { ++ if (time_after(now, ts + thresh)) { + lockup_detected = true; + pr_emerg("BUG: workqueue lockup - pool"); + pr_cont_pool_info(pool); + pr_cont(" stuck for %us!\n", +- jiffies_to_msecs(jiffies - pool_ts) / 1000); ++ jiffies_to_msecs(now - pool_ts) / 1000); + } + } + +-- +2.35.3 + diff --git a/series.conf b/series.conf index f3067cf..73e244c 100644 --- a/series.conf +++ b/series.conf @@ -43047,6 +43047,7 @@ patches.suse/0456-crypto-caam-qi-simplify-CGR-allocation-freeing.patch patches.suse/crypto-chelsio-Update-ntx-queue-received-from-cxgb4.patch patches.suse/printk-Fix-panic-caused-by-passing-log_buf_len-to-co.patch + patches.suse/printk-Give-error-on-attempt-to-set-log-buffer-lengt.patch patches.suse/cgroup-netclassid-add-a-preemption-point-to-write_cl.patch patches.suse/kvm-s390-set-host-program-identifier.patch patches.suse/s390-sles15sp1-00-07-01-KVM-s390-vsie-simulate-VCPU-SIE-entry-exit.patch @@ -53697,6 +53698,8 @@ patches.suse/edac-amd64-save-max-number-of-controllers-to-family-type.patch patches.suse/edac-ghes-fix-locking-and-memory-barrier-issues.patch patches.suse/edac-ghes-do-not-warn-when-incrementing-refcount-on-0.patch + patches.suse/workqueue-Fix-spurious-sanity-check-failures-in-dest.patch + patches.suse/workqueue-Fix-missing-kfree-rescuer-in-destroy_workq.patch patches.suse/0001-workqueue-Fix-pwq-ref-leak-in-rescuer_thread.patch patches.suse/cgroup-pids-use-atomic64_t-for-pids-limit.patch patches.suse/livepatch-keep-replaced-patches-until-post_patch-callback-is-called.patch @@ -60628,6 +60631,7 @@ patches.suse/xen-pciback-reconfigure-also-from-backend-watch-hand.patch patches.suse/nvme-fc-clear-q_live-at-beginning-of-association-tea.patch patches.suse/locking-mutex-clear-MUTEX_FLAGS-if-wait_list-is-empt.patch + patches.suse/wq-handle-VM-suspension-in-stall-detection.patch patches.suse/net-nfc-rawsock.c-fix-a-permission-check-bug.patch patches.suse/net-netcp-Fix-an-error-message.patch 
patches.suse/net-mlx4-Fix-EEPROM-dump-support.patch