diff --git a/patches.suse/wq-handle-VM-suspension-in-stall-detection.patch b/patches.suse/wq-handle-VM-suspension-in-stall-detection.patch new file mode 100644 index 0000000..e461fa7 --- /dev/null +++ b/patches.suse/wq-handle-VM-suspension-in-stall-detection.patch @@ -0,0 +1,89 @@ +From 940d71c6462e8151c78f28e4919aa8882ff2054e Mon Sep 17 00:00:00 2001 +From: Sergey Senozhatsky <senozhatsky@chromium.org> +Date: Thu, 20 May 2021 19:14:22 +0900 +Subject: [PATCH] wq: handle VM suspension in stall detection +Git-commit: 940d71c6462e8151c78f28e4919aa8882ff2054e +Patch-mainline: v5.13-rc4 +References: bsc#1210466 + +If VCPU is suspended (VM suspend) in wq_watchdog_timer_fn() then +once this VCPU resumes it will see the new jiffies value, while it +may take a while before IRQ detects PVCLOCK_GUEST_STOPPED on this +VCPU and updates all the watchdogs via pvclock_touch_watchdogs(). +There is a small chance of misreported WQ stalls in the meantime, +because new jiffies is time_after() old 'ts + thresh'. + +wq_watchdog_timer_fn() +{ + for_each_pool(pool, pi) { + if (time_after(jiffies, ts + thresh)) { + pr_emerg("BUG: workqueue lockup - pool"); + } + } +} + +Save jiffies at the beginning of this function and use that value +for stall detection. If VM gets suspended then we continue using +"old" jiffies value and old WQ touch timestamps. If IRQ at some +point restarts the stall detection cycle (pvclock_touch_watchdogs()) +then old jiffies will always be before new 'ts + thresh'. 
+ +Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> +Signed-off-by: Tejun Heo <tj@kernel.org> +Acked-by: Petr Mladek <pmladek@suse.com> + +--- + kernel/workqueue.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index b19d759e55a5..50142fc08902 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -50,6 +50,7 @@ + #include <linux/uaccess.h> + #include <linux/sched/isolation.h> + #include <linux/nmi.h> ++#include <linux/kvm_para.h> + + #include "workqueue_internal.h" + +@@ -5772,6 +5773,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) + { + unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; + bool lockup_detected = false; ++ unsigned long now = jiffies; + struct worker_pool *pool; + int pi; + +@@ -5786,6 +5788,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) + if (list_empty(&pool->worklist)) + continue; + ++ /* ++ * If a virtual machine is stopped by the host it can look to ++ * the watchdog like a stall. ++ */ ++ kvm_check_and_clear_guest_paused(); ++ + /* get the latest of pool and touched timestamps */ + pool_ts = READ_ONCE(pool->watchdog_ts); + touched = READ_ONCE(wq_watchdog_touched); +@@ -5799,12 +5807,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) + } + + /* did we stall? 
*/ +- if (time_after(jiffies, ts + thresh)) { ++ if (time_after(now, ts + thresh)) { + lockup_detected = true; + pr_emerg("BUG: workqueue lockup - pool"); + pr_cont_pool_info(pool); + pr_cont(" stuck for %us!\n", +- jiffies_to_msecs(jiffies - pool_ts) / 1000); ++ jiffies_to_msecs(now - pool_ts) / 1000); + } + } + +-- +2.35.3 + diff --git a/series.conf b/series.conf index 51b3f8a..390cf61 100644 --- a/series.conf +++ b/series.conf @@ -60624,6 +60624,7 @@ patches.suse/xen-pciback-reconfigure-also-from-backend-watch-hand.patch patches.suse/nvme-fc-clear-q_live-at-beginning-of-association-tea.patch patches.suse/locking-mutex-clear-MUTEX_FLAGS-if-wait_list-is-empt.patch + patches.suse/wq-handle-VM-suspension-in-stall-detection.patch patches.suse/net-nfc-rawsock.c-fix-a-permission-check-bug.patch patches.suse/net-netcp-Fix-an-error-message.patch patches.suse/net-mlx4-Fix-EEPROM-dump-support.patch