Blob Blame History Raw
From 58d3be2c60d2cf4e6bb65bb6200fa39a7bc477f9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 9 Aug 2021 13:04:17 +0100
Subject: [PATCH] io_uring: improve ctx hang handling
Git-commit: 58d3be2c60d2cf4e6bb65bb6200fa39a7bc477f9
Patch-mainline: v5.15-rc1
References: bsc#1199011

If io_ring_exit_work() can't get it done in 5 minutes, something is
going very wrong, don't keep spinning at HZ / 20 rate, it doesn't help
and it may take much of CPU time if there is a lot of workers stuck as
such.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9e2d1ca81d569f6bc628af1a42ff6663bff7ce9c.1628471125.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Acked-by: David Disseldorp <ddiss@suse.de>

---
 fs/io_uring.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 26a3ab9b6aed..af9472e3f17b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8795,6 +8795,7 @@ static void io_ring_exit_work(struct work_struct *work)
 {
 	struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
 	unsigned long timeout = jiffies + HZ * 60 * 5;
+	unsigned long interval = HZ / 20;
 	struct io_tctx_exit exit;
 	struct io_tctx_node *node;
 	int ret;
@@ -8819,8 +8820,11 @@ static void io_ring_exit_work(struct work_struct *work)
 			io_sq_thread_unpark(sqd);
 		}
 
-		WARN_ON_ONCE(time_after(jiffies, timeout));
-	} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+		if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
+			/* there is little hope left, don't run it too often */
+			interval = HZ * 60;
+		}
+	} while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
 
 	init_completion(&exit.completion);
 	init_task_work(&exit.task_work, io_tctx_exit_cb);
-- 
2.34.1