From 71e0f8d6f14cd4e0b47a2d5ceecc46b85270197e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 8 Sep 2021 10:09:29 +0100
Subject: [PATCH 1/4] io-wq: fix cancellation on create-worker failure
Git-commit: 713b9825a4c47897f66ad69409581e7734a8728e
Patch-mainline: v5.15-rc1
References: bnc#1205113
WARNING: CPU: 0 PID: 10392 at fs/io_uring.c:1151 req_ref_put_and_test
fs/io_uring.c:1151 [inline]
WARNING: CPU: 0 PID: 10392 at fs/io_uring.c:1151 req_ref_put_and_test
fs/io_uring.c:1146 [inline]
WARNING: CPU: 0 PID: 10392 at fs/io_uring.c:1151
io_req_complete_post+0xf5b/0x1190 fs/io_uring.c:1794
Modules linked in:
Call Trace:
tctx_task_work+0x1e5/0x570 fs/io_uring.c:2158
task_work_run+0xe0/0x1a0 kernel/task_work.c:164
tracehook_notify_signal include/linux/tracehook.h:212 [inline]
handle_signal_work kernel/entry/common.c:146 [inline]
exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
exit_to_user_mode_prepare+0x232/0x2a0 kernel/entry/common.c:209
__syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x44/0xae
When io_wqe_enqueue() -> io_wqe_create_worker() fails, we can't just
call io_run_cancel() to clean up the request, it's already enqueued via
io_wqe_insert_work() and will be executed either by some other worker
during cancellation (e.g. in io_wq_put_and_exit()).
Reported-by: Hao Sun <sunhao.th@gmail.com>
Fixes: 3146cba99aa28 ("io-wq: make worker creation resilient against signals")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/93b9de0fcf657affab0acfd675d4abcd273ee863.1631092071.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
---
fs/io-wq.c | 29 ++++++++++++++++++++---------
1 file changed, 20 insertions(+), 9 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 40e27bc30fb0..048d68291814 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -845,6 +845,11 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
wq_list_add_after(&work->list, &tail->list, &acct->work_list);
}
+static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
+{
+ return work == data;
+}
+
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
@@ -857,7 +862,6 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
*/
if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
(work->flags & IO_WQ_WORK_CANCEL)) {
-run_cancel:
io_run_cancel(work, wqe);
return;
}
@@ -877,15 +881,22 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
bool did_create;
did_create = io_wqe_create_worker(wqe, acct);
- if (unlikely(!did_create)) {
- raw_spin_lock(&wqe->lock);
- /* fatal condition, failed to create the first worker */
- if (!acct->nr_workers) {
- raw_spin_unlock(&wqe->lock);
- goto run_cancel;
- }
- raw_spin_unlock(&wqe->lock);
+ if (likely(did_create))
+ return;
+
+ raw_spin_lock(&wqe->lock);
+ /* fatal condition, failed to create the first worker */
+ if (!acct->nr_workers) {
+ struct io_cb_cancel_data match = {
+ .fn = io_wq_work_match_item,
+ .data = work,
+ .cancel_all = false,
+ };
+
+ if (io_acct_cancel_pending_work(wqe, acct, &match))
+ raw_spin_lock(&wqe->lock);
}
+ raw_spin_unlock(&wqe->lock);
}
}
--
2.35.3