From 767a65e9f31789d80e41edd03a802314905e8fbf Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Sun, 12 Sep 2021 03:40:52 +0800
Subject: [PATCH] io-wq: fix potential race of acct->nr_workers
Git-commit: 767a65e9f31789d80e41edd03a802314905e8fbf
Patch-mainline: v5.15-rc2
References: bsc#1205205
Given that max_worker is 1 and the one worker currently running is in
the process of exiting, a race like the following can occur:
 io_wqe_enqueue                   worker1
                                  no work there and timeout
                                  unlock(wqe->lock)
 ->insert work
                                  -->io_worker_exit
 lock(wqe->lock)
 ->if(!nr_workers) //it's still 1
 unlock(wqe->lock)
    goto run_cancel
                                  lock(wqe->lock)
                                  nr_workers--
                                  ->dec_running
                                    ->worker creation fails
                                  unlock(wqe->lock)
We have enqueued one work item but there are no workers left to run it,
which causes a hang.
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
---
fs/io-wq.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index a1685b40a4bf..3d4460df845c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -176,7 +176,6 @@ static void io_worker_ref_put(struct io_wq *wq)
static void io_worker_exit(struct io_worker *worker)
{
struct io_wqe *wqe = worker->wqe;
- struct io_wqe_acct *acct = io_wqe_get_acct(worker);
if (refcount_dec_and_test(&worker->ref))
complete(&worker->ref_done);
@@ -186,7 +185,6 @@ static void io_worker_exit(struct io_worker *worker)
if (worker->flags & IO_WORKER_F_FREE)
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
- acct->nr_workers--;
preempt_disable();
io_wqe_dec_running(worker);
worker->flags = 0;
@@ -569,6 +567,7 @@ static int io_wqe_worker(void *data)
}
/* timed out, exit unless we're the last worker */
if (last_timeout && acct->nr_workers > 1) {
+ acct->nr_workers--;
raw_spin_unlock(&wqe->lock);
__set_current_state(TASK_RUNNING);
break;
--
2.35.3