Blob Blame History Raw
From 126180b95f27ef6cc536da57115e06665254b0d7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 18 Aug 2021 12:42:47 +0100
Subject: [PATCH] io_uring: IRQ rw completion batching
Git-commit: 126180b95f27ef6cc536da57115e06665254b0d7
Patch-mainline: v5.15-rc1
References: bsc#1205205

Employ inline completion logic for read/write completions done via
io_req_task_complete(). If ->uring_lock is contended, just do normal
request completion, but if not, make tctx_task_work() to grab the lock
and do batched inline completions in io_req_task_complete().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/94589c3ce69eaed86a21bb1ec696407a54fab1aa.1629286357.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
---
 fs/io_uring.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cb98a831586a..827e60ae4909 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2067,6 +2067,8 @@ static void tctx_task_work(struct callback_head *cb)
 			if (req->ctx != ctx) {
 				ctx_flush_and_put(ctx, &locked);
 				ctx = req->ctx;
+				/* if not contended, grab and improve batching */
+				locked = mutex_trylock(&ctx->uring_lock);
 				percpu_ref_get(&ctx->refs);
 			}
 			req->io_task_work.func(req, &locked);
@@ -2582,7 +2584,20 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 
 static void io_req_task_complete(struct io_kiocb *req, bool *locked)
 {
-	__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+	unsigned int cflags = io_put_rw_kbuf(req);
+	long res = req->result;
+
+	if (*locked) {
+		struct io_ring_ctx *ctx = req->ctx;
+		struct io_submit_state *state = &ctx->submit_state;
+
+		io_req_complete_state(req, res, cflags);
+		state->compl_reqs[state->compl_nr++] = req;
+		if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+			io_submit_flush_completions(ctx);
+	} else {
+		io_req_complete_post(req, res, cflags);
+	}
 }
 
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
-- 
2.35.3