From 81459350d581e958ee9c6e76031f77333881c23c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 16 Mar 2022 16:55:05 -0600
Subject: [PATCH] io_uring: cache req->apoll->events in req->cflags
Git-commit: 81459350d581e958ee9c6e76031f77333881c23c
Patch-mainline: v5.18-rc1
References: bsc#1205205

When we arm poll on behalf of a different type of request, like a network
receive, then we allocate req->apoll as our poll entry. Running network
workloads shows io_poll_check_events() as the most expensive part of
io_uring, and it's all due to having to pull in req->apoll instead of
just the request which we have hot already.

Cache poll->events in req->cflags, which isn't used until the request
completes anyway. This isn't strictly needed for regular poll, where
req->poll.events is used and thus already hot, but for the sake of
unification we do it all around.

This saves 3-4% of overhead in certain request workloads.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
---
fs/io_uring.c | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5730,13 +5730,13 @@ static int io_poll_check_events(struct i
return -ECANCELED;
if (!req->result) {
- struct poll_table_struct pt = { ._key = poll->events };
+ struct poll_table_struct pt = { ._key = req->cflags };
- req->result = vfs_poll(req->file, &pt) & poll->events;
+ req->result = vfs_poll(req->file, &pt) & req->cflags;
}
/* multishot, just fill an CQE and proceed */
- if (req->result && !(poll->events & EPOLLONESHOT)) {
+ if (req->result && !(req->cflags & EPOLLONESHOT)) {
__poll_t mask = mangle_poll(req->result & poll->events);
bool filled;
@@ -5806,9 +5806,16 @@ static void io_apoll_task_func(struct io
io_req_complete_failed(req, ret);
}
-static void __io_poll_execute(struct io_kiocb *req, int mask)
+static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
{
req->result = mask;
+ /*
+ * This is useful for poll that is armed on behalf of another
+ * request, and where the wakeup path could be on a different
+ * CPU. We want to avoid pulling in req->apoll->events for that
+ * case.
+ */
+ req->cflags = events;
if (req->opcode == IORING_OP_POLL_ADD)
req->io_task_work.func = io_poll_task_func;
else
@@ -5818,17 +5825,17 @@ static void __io_poll_execute(struct io_
io_req_task_work_add(req, false);
}
-static inline void io_poll_execute(struct io_kiocb *req, int res)
+static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
{
if (io_poll_get_ownership(req))
- __io_poll_execute(req, res);
+ __io_poll_execute(req, res, events);
}
static void io_poll_cancel_req(struct io_kiocb *req)
{
io_poll_mark_cancelled(req);
/* kick tw, which should complete the request */
- io_poll_execute(req, 0);
+ io_poll_execute(req, 0, 0);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -5842,7 +5849,7 @@ static int io_poll_wake(struct wait_queu
if (unlikely(mask & POLLFREE)) {
io_poll_mark_cancelled(req);
/* we have to kick tw in case it's not already */
- io_poll_execute(req, 0);
+ io_poll_execute(req, 0, poll->events);
/*
* If the waitqueue is being freed early but someone is already
@@ -5873,7 +5880,7 @@ static int io_poll_wake(struct wait_queu
list_del_init(&poll->wait.entry);
poll->head = NULL;
}
- __io_poll_execute(req, mask);
+ __io_poll_execute(req, mask, poll->events);
}
return 1;
}
@@ -5977,7 +5984,7 @@ static int __io_arm_poll_handler(struct
/* can't multishot if failed, just queue the event we've got */
if (unlikely(ipt->error || !ipt->nr_entries))
poll->events |= EPOLLONESHOT;
- __io_poll_execute(req, mask);
+ __io_poll_execute(req, mask, poll->events);
return 0;
}
@@ -5987,7 +5994,7 @@ static int __io_arm_poll_handler(struct
*/
v = atomic_dec_return(&req->poll_refs);
if (unlikely(v & IO_POLL_REF_MASK))
- __io_poll_execute(req, 0);
+ __io_poll_execute(req, 0, poll->events);
return 0;
}
@@ -6186,7 +6193,7 @@ static int io_poll_add_prep(struct io_ki
return -EINVAL;
io_req_set_refcount(req);
- poll->events = io_poll_parse_events(sqe, flags);
+ req->cflags = poll->events = io_poll_parse_events(sqe, flags);
return 0;
}