From cef216fc32d7628206c523994e7e267e7a8dda59 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 12 Apr 2022 15:09:43 +0100
Subject: [PATCH] io_uring: explicitly keep a CQE in io_kiocb
Git-commit: cef216fc32d7628206c523994e7e267e7a8dda59
Patch-mainline: v5.19-rc1
References: bsc#1205205
We already have req->{result,user_data,cflags}, which mimic struct
io_uring_cqe and are intended to store CQE data. Combine them into a
struct io_uring_cqe field.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e1efe65d5005cd6a9ec3440767eb15a9fa9351cf.1649771823.git.asml.silence@gmail.com
[axboe: add mirror cqe to cater to fd union]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
---
fs/io_uring.c | 136 +++++++++++++++++++++++++++++-----------------------------
1 file changed, 70 insertions(+), 66 deletions(-)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -865,6 +865,16 @@ enum {
IORING_RSRC_BUFFER = 1,
};
+struct io_cqe {
+ __u64 user_data; /* copied from sqe->user_data when the request is initialised */
+ __s32 res; /* result posted in the CQE; also holds interim values (I/O length, poll mask) */
+ /* fd initially, then cflags for completion */
+ union {
+ __u32 flags; /* completion flags (cflags) passed when the CQE is filled */
+ int fd; /* read from sqe->fd; used to look up req->file */
+ };
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -908,13 +918,7 @@ struct io_kiocb {
u16 buf_index;
unsigned int flags;
- u64 user_data;
- u32 result;
- /* fd initially, then cflags for completion */
- union {
- u32 cflags;
- int fd;
- };
+ struct io_cqe cqe;
struct io_ring_ctx *ctx;
struct task_struct *task;
@@ -1468,7 +1472,7 @@ static inline void req_set_fail(struct i
static inline void req_fail_link_node(struct io_kiocb *req, int res)
{
req_set_fail(req);
- req->result = res;
+ req->cqe.res = res;
}
static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref)
@@ -1700,7 +1704,7 @@ static void io_queue_async_work(struct i
if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
req->work.flags |= IO_WQ_WORK_CANCEL;
- trace_io_uring_queue_async_work(ctx, req, req->user_data, req->opcode, req->flags,
+ trace_io_uring_queue_async_work(ctx, req, req->cqe.user_data, req->opcode, req->flags,
&req->work, io_wq_is_hashed(&req->work));
io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
@@ -2040,8 +2044,8 @@ static inline bool __io_fill_cqe(struct
static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
{
- trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
- return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags);
+ return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
}
static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
@@ -2107,8 +2111,8 @@ static void io_req_complete_post(struct
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
- req->result = res;
- req->cflags = cflags;
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
req->flags |= REQ_F_COMPLETE_INLINE;
}
@@ -2140,7 +2144,7 @@ static void io_req_complete_fail_submit(
*/
req->flags &= ~REQ_F_HARDLINK;
req->flags |= REQ_F_LINK;
- io_req_complete_failed(req, req->result);
+ io_req_complete_failed(req, req->cqe.res);
}
/*
@@ -2153,7 +2157,7 @@ static void io_preinit_req(struct io_kio
req->link = NULL;
req->async_data = NULL;
/* not necessary, but safer to zero */
- req->result = 0;
+ req->cqe.res = 0;
}
static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
@@ -2307,12 +2311,12 @@ static void io_fail_links(struct io_kioc
long res = -ECANCELED;
if (link->flags & REQ_F_FAIL)
- res = link->result;
+ res = link->cqe.res;
nxt = link->link;
link->link = NULL;
- trace_io_uring_fail_link(req->ctx, req, req->user_data,
+ trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data,
req->opcode, link);
if (!ignore_cqes) {
@@ -2432,7 +2436,7 @@ static void handle_prev_tw_list(struct i
if (likely(*uring_locked))
req->io_task_work.func(req, uring_locked);
else
- __io_req_complete_post(req, req->result,
+ __io_req_complete_post(req, req->cqe.res,
io_put_kbuf_comp(req));
node = next;
} while (node);
@@ -2562,7 +2566,7 @@ static void io_req_task_cancel(struct io
/* not needed for normal modes, but SQPOLL depends on it */
io_tw_lock(ctx, locked);
- io_req_complete_failed(req, req->result);
+ io_req_complete_failed(req, req->cqe.res);
}
static void io_req_task_submit(struct io_kiocb *req, bool *locked)
@@ -2579,7 +2583,7 @@ static void io_req_task_submit(struct io
static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
{
- req->result = ret;
+ req->cqe.res = ret;
req->io_task_work.func = io_req_task_cancel;
io_req_task_work_add(req, false);
}
@@ -2679,7 +2683,7 @@ static void __io_submit_flush_completion
comp_list);
if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, req->result, req->cflags);
+ __io_fill_cqe_req(req, req->cqe.res, req->cqe.flags);
}
io_commit_cqring(ctx);
@@ -2802,7 +2806,7 @@ static int io_do_iopoll(struct io_ring_c
nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
- __io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
+ __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
}
if (unlikely(!nr_events))
@@ -2960,21 +2964,21 @@ static bool __io_complete_rw_common(stru
} else {
fsnotify_access(req->file);
}
- if (unlikely(res != req->result)) {
+ if (unlikely(res != req->cqe.res)) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
return true;
}
req_set_fail(req);
- req->result = res;
+ req->cqe.res = res;
}
return false;
}
static inline void io_req_task_complete(struct io_kiocb *req, bool *locked)
{
- int res = req->result;
+ int res = req->cqe.res;
if (*locked) {
io_req_complete_state(req, res, io_put_kbuf(req, 0));
@@ -2990,7 +2994,7 @@ static void __io_complete_rw(struct io_k
{
if (__io_complete_rw_common(req, res))
return;
- __io_req_complete(req, issue_flags, req->result,
+ __io_req_complete(req, issue_flags, req->cqe.res,
io_put_kbuf(req, issue_flags));
}
@@ -3000,7 +3004,7 @@ static void io_complete_rw(struct kiocb
if (__io_complete_rw_common(req, res))
return;
- req->result = res;
+ req->cqe.res = res;
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
}
@@ -3011,12 +3015,12 @@ static void io_complete_rw_iopoll(struct
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
- if (unlikely(res != req->result)) {
+ if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
return;
}
- req->result = res;
+ req->cqe.res = res;
}
/* order with io_iopoll_complete() checking ->iopoll_completed */
@@ -3814,7 +3818,7 @@ static int io_read(struct io_kiocb *req,
kfree(iovec);
return ret;
}
- req->result = iov_iter_count(&s->iter);
+ req->cqe.res = iov_iter_count(&s->iter);
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
@@ -3830,7 +3834,7 @@ static int io_read(struct io_kiocb *req,
ppos = io_kiocb_update_pos(req);
- ret = rw_verify_area(READ, req->file, ppos, req->result);
+ ret = rw_verify_area(READ, req->file, ppos, req->cqe.res);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@@ -3852,7 +3856,7 @@ static int io_read(struct io_kiocb *req,
ret = 0;
} else if (ret == -EIOCBQUEUED) {
goto out_free;
- } else if (ret == req->result || ret <= 0 || !force_nonblock ||
+ } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
@@ -3942,7 +3946,7 @@ static int io_write(struct io_kiocb *req
kfree(iovec);
return ret;
}
- req->result = iov_iter_count(&s->iter);
+ req->cqe.res = iov_iter_count(&s->iter);
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
@@ -3962,7 +3966,7 @@ static int io_write(struct io_kiocb *req
ppos = io_kiocb_update_pos(req);
- ret = rw_verify_area(WRITE, req->file, ppos, req->result);
+ ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res);
if (unlikely(ret))
goto out_free;
@@ -5740,7 +5744,7 @@ static void io_poll_req_insert(struct io
struct io_ring_ctx *ctx = req->ctx;
struct hlist_head *list;
- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+ list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
hlist_add_head(&req->hash_node, list);
}
@@ -5805,7 +5809,7 @@ static void io_poll_remove_entries(struc
*
* Returns a negative error on failure. >0 when no action require, which is
* either spurious wakeup or multishot CQE is served. 0 when it's done with
- * the request, then the mask is stored in req->result.
+ * the request, then the mask is stored in req->cqe.res.
*/
static int io_poll_check_events(struct io_kiocb *req, bool locked)
{
@@ -5825,29 +5829,29 @@ static int io_poll_check_events(struct i
if (v & IO_POLL_CANCEL_FLAG)
return -ECANCELED;
- if (!req->result) {
+ if (!req->cqe.res) {
struct poll_table_struct pt = { ._key = req->apoll_events };
unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
if (unlikely(!io_assign_file(req, flags)))
return -EBADF;
- req->result = vfs_poll(req->file, &pt) & req->apoll_events;
+ req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
}
/* multishot, just fill an CQE and proceed */
- if (req->result && !(req->apoll_events & EPOLLONESHOT)) {
- __poll_t mask = mangle_poll(req->result & req->apoll_events);
+ if (req->cqe.res && !(req->apoll_events & EPOLLONESHOT)) {
+ __poll_t mask = mangle_poll(req->cqe.res & req->apoll_events);
bool filled;
spin_lock(&ctx->completion_lock);
- filled = io_fill_cqe_aux(ctx, req->user_data, mask,
+ filled = io_fill_cqe_aux(ctx, req->cqe.user_data, mask,
IORING_CQE_F_MORE);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
if (unlikely(!filled))
return -ECANCELED;
io_cqring_ev_posted(ctx);
- } else if (req->result) {
+ } else if (req->cqe.res) {
return 0;
}
@@ -5870,16 +5874,16 @@ static void io_poll_task_func(struct io_
return;
if (!ret) {
- req->result = mangle_poll(req->result & req->poll.events);
+ req->cqe.res = mangle_poll(req->cqe.res & req->poll.events);
} else {
- req->result = ret;
+ req->cqe.res = ret;
req_set_fail(req);
}
io_poll_remove_entries(req);
spin_lock(&ctx->completion_lock);
hash_del(&req->hash_node);
- __io_req_complete_post(req, req->result, 0);
+ __io_req_complete_post(req, req->cqe.res, 0);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
@@ -5907,7 +5911,7 @@ static void io_apoll_task_func(struct io
static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
{
- req->result = mask;
+ req->cqe.res = mask;
/*
* This is useful for poll that is armed on behalf of another
* request, and where the wakeup path could be on a different
@@ -5920,7 +5924,7 @@ static void __io_poll_execute(struct io_
else
req->io_task_work.func = io_apoll_task_func;
- trace_io_uring_task_add(req->ctx, req, req->user_data, req->opcode, mask);
+ trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
io_req_task_work_add(req, false);
}
@@ -6170,7 +6174,7 @@ static int io_arm_poll_handler(struct io
if (ret || ipt.error)
return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
- trace_io_uring_poll_arm(ctx, req, req->user_data, req->opcode,
+ trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
mask, apoll->poll.events);
return IO_APOLL_OK;
}
@@ -6213,7 +6217,7 @@ static struct io_kiocb *io_poll_find(str
list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
hlist_for_each_entry(req, list, hash_node) {
- if (sqe_addr != req->user_data)
+ if (sqe_addr != req->cqe.user_data)
continue;
if (poll_only && req->opcode != IORING_OP_POLL_ADD)
continue;
@@ -6350,7 +6354,7 @@ static int io_poll_update(struct io_kioc
preq->poll.events |= IO_POLL_UNMASK;
}
if (req->poll_update.update_user_data)
- preq->user_data = req->poll_update.new_user_data;
+ preq->cqe.user_data = req->poll_update.new_user_data;
ret2 = io_poll_add(preq, issue_flags);
/* successfully updated, don't complete poll request */
@@ -6359,7 +6363,7 @@ static int io_poll_update(struct io_kioc
}
req_set_fail(preq);
- preq->result = -ECANCELED;
+ preq->cqe.res = -ECANCELED;
locked = !(issue_flags & IO_URING_F_UNLOCKED);
io_req_task_complete(preq, &locked);
out:
@@ -6387,7 +6391,7 @@ static enum hrtimer_restart io_timeout_f
if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
req_set_fail(req);
- req->result = -ETIME;
+ req->cqe.res = -ETIME;
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req, false);
return HRTIMER_NORESTART;
@@ -6402,7 +6406,7 @@ static struct io_kiocb *io_timeout_extra
bool found = false;
list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
- found = user_data == req->user_data;
+ found = user_data == req->cqe.user_data;
if (found)
break;
}
@@ -6453,7 +6457,7 @@ static int io_linked_timeout_update(stru
bool found = false;
list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
- found = user_data == req->user_data;
+ found = user_data == req->cqe.user_data;
if (found)
break;
}
@@ -6679,7 +6683,7 @@ static bool io_cancel_cb(struct io_wq_wo
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_cancel_data *cd = data;
- return req->ctx == cd->ctx && req->user_data == cd->user_data;
+ return req->ctx == cd->ctx && req->cqe.user_data == cd->user_data;
}
static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
@@ -6911,7 +6915,7 @@ static int io_req_prep_async(struct io_k
/* assign early for deferred execution for non-fixed file */
if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE))
- req->file = io_file_get_normal(req, req->fd);
+ req->file = io_file_get_normal(req, req->cqe.fd);
if (!def->needs_async_setup)
return 0;
if (WARN_ON_ONCE(req_has_async_data(req)))
@@ -6985,7 +6989,7 @@ fail:
goto queue;
}
- trace_io_uring_defer(ctx, req, req->user_data, req->opcode);
+ trace_io_uring_defer(ctx, req, req->cqe.user_data, req->opcode);
de->req = req;
de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
@@ -7065,14 +7069,14 @@ static bool io_assign_file(struct io_kio
return true;
if (req->flags & REQ_F_FIXED_FILE)
- req->file = io_file_get_fixed(req, req->fd, issue_flags);
+ req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags);
else
- req->file = io_file_get_normal(req, req->fd);
+ req->file = io_file_get_normal(req, req->cqe.fd);
if (req->file)
return true;
req_set_fail(req);
- req->result = -EBADF;
+ req->cqe.res = -EBADF;
return false;
}
@@ -7355,7 +7359,7 @@ static struct file *io_file_get_normal(s
{
struct file *file = fget(fd);
- trace_io_uring_file_get(req->ctx, req, req->user_data, fd);
+ trace_io_uring_file_get(req->ctx, req, req->cqe.user_data, fd);
/* we don't allow fixed io_uring files */
if (file && file->f_op == &io_uring_fops)
@@ -7370,7 +7374,7 @@ static void io_req_task_link_timeout(str
if (prev) {
if (!(req->task->flags & PF_EXITING))
- ret = io_try_cancel_userdata(req, prev->user_data);
+ ret = io_try_cancel_userdata(req, prev->cqe.user_data);
io_req_complete_post(req, ret ?: -ETIME, 0);
io_put_req(prev);
} else {
@@ -7561,7 +7565,7 @@ static int io_init_req(struct io_ring_ct
req->opcode = opcode = READ_ONCE(sqe->opcode);
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags = sqe_flags = READ_ONCE(sqe->flags);
- req->user_data = READ_ONCE(sqe->user_data);
+ req->cqe.user_data = READ_ONCE(sqe->user_data);
req->file = NULL;
req->fixed_rsrc_refs = NULL;
req->task = current;
@@ -7602,7 +7606,7 @@ static int io_init_req(struct io_ring_ct
if (io_op_defs[opcode].needs_file) {
struct io_submit_state *state = &ctx->submit_state;
- req->fd = READ_ONCE(sqe->fd);
+ req->cqe.fd = READ_ONCE(sqe->fd);
/*
* Plug now if we have more than 2 IO left after this, and the
@@ -7644,7 +7648,7 @@ static int io_submit_sqe(struct io_ring_
* we can judge a link req is failed or cancelled by if
* REQ_F_FAIL is set, but the head is an exception since
* it may be set REQ_F_FAIL because of other req's failure
- * so let's leverage req->result to distinguish if a head
+ * so let's leverage req->cqe.res to distinguish if a head
* is set REQ_F_FAIL because of its failure or other req's
* failure so that we can set the correct ret code for it.
* init result here to avoid affecting the normal path.
@@ -7663,7 +7667,7 @@ static int io_submit_sqe(struct io_ring_
}
/* don't need @sqe from now on */
- trace_io_uring_submit_sqe(ctx, req, req->user_data, req->opcode,
+ trace_io_uring_submit_sqe(ctx, req, req->cqe.user_data, req->opcode,
req->flags, true,
ctx->flags & IORING_SETUP_SQPOLL);