From ebdeb7c01d025cb059f05dc26b9dc914e46dd43f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 31 Mar 2022 19:27:52 -0600
Subject: [PATCH] io_uring: add support for 128-byte SQEs
Git-commit: ebdeb7c01d025cb059f05dc26b9dc914e46dd43f
Patch-mainline: v5.19-rc1
References: bsc#1205205
Normal SQEs are 64 bytes in length, which is fine for all the commands
we support. However, in preparation for supporting passthrough IO,
provide an option for setting up a ring with 128-byte SQEs.
We continue to use the same type for io_uring_sqe; it's marked and
commented with a zero-sized array pad at the end. This provides up
to 80 bytes of data for a passthrough command - 64 bytes for the
extra added data, and 16 bytes available at the end of the existing
SQE.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
---
fs/io_uring.c | 14 +++++++++++---
include/uapi/linux/io_uring.h | 8 ++++++++
2 files changed, 19 insertions(+), 3 deletions(-)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8143,8 +8143,12 @@ static const struct io_uring_sqe *io_get
* though the application is the one updating it.
*/
head = READ_ONCE(ctx->sq_array[sq_idx]);
- if (likely(head < ctx->sq_entries))
+ if (likely(head < ctx->sq_entries)) {
+ /* double index for 128-byte SQEs, twice as long */
+ if (ctx->flags & IORING_SETUP_SQE128)
+ head <<= 1;
return &ctx->sq_sqes[head];
+ }
/* drop invalid entries */
ctx->cq_extra--;
@@ -11337,7 +11341,10 @@ static __cold int io_allocate_scq_urings
rings->sq_ring_entries = p->sq_entries;
rings->cq_ring_entries = p->cq_entries;
- size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+ if (p->flags & IORING_SETUP_SQE128)
+ size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries);
+ else
+ size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
if (size == SIZE_MAX) {
io_mem_free(ctx->rings);
ctx->rings = NULL;
@@ -11561,7 +11568,8 @@ static long io_uring_setup(u32 entries,
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
- IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
+ IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
+ IORING_SETUP_SQE128))
return -EINVAL;
return io_uring_create(entries, &p, params);
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -61,6 +61,12 @@ struct io_uring_sqe {
__u32 file_index;
};
__u64 __pad2[2];
+
+ /*
+ * If the ring is initialized with IORING_SETUP_SQE128, then this field
+ * contains 64-bytes of padding, doubling the size of the SQE.
+ */
+ __u64 __big_sqe_pad[0];
};
/*
@@ -112,6 +118,8 @@ enum {
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
+#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,