[RFC 1/5] io_uring: add support for 128-byte SQEs
Kanchan Joshi
joshi.k at samsung.com
Fri Apr 1 04:03:06 PDT 2022
From: Jens Axboe <axboe at kernel.dk>
Normal SQEs are 64-bytes in length, which is fine for all the commands
we support. However, in preparation for supporting passthrough IO,
provide an option for setting up a ring with 128-byte SQEs.
We continue to use the same type for io_uring_sqe, it's marked and
commented with a zero sized array pad at the end. This provides up
to 80 bytes of data for a passthrough command - 64 bytes for the
extra added data, and 16 bytes available at the end of the existing
SQE.
Signed-off-by: Jens Axboe <axboe at kernel.dk>
---
fs/io_uring.c | 13 ++++++++++---
include/uapi/linux/io_uring.h | 7 +++++++
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a7412f6862fc..241ba1cd6dcf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7431,8 +7431,12 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
* though the application is the one updating it.
*/
head = READ_ONCE(ctx->sq_array[sq_idx]);
- if (likely(head < ctx->sq_entries))
+ if (likely(head < ctx->sq_entries)) {
+ /* double index for 128-byte SQEs, twice as long */
+ if (ctx->flags & IORING_SETUP_SQE128)
+ head <<= 1;
return &ctx->sq_sqes[head];
+ }
/* drop invalid entries */
ctx->cq_extra--;
@@ -10431,7 +10435,10 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
rings->sq_ring_entries = p->sq_entries;
rings->cq_ring_entries = p->cq_entries;
- size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+ if (p->flags & IORING_SETUP_SQE128)
+ size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries);
+ else
+ size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
if (size == SIZE_MAX) {
io_mem_free(ctx->rings);
ctx->rings = NULL;
@@ -10643,7 +10650,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
- IORING_SETUP_R_DISABLED))
+ IORING_SETUP_R_DISABLED | IORING_SETUP_SQE128))
return -EINVAL;
return io_uring_create(entries, &p, params);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 787f491f0d2a..c5db68433ca5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -61,6 +61,12 @@ struct io_uring_sqe {
__u32 file_index;
};
__u64 __pad2[2];
+
+ /*
+ * If the ring is initializefd with IORING_SETUP_SQE128, then this field
+ * contains 64-bytes of padding, doubling the size of the SQE.
+ */
+ __u64 __big_sqe_pad[0];
};
enum {
@@ -101,6 +107,7 @@ enum {
#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
+#define IORING_SETUP_SQE128 (1U << 7) /* SQEs are 128b */
enum {
IORING_OP_NOP,
--
2.25.1
More information about the Linux-nvme
mailing list