[PATCH 2/8] io_uring: add infrastructure around io_uring_cmd_sqe issue type
Jens Axboe
axboe at kernel.dk
Wed Mar 17 22:10:21 GMT 2021
Define an io_uring_cmd_sqe struct that passthrough commands can use, and
define an array with offset information for the two members we care about
(user_data and personality). Then we can init both command types in
basically the same way, just reading user_data and personality at the
offsets defined for each command type.
Signed-off-by: Jens Axboe <axboe at kernel.dk>
---
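The scheme boils down to indexing a small per-type offset table and reading
through the resulting byte offset, so one init path serves both SQE layouts.
A minimal userspace sketch of the same idea (sqe_v0/sqe_v1 and ud_off are
invented names with simplified layouts, not the real uapi definitions):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* two layouts that keep user_data at different offsets */
struct sqe_v0 { uint8_t opcode; uint8_t pad[31]; uint64_t user_data; };
struct sqe_v1 { uint64_t hdr;   uint64_t user_data; };

/* per-layout offset table, indexed like io_op_defs[opcode].offsets */
static const unsigned char ud_off[] = {
	offsetof(struct sqe_v0, user_data),
	offsetof(struct sqe_v1, user_data),
};

int main(void)
{
	struct sqe_v1 s = { .hdr = 0, .user_data = 42 };
	/* generic read: one code path, offset picked by table index
	 * (the kernel patch does this with GNU void * arithmetic) */
	const uint64_t *p =
		(const uint64_t *)((const char *)&s + ud_off[1]);

	printf("user_data = %llu\n", (unsigned long long)*p);
	return 0;
}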
fs/io_uring.c | 57 +++++++++++++++++++++++++++--------
include/uapi/linux/io_uring.h | 10 ++++++
2 files changed, 54 insertions(+), 13 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 416e47832468..a4699b066172 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -824,6 +824,22 @@ struct io_defer_entry {
u32 seq;
};
+struct sqe_offset {
+ unsigned char user_data;
+ unsigned char personality;
+};
+
+static struct sqe_offset sqe_offsets[] = {
+ {
+ .user_data = offsetof(struct io_uring_sqe, user_data),
+ .personality = offsetof(struct io_uring_sqe, personality)
+ },
+ {
+ .user_data = offsetof(struct io_uring_cmd_sqe, user_data),
+ .personality = offsetof(struct io_uring_cmd_sqe, personality)
+ }
+};
+
struct io_op_def {
/* needs req->file assigned */
unsigned needs_file : 1;
@@ -844,6 +860,8 @@ struct io_op_def {
unsigned plug : 1;
/* size of async data needed, if any */
unsigned short async_size;
+ /* offset definition for user_data/personality */
+ unsigned short offsets;
};
static const struct io_op_def io_op_defs[] = {
@@ -988,6 +1006,9 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_RENAMEAT] = {},
[IORING_OP_UNLINKAT] = {},
+ [IORING_OP_URING_CMD] = {
+ .offsets = 1,
+ },
};
static bool io_disarm_next(struct io_kiocb *req);
@@ -6384,16 +6405,21 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
}
static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+ const struct io_uring_sqe_hdr *hdr)
{
struct io_submit_state *state;
+ const struct io_op_def *def;
unsigned int sqe_flags;
+ const __u64 *uptr;
+ const __u16 *pptr;
int personality, ret = 0;
- req->opcode = READ_ONCE(sqe->hdr.opcode);
+ req->opcode = READ_ONCE(hdr->opcode);
+ def = &io_op_defs[req->opcode];
/* same numerical values with corresponding REQ_F_*, safe to copy */
- req->flags = sqe_flags = READ_ONCE(sqe->hdr.flags);
- req->user_data = READ_ONCE(sqe->user_data);
+ req->flags = sqe_flags = READ_ONCE(hdr->flags);
+ uptr = (const void *) hdr + sqe_offsets[def->offsets].user_data;
+ req->user_data = READ_ONCE(*uptr);
req->async_data = NULL;
req->file = NULL;
req->ctx = ctx;
@@ -6419,11 +6445,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
return -EACCES;
- if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
- !io_op_defs[req->opcode].buffer_select)
+ if ((sqe_flags & IOSQE_BUFFER_SELECT) && !def->buffer_select)
return -EOPNOTSUPP;
- personality = READ_ONCE(sqe->personality);
+ pptr = (const void *) hdr + sqe_offsets[def->offsets].personality;
+ personality = READ_ONCE(*pptr);
if (personality) {
req->work.creds = xa_load(&ctx->personalities, personality);
if (!req->work.creds)
@@ -6436,17 +6462,15 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
* Plug now if we have more than 1 IO left after this, and the target
* is potentially a read/write to block based storage.
*/
- if (!state->plug_started && state->ios_left > 1 &&
- io_op_defs[req->opcode].plug) {
+ if (!state->plug_started && state->ios_left > 1 && def->plug) {
blk_start_plug(&state->plug);
state->plug_started = true;
}
- if (io_op_defs[req->opcode].needs_file) {
+ if (def->needs_file) {
bool fixed = req->flags & REQ_F_FIXED_FILE;
- req->file = io_file_get(state, req, READ_ONCE(sqe->hdr.fd),
- fixed);
+ req->file = io_file_get(state, req, READ_ONCE(hdr->fd), fixed);
if (unlikely(!req->file))
ret = -EBADF;
}
@@ -6461,7 +6485,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
struct io_submit_link *link = &ctx->submit_state.link;
int ret;
- ret = io_init_req(ctx, req, sqe);
+ ret = io_init_req(ctx, req, &sqe->hdr);
if (unlikely(ret)) {
fail_req:
io_req_complete_failed(req, ret);
@@ -9915,6 +9939,7 @@ static int __init io_uring_init(void)
#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
+ BUILD_BUG_ON(sizeof(struct io_uring_cmd_sqe) != 64);
BUILD_BUG_SQE_ELEM(0, __u8, hdr.opcode);
BUILD_BUG_SQE_ELEM(1, __u8, hdr.flags);
BUILD_BUG_SQE_ELEM(2, __u16, hdr.ioprio);
@@ -9943,6 +9968,12 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+#define BUILD_BUG_SQEC_ELEM(eoffset, etype, ename) \
+ __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_cmd_sqe, eoffset, etype, ename)
+ BUILD_BUG_SQEC_ELEM(8, __u64, user_data);
+ BUILD_BUG_SQEC_ELEM(18, __u16, personality);
+ BUILD_BUG_SQEC_ELEM(sqe_offsets[1].user_data, __u64, user_data);
+ BUILD_BUG_SQEC_ELEM(sqe_offsets[1].personality, __u16, personality);
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5609474ccd9f..165ac406f00b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -74,6 +74,15 @@ struct io_uring_sqe {
};
};
+struct io_uring_cmd_sqe {
+ struct io_uring_sqe_hdr hdr;
+ __u64 user_data;
+ __u16 op;
+ __u16 personality;
+ __u32 len;
+ __u64 pdu[5];
+};
+
enum {
IOSQE_FIXED_FILE_BIT,
IOSQE_IO_DRAIN_BIT,
@@ -148,6 +157,7 @@ enum {
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
+ IORING_OP_URING_CMD,
/* this goes last, obviously */
IORING_OP_LAST,
--
2.31.0
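For anyone experimenting with the series, a userspace sketch of filling in
the new SQE type, mirroring the uapi layout from this patch so it compiles
without patched kernel headers. The io_uring_sqe_hdr layout is inferred from
the series (opcode/flags/ioprio/fd); the opcode value, fd, tag, and driver
op below are illustrative assumptions, not part of this patch:

#include <stdint.h>
#include <string.h>

struct io_uring_sqe_hdr {	/* shared SQE header from this series */
	uint8_t  opcode;
	uint8_t  flags;
	uint16_t ioprio;
	int32_t  fd;
};

struct io_uring_cmd_sqe {	/* as added in this patch */
	struct io_uring_sqe_hdr hdr;
	uint64_t user_data;
	uint16_t op;
	uint16_t personality;
	uint32_t len;
	uint64_t pdu[5];
};

/* must stay one SQE (64 bytes) wide, like the kernel's BUILD_BUG_ON */
_Static_assert(sizeof(struct io_uring_cmd_sqe) == 64, "cmd sqe size");

int main(void)
{
	struct io_uring_cmd_sqe sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.hdr.opcode = 37;	/* IORING_OP_URING_CMD's enum slot in this
				 * series; real code should take it from the
				 * patched uapi header instead */
	sqe.hdr.fd = 3;		/* target device fd (made up) */
	sqe.user_data = 0xfeed;	/* completion tag; the kernel reads it via
				 * sqe_offsets[1].user_data */
	sqe.op = 1;		/* driver-private command op (made up) */
	sqe.personality = 0;	/* 0 = submitter's creds, as usual */
	/* sqe.pdu[] would carry the driver-specific payload */
	return 0;
}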