[PATCH 2/8] io_uring: add infrastructure around io_uring_cmd_sqe issue type

Jens Axboe axboe at kernel.dk
Wed Mar 17 22:10:21 GMT 2021


Define an io_uring_cmd_sqe struct that passthrough commands can use,
and define an array holding the offsets of the two members we care
about (user_data and personality). Then both SQE types can be
initialized in basically the same way, just reading user_data and
personality at the offsets defined for the command type.
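
As a rough illustration of the idea, here is a stand-alone user-space
sketch (simplified stand-in structs, not the real uapi layouts) of how
one shared init path can read user_data/personality through per-layout
offsets instead of hard-coding a single SQE format:

/* Simplified stand-ins for the two SQE layouts (illustration only). */
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct hdr  { uint8_t opcode, flags; uint16_t ioprio; int32_t fd; };
/* layout 0: fields scattered like the regular SQE (simplified) */
struct sqe  { struct hdr hdr; uint64_t off, addr; uint32_t len, rw_flags;
	      uint64_t user_data; uint16_t buf_index, personality; };
/* layout 1: fields packed up front like the cmd SQE (simplified) */
struct csqe { struct hdr hdr; uint64_t user_data; uint16_t op, personality;
	      uint32_t len; uint64_t pdu[5]; };

/* Per-layout offsets of the two fields the shared init path needs. */
static const struct { unsigned char user_data, personality; } offs[] = {
	{ offsetof(struct sqe,  user_data), offsetof(struct sqe,  personality) },
	{ offsetof(struct csqe, user_data), offsetof(struct csqe, personality) },
};

/* One init path for both layouts: idx selects the offset pair. */
static void init_req(const void *p, unsigned idx)
{
	uint64_t ud;
	uint16_t pers;

	memcpy(&ud,   (const char *)p + offs[idx].user_data,   sizeof(ud));
	memcpy(&pers, (const char *)p + offs[idx].personality, sizeof(pers));
	printf("user_data=%llu personality=%u\n", (unsigned long long)ud, pers);
}

int main(void)
{
	struct sqe  s = { .user_data = 7,  .personality = 1 };
	struct csqe c = { .user_data = 42, .personality = 3 };

	init_req(&s, 0);	/* regular layout */
	init_req(&c, 1);	/* cmd layout */
	return 0;
}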

Signed-off-by: Jens Axboe <axboe at kernel.dk>
---
 fs/io_uring.c                 | 57 +++++++++++++++++++++++++++--------
 include/uapi/linux/io_uring.h | 10 ++++++
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 416e47832468..a4699b066172 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -824,6 +824,22 @@ struct io_defer_entry {
 	u32			seq;
 };
 
+struct sqe_offset {
+	unsigned char		user_data;
+	unsigned char		personality;
+};
+
+static struct sqe_offset sqe_offsets[] = {
+	{
+		.user_data	= offsetof(struct io_uring_sqe, user_data),
+		.personality	= offsetof(struct io_uring_sqe, personality)
+	},
+	{
+		.user_data	= offsetof(struct io_uring_cmd_sqe, user_data),
+		.personality	= offsetof(struct io_uring_cmd_sqe, personality)
+	}
+};
+
 struct io_op_def {
 	/* needs req->file assigned */
 	unsigned		needs_file : 1;
@@ -844,6 +860,8 @@ struct io_op_def {
 	unsigned		plug : 1;
 	/* size of async data needed, if any */
 	unsigned short		async_size;
+	/* offset definition for user_data/personality */
+	unsigned short		offsets;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -988,6 +1006,9 @@ static const struct io_op_def io_op_defs[] = {
 	},
 	[IORING_OP_RENAMEAT] = {},
 	[IORING_OP_UNLINKAT] = {},
+	[IORING_OP_URING_CMD] = {
+		.offsets		= 1,
+	},
 };
 
 static bool io_disarm_next(struct io_kiocb *req);
@@ -6384,16 +6405,21 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
 }
 
 static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
-		       const struct io_uring_sqe *sqe)
+		       const struct io_uring_sqe_hdr *hdr)
 {
 	struct io_submit_state *state;
+	const struct io_op_def *def;
 	unsigned int sqe_flags;
+	const __u64 *uptr;
+	const __u16 *pptr;
 	int personality, ret = 0;
 
-	req->opcode = READ_ONCE(sqe->hdr.opcode);
+	req->opcode = READ_ONCE(hdr->opcode);
+	def = &io_op_defs[req->opcode];
 	/* same numerical values with corresponding REQ_F_*, safe to copy */
-	req->flags = sqe_flags = READ_ONCE(sqe->hdr.flags);
-	req->user_data = READ_ONCE(sqe->user_data);
+	req->flags = sqe_flags = READ_ONCE(hdr->flags);
+	uptr = (const void *) hdr + sqe_offsets[def->offsets].user_data;
+	req->user_data = READ_ONCE(*uptr);
 	req->async_data = NULL;
 	req->file = NULL;
 	req->ctx = ctx;
@@ -6419,11 +6445,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
 		return -EACCES;
 
-	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
-	    !io_op_defs[req->opcode].buffer_select)
+	if ((sqe_flags & IOSQE_BUFFER_SELECT) && !def->buffer_select)
 		return -EOPNOTSUPP;
 
-	personality = READ_ONCE(sqe->personality);
+	pptr = (const void *) hdr + sqe_offsets[def->offsets].personality;
+	personality = READ_ONCE(*pptr);
 	if (personality) {
 		req->work.creds = xa_load(&ctx->personalities, personality);
 		if (!req->work.creds)
@@ -6436,17 +6462,15 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	 * Plug now if we have more than 1 IO left after this, and the target
 	 * is potentially a read/write to block based storage.
 	 */
-	if (!state->plug_started && state->ios_left > 1 &&
-	    io_op_defs[req->opcode].plug) {
+	if (!state->plug_started && state->ios_left > 1 && def->plug) {
 		blk_start_plug(&state->plug);
 		state->plug_started = true;
 	}
 
-	if (io_op_defs[req->opcode].needs_file) {
+	if (def->needs_file) {
 		bool fixed = req->flags & REQ_F_FIXED_FILE;
 
-		req->file = io_file_get(state, req, READ_ONCE(sqe->hdr.fd),
-					fixed);
+		req->file = io_file_get(state, req, READ_ONCE(hdr->fd), fixed);
 		if (unlikely(!req->file))
 			ret = -EBADF;
 	}
@@ -6461,7 +6485,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	struct io_submit_link *link = &ctx->submit_state.link;
 	int ret;
 
-	ret = io_init_req(ctx, req, sqe);
+	ret = io_init_req(ctx, req, &sqe->hdr);
 	if (unlikely(ret)) {
 fail_req:
 		io_req_complete_failed(req, ret);
@@ -9915,6 +9939,7 @@ static int __init io_uring_init(void)
 #define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
 	__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
 	BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
+	BUILD_BUG_ON(sizeof(struct io_uring_cmd_sqe) != 64);
 	BUILD_BUG_SQE_ELEM(0,  __u8,   hdr.opcode);
 	BUILD_BUG_SQE_ELEM(1,  __u8,   hdr.flags);
 	BUILD_BUG_SQE_ELEM(2,  __u16,  hdr.ioprio);
@@ -9943,6 +9968,12 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(40, __u16,  buf_index);
 	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
 	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
+#define BUILD_BUG_SQEC_ELEM(eoffset, etype, ename) \
+	__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_cmd_sqe, eoffset, etype, ename)
+	BUILD_BUG_SQEC_ELEM(8,				__u64,	user_data);
+	BUILD_BUG_SQEC_ELEM(18,				__u16,	personality);
+	BUILD_BUG_SQEC_ELEM(sqe_offsets[1].user_data,	__u64,	user_data);
+	BUILD_BUG_SQEC_ELEM(sqe_offsets[1].personality,	__u16,	personality);
 
 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
 	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5609474ccd9f..165ac406f00b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -74,6 +74,15 @@ struct io_uring_sqe {
 	};
 };
 
+struct io_uring_cmd_sqe {
+	struct io_uring_sqe_hdr	hdr;
+	__u64			user_data;
+	__u16			op;
+	__u16			personality;
+	__u32			len;
+	__u64			pdu[5];
+};
+
 enum {
 	IOSQE_FIXED_FILE_BIT,
 	IOSQE_IO_DRAIN_BIT,
@@ -148,6 +157,7 @@ enum {
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
 	IORING_OP_UNLINKAT,
+	IORING_OP_URING_CMD,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.31.0
