[PATCH v4 07/11] io_uring/rw: add support to send meta along with read/write

Anuj Gupta anuj20.g at samsung.com
Wed Oct 16 04:29:08 PDT 2024


This patch adds the capability of sending meta along with read/write.
This meta is represented by a newly introduced 'struct io_uring_meta'
which specifies information such as meta type,flags,buffer,length,seed
and apptag.
Application sets up a SQE128 ring, prepares io_uring_meta within the
second SQE.
The patch processes the user-passed information to prepare uio_meta
descriptor and passes it down using kiocb->private.

Meta exchange is supported only for direct IO.
Also vectored read/write operations with meta are not supported
currently.

Signed-off-by: Anuj Gupta <anuj20.g at samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
---
 include/uapi/linux/io_uring.h | 26 ++++++++++++
 io_uring/io_uring.c           |  6 +++
 io_uring/rw.c                 | 75 +++++++++++++++++++++++++++++++++--
 io_uring/rw.h                 | 15 ++++++-
 4 files changed, 118 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 86cb385fe0b5..1cd165720fcc 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -105,6 +105,32 @@ struct io_uring_sqe {
 		 */
 		__u8	cmd[0];
 	};
+	/*
+	 * If the ring is initialized with IORING_SETUP_SQE128, then
+	 * this field is starting offset for 64 bytes of data. For meta io
+	 * this contains 'struct io_uring_meta'
+	 */
+	__u8	big_sqe_cmd[0];
+};
+
+enum io_uring_sqe_meta_type_bits {
+	META_TYPE_INTEGRITY_BIT,
+	/* not a real meta type; just to make sure that we don't overflow */
+	META_TYPE_LAST_BIT,
+};
+
+/* meta type flags */
+#define META_TYPE_INTEGRITY	(1U << META_TYPE_INTEGRITY_BIT)
+
+/* this goes to SQE128 */
+struct io_uring_meta {
+	__u16		meta_type;
+	__u16		meta_flags;
+	__u32		meta_len;
+	__u64		meta_addr;
+	__u32		seed;
+	__u16		app_tag;
+	__u8		pad[42];
 };
 
 /*
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d7ad4ea5f40b..e5551e2e7bde 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3857,6 +3857,12 @@ static int __init io_uring_init(void)
 	/* top 8bits are for internal use */
 	BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0);
 
+	BUILD_BUG_ON(sizeof(struct io_uring_meta) >
+		     sizeof(struct io_uring_sqe));
+
+	BUILD_BUG_ON(META_TYPE_LAST_BIT >
+		     8 * sizeof_field(struct io_uring_meta, meta_type));
+
 	io_uring_optable_init();
 
 	/*
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 80ae3c2ebb70..b727e5ef19fc 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -257,6 +257,49 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
 	return 0;
 }
 
+static inline void io_meta_save_state(struct io_async_rw *io)
+{
+	io->meta_state.seed = io->meta.seed;
+	iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta);
+}
+
+static inline void io_meta_restore(struct io_async_rw *io)
+{
+	io->meta.seed = io->meta_state.seed;
+	iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta);
+}
+
+static int io_prep_rw_meta(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			   struct io_rw *rw, int ddir)
+{
+	const struct io_uring_meta *md = (struct io_uring_meta *)sqe->big_sqe_cmd;
+	u16 meta_type = READ_ONCE(md->meta_type);
+	const struct io_issue_def *def;
+	struct io_async_rw *io;
+	int ret;
+
+	if (!meta_type)
+		return 0;
+	if (!(meta_type & META_TYPE_INTEGRITY))
+		return -EINVAL;
+
+	def = &io_issue_defs[req->opcode];
+	if (def->vectored)
+		return -EOPNOTSUPP;
+
+	io = req->async_data;
+	io->meta.flags = READ_ONCE(md->meta_flags);
+	io->meta.app_tag = READ_ONCE(md->app_tag);
+	io->meta.seed = READ_ONCE(md->seed);
+	ret = import_ubuf(ddir, u64_to_user_ptr(READ_ONCE(md->meta_addr)),
+			  READ_ONCE(md->meta_len), &io->meta.iter);
+	if (unlikely(ret < 0))
+		return ret;
+	rw->kiocb.ki_flags |= IOCB_HAS_METADATA;
+	io_meta_save_state(io);
+	return ret;
+}
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      int ddir, bool do_import)
 {
@@ -279,11 +322,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		rw->kiocb.ki_ioprio = get_current_ioprio();
 	}
 	rw->kiocb.dio_complete = NULL;
+	rw->kiocb.ki_flags = 0;
 
 	rw->addr = READ_ONCE(sqe->addr);
 	rw->len = READ_ONCE(sqe->len);
 	rw->flags = READ_ONCE(sqe->rw_flags);
-	return io_prep_rw_setup(req, ddir, do_import);
+	ret = io_prep_rw_setup(req, ddir, do_import);
+
+	if (unlikely(ret))
+		return ret;
+	if (unlikely(req->ctx->flags & IORING_SETUP_SQE128))
+		ret = io_prep_rw_meta(req, sqe, rw, ddir);
+	return ret;
 }
 
 int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -410,7 +460,10 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 static void io_resubmit_prep(struct io_kiocb *req)
 {
 	struct io_async_rw *io = req->async_data;
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
+	if (unlikely(rw->kiocb.ki_flags & IOCB_HAS_METADATA))
+		io_meta_restore(io);
 	iov_iter_restore(&io->iter, &io->iter_state);
 }
 
@@ -777,8 +830,12 @@ static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)
 
 static bool need_complete_io(struct io_kiocb *req)
 {
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+	/* Exclude meta IO as we don't support partial completion for that */
 	return req->flags & REQ_F_ISREG ||
-		S_ISBLK(file_inode(req->file)->i_mode);
+		S_ISBLK(file_inode(req->file)->i_mode) ||
+		!(rw->kiocb.ki_flags & IOCB_HAS_METADATA);
 }
 
 static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
@@ -795,7 +852,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 	if (!(req->flags & REQ_F_FIXED_FILE))
 		req->flags |= io_file_get_flags(file);
 
-	kiocb->ki_flags = file->f_iocb_flags;
+	kiocb->ki_flags |= file->f_iocb_flags;
 	ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
 	if (unlikely(ret))
 		return ret;
@@ -824,6 +881,14 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 		kiocb->ki_complete = io_complete_rw;
 	}
 
+	if (unlikely(kiocb->ki_flags & IOCB_HAS_METADATA)) {
+		struct io_async_rw *io = req->async_data;
+
+		if (!(req->file->f_flags & O_DIRECT))
+			return -EOPNOTSUPP;
+		kiocb->private = &io->meta;
+	}
+
 	return 0;
 }
 
@@ -898,6 +963,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
 	 * manually if we need to.
 	 */
 	iov_iter_restore(&io->iter, &io->iter_state);
+	if (unlikely(kiocb->ki_flags & IOCB_HAS_METADATA))
+		io_meta_restore(io);
 
 	do {
 		/*
@@ -1102,6 +1169,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 ret_eagain:
 		iov_iter_restore(&io->iter, &io->iter_state);
+		if (unlikely(kiocb->ki_flags & IOCB_HAS_METADATA))
+			io_meta_restore(io);
 		if (kiocb->ki_flags & IOCB_WRITE)
 			io_req_end_write(req);
 		return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..af9e338b2bd8 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -1,6 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/pagemap.h>
+#include <linux/bio-integrity.h>
+
+struct io_meta_state {
+	u32			seed;
+	struct iov_iter_state	iter_meta;
+};
 
 struct io_async_rw {
 	size_t				bytes_done;
@@ -9,7 +15,14 @@ struct io_async_rw {
 	struct iovec			fast_iov;
 	struct iovec			*free_iovec;
 	int				free_iov_nr;
-	struct wait_page_queue		wpq;
+	/* wpq is for buffered io, while meta fields are used with direct io*/
+	union {
+		struct wait_page_queue		wpq;
+		struct {
+			struct uio_meta			meta;
+			struct io_meta_state		meta_state;
+		};
+	};
 };
 
 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-- 
2.25.1




More information about the Linux-nvme mailing list