[RFC 0/5] big-cqe based uring-passthru

Christoph Hellwig hch at lst.de
Mon Apr 4 00:21:52 PDT 2022


I really can't get excited about the pdu thingy.  Here is a patch
(on top of the series and the patch sent in reply to patch 4) that
does away with it and just adds an oob_user field to struct io_uring_cmd
to simplify the handling a fair bit:

---
>From 426fa5de1d5f5a718b797eda2fc3ea47010662f7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch at lst.de>
Date: Mon, 4 Apr 2022 08:24:43 +0200
Subject: io_uring: explicit support for out of band data in io_uring_cmd

Instead of the magic pdu byte array, which in its current form causes
unaligned pointers and a lot of casting, add explicit support for
out-of-band data in struct io_uring_cmd and just leave a normal private
data pointer to the driver.

Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 drivers/nvme/host/ioctl.c | 35 +++++++----------------------------
 include/linux/io_uring.h  | 10 ++++++++--
 2 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index ea6cfd4321942..b93c6ecfcd2ab 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -37,27 +37,9 @@ static int nvme_ioctl_finish_metadata(struct bio *bio, int ret,
 	return ret;
 }
 
-/*
- * This overlays struct io_uring_cmd pdu.
- * Expect build errors if this grows larger than that.
- */
-struct nvme_uring_cmd_pdu {
-	union {
-		struct bio *bio;
-		struct request *req;
-	};
-	void __user *meta_buffer;
-} __packed;
-
-static struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(struct io_uring_cmd *ioucmd)
-{
-	return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
-}
-
 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
 {
-	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	struct request *req = pdu->req;
+	struct request *req = ioucmd->private;
 	struct bio *bio = req->bio;
 	int status;
 	u64 result;
@@ -71,7 +53,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
 	blk_mq_free_request(req);
 	blk_rq_unmap_user(bio);
 
-	status = nvme_ioctl_finish_metadata(bio, status, pdu->meta_buffer);
+	status = nvme_ioctl_finish_metadata(bio, status, ioucmd->oob_user);
 	result = le64_to_cpu(nvme_req(req)->result.u64);
 	io_uring_cmd_done(ioucmd, status, result);
 }
@@ -79,12 +61,10 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
 static void nvme_end_async_pt(struct request *req, blk_status_t err)
 {
 	struct io_uring_cmd *ioucmd = req->end_io_data;
-	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	/* extract bio before reusing the same field for request */
-	struct bio *bio = pdu->bio;
+	struct bio *bio = ioucmd->private;
 
-	pdu->req = req;
 	req->bio = bio;
+	ioucmd->private = req;
 
 	/* this takes care of moving rest of completion-work to task context */
 	io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
@@ -381,7 +361,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 {
 	struct nvme_passthru_cmd64 *cmd =
 		(struct nvme_passthru_cmd64 *)ioucmd->cmd;
-	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
 	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
 	struct nvme_command c;
 	struct request *req;
@@ -415,10 +394,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		return PTR_ERR(req);
 
 	/* to free bio on completion, as req->bio will be null at that time */
-	pdu->bio = req->bio;
-	pdu->meta_buffer = nvme_to_user_ptr(cmd->metadata);
-	req->end_io_data = ioucmd;
+	ioucmd->private = req->bio;
+	ioucmd->oob_user = nvme_to_user_ptr(cmd->metadata);
 
+	req->end_io_data = ioucmd;
 	blk_execute_rq_nowait(req, 0, nvme_end_async_pt);
 	return -EIOCBQUEUED;
 }
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 0aba7b50cde65..95b56e45cd539 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -23,8 +23,14 @@ struct io_uring_cmd {
 	u32             flags;
 	u32             cmd_op;
 	u16		cmd_len;
-	u16		unused;
-	u8		pdu[28]; /* available inline for free use */
+
+	void		*private;
+
+	/*
+	 * Out of band data can be used for data that is not the main data.
+	 * E.g. block device PI/metadata or additional information.
+	 */
+	void __user	*oob_user;
 };
 
 #if defined(CONFIG_IO_URING)
-- 
2.30.2




More information about the Linux-nvme mailing list