[PATCH] nvme: uring_cmd specific request_queue for SGLs

Keith Busch <kbusch@meta.com>
Tue Jun 24 14:14:44 PDT 2025


From: Keith Busch <kbusch@kernel.org>

User space passthrough IO commands are committed to using the SGL
transfer type if the device supports it. The virt_boundary_mask is a
PRP-specific constraint, and this limit causes kernel bounce buffers to
be used when a user vector could have been handled directly. Avoiding
unnecessary copies is important for uring_cmd usage, as this is a high
performance interface.
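
For context, the submission path in question is a vectored passthrough
command on the namespace character device. Below is a minimal sketch,
not part of this patch, assuming liburing; the device path, nsid, LBA
format, and starting LBA are illustrative (build with -luring):

  #include <fcntl.h>
  #include <liburing.h>
  #include <linux/nvme_ioctl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/uio.h>

  #define NR_VECS 4
  #define VEC_LEN 4096

  int main(void)
  {
  	struct io_uring ring;
  	struct io_uring_sqe *sqe;
  	struct io_uring_cqe *cqe;
  	struct nvme_uring_cmd *cmd;
  	struct iovec iov[NR_VECS];
  	int fd, i;

  	/* NVMe passthrough needs the 128B SQE / 32B CQE ring format */
  	if (io_uring_queue_init(8, &ring,
  				IORING_SETUP_SQE128 | IORING_SETUP_CQE32))
  		return 1;

  	fd = open("/dev/ng0n1", O_RDONLY);	/* per-namespace char dev */
  	if (fd < 0)
  		return 1;

  	/* several discontiguous user buffers, one vectored command */
  	for (i = 0; i < NR_VECS; i++) {
  		iov[i].iov_base = aligned_alloc(4096, VEC_LEN);
  		iov[i].iov_len = VEC_LEN;
  	}

  	sqe = io_uring_get_sqe(&ring);
  	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0);
  	sqe->cmd_op = NVME_URING_CMD_IO_VEC;

  	cmd = (struct nvme_uring_cmd *)sqe->cmd;
  	memset(cmd, 0, sizeof(*cmd));
  	cmd->opcode = 0x02;			/* NVMe read */
  	cmd->nsid = 1;
  	cmd->addr = (__u64)(uintptr_t)iov;	/* iovec array... */
  	cmd->data_len = NR_VECS;		/* ...and its entry count */
  	cmd->cdw10 = 0;				/* SLBA, lower 32 bits */
  	cmd->cdw11 = 0;				/* SLBA, upper 32 bits */
  	cmd->cdw12 = NR_VECS * VEC_LEN / 512 - 1; /* 0's based NLB, 512B LBAs */

  	io_uring_submit(&ring);
  	io_uring_wait_cqe(&ring, &cqe);
  	printf("res: %d\n", cqe->res);	/* 0 on success */
  	io_uring_cqe_seen(&ring, cqe);
  	return 0;
  }

When the iovec layout violates the queue's virt_boundary_mask,
blk_rq_map_user_iov() falls back to a bounce copy; dropping the mask on
an SGL-capable queue lets the same vector map zero-copy.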

For devices that support SGLs, create a new request_queue that drops
the virt_boundary_mask so that vectored user requests can be serviced
with zero-copy performance. Normal read/write IO still uses the old
boundary mask, since we can't be sure that forcing all IO to use SGL
instead of PRP won't cause unexpected regressions on some devices.
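
If the controller doesn't support SGLs, or the queue allocation fails,
uring_queue just aliases the existing ns->queue so the ioctl path needs
no special casing. The gate is the existing nvme_ctrl_sgl_supported()
helper, which tests the SGLS field from Identify Controller, roughly:

  static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
  {
  	/* SGLS bits 1:0 nonzero: SGLs supported for NVM command set IO */
  	return ctrl->sgls & ((1 << 0) | (1 << 1));
  }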

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/core.c  | 28 +++++++++++++++++++++++++++-
 drivers/nvme/host/ioctl.c |  2 +-
 drivers/nvme/host/nvme.h  |  1 +
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3da5ac71a9b07..e4e03cb9e5c0e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -721,7 +721,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd)
 	bool logging_enabled;
 
 	if (req->q->queuedata) {
-		struct nvme_ns *ns = req->q->disk->private_data;
+		struct nvme_ns *ns = req->q->queuedata;
 
 		logging_enabled = ns->head->passthru_err_log_enabled;
 		req->timeout = NVME_IO_TIMEOUT;
@@ -4081,6 +4081,27 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
 	list_add(&ns->list, &ns->ctrl->namespaces);
 }
 
+static void nvme_init_uring_queue(struct nvme_ns *ns)
+{
+	struct nvme_ctrl *ctrl = ns->ctrl;
+	struct queue_limits lim = {};
+	struct request_queue *q;
+
+	if (!nvme_ctrl_sgl_supported(ctrl)) {
+		ns->uring_queue = ns->queue;
+		return;
+	}
+
+	nvme_set_ctrl_limits(ctrl, &lim);
+	lim.virt_boundary_mask = 0;
+
+	q = blk_mq_alloc_queue(ctrl->tagset, &lim, ns);
+	if (IS_ERR(q))
+		ns->uring_queue = ns->queue;
+	else
+		ns->uring_queue = q;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 {
 	struct queue_limits lim = { };
@@ -4157,6 +4178,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 
 	if (!nvme_ns_head_multipath(ns->head))
 		nvme_add_ns_cdev(ns);
+	nvme_init_uring_queue(ns);
 
 	nvme_mpath_add_disk(ns, info->anagrpid);
 	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
@@ -4224,6 +4246,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
 	if (!nvme_ns_head_multipath(ns->head))
 		nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+	if (ns->uring_queue != ns->queue) {
+		blk_mq_destroy_queue(ns->uring_queue);
+		blk_put_queue(ns->uring_queue);
+	}
 
 	nvme_mpath_remove_sysfs_link(ns);
 
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 6b3ac8ae3f34b..f925a10391001 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -445,7 +445,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 {
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
 	const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
-	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
+	struct request_queue *q = ns ? ns->uring_queue : ctrl->admin_q;
 	struct nvme_uring_data d;
 	struct nvme_command c;
 	struct iov_iter iter;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 7df2ea21851f5..d371940bd342d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -548,6 +548,7 @@ struct nvme_ns {
 
 	struct cdev		cdev;
 	struct device		cdev_device;
+	struct request_queue	*uring_queue;
 
 	struct nvme_fault_inject fault_inject;
 };
-- 
2.47.1