[PATCH] nvme: uring_cmd specific request_queue for SGLs
Keith Busch
kbusch at meta.com
Tue Jun 24 14:14:44 PDT 2025
From: Keith Busch <kbusch at kernel.org>
User space passthrough IO commands are committed to using the SGL
transfer types if the device supports it. The virt_boundary_mask is a
PRP-specific constraint, and this limit causes kernel bounce buffers to
be used when a user vector could have been handled directly. Avoiding
unnecessary copies is important for uring_cmd usage as this is a high
performance interface.
For devices that support SGL, create a new request_queue that drops the
virt_boundary_mask so that vectored user requests can be used with
zero-copy performance. Normal read/write will still use the old boundary
mask, as we can't be certain that forcing all IO to use SGL instead of
PRP would avoid unexpected regressions on some devices.
Signed-off-by: Keith Busch <kbusch at kernel.org>
---
drivers/nvme/host/core.c | 28 +++++++++++++++++++++++++++-
drivers/nvme/host/ioctl.c | 2 +-
drivers/nvme/host/nvme.h | 1 +
3 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3da5ac71a9b07..e4e03cb9e5c0e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -721,7 +721,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd)
bool logging_enabled;
if (req->q->queuedata) {
- struct nvme_ns *ns = req->q->disk->private_data;
+ struct nvme_ns *ns = req->q->queuedata;
logging_enabled = ns->head->passthru_err_log_enabled;
req->timeout = NVME_IO_TIMEOUT;
@@ -4081,6 +4081,27 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
list_add(&ns->list, &ns->ctrl->namespaces);
}
+static void nvme_init_uring_queue(struct nvme_ns *ns)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct queue_limits lim = {};
+ struct request_queue *q;
+
+ if (!nvme_ctrl_sgl_supported(ctrl)) {
+ ns->uring_queue = ns->queue;
+ return;
+ }
+
+ nvme_set_ctrl_limits(ctrl, &lim);
+ lim.virt_boundary_mask = 0;
+
+ q = blk_mq_alloc_queue(ctrl->tagset, &lim, ns);
+ if (IS_ERR(q))
+ ns->uring_queue = ns->queue;
+ else
+ ns->uring_queue = q;
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
{
struct queue_limits lim = { };
@@ -4157,6 +4178,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (!nvme_ns_head_multipath(ns->head))
nvme_add_ns_cdev(ns);
+ nvme_init_uring_queue(ns);
nvme_mpath_add_disk(ns, info->anagrpid);
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
@@ -4224,6 +4246,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (!nvme_ns_head_multipath(ns->head))
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+ if (ns->uring_queue != ns->queue) {
+ blk_mq_destroy_queue(ns->uring_queue);
+ blk_put_queue(ns->uring_queue);
+ }
nvme_mpath_remove_sysfs_link(ns);
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 6b3ac8ae3f34b..f925a10391001 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -445,7 +445,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
- struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
+ struct request_queue *q = ns ? ns->uring_queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
struct iov_iter iter;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 7df2ea21851f5..d371940bd342d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -548,6 +548,7 @@ struct nvme_ns {
struct cdev cdev;
struct device cdev_device;
+ struct request_queue *uring_queue;
struct nvme_fault_inject fault_inject;
};
--
2.47.1
More information about the Linux-nvme
mailing list