[PATCH v3 3/3] nvme/ioctl: move fixed buffer lookup to nvme_uring_cmd_io()
Caleb Sander Mateos
csander at purestorage.com
Mon Mar 24 13:05:40 PDT 2025
nvme_map_user_request() is called from both nvme_submit_user_cmd() and
nvme_uring_cmd_io(). But the ioucmd branch is only applicable to
nvme_uring_cmd_io(). Move it to nvme_uring_cmd_io() and just pass the
resulting iov_iter to nvme_map_user_request().
For NVMe passthru operations with fixed buffers, the fixed buffer lookup
happens in io_uring_cmd_import_fixed(). But nvme_uring_cmd_io() can
return -EAGAIN first from nvme_alloc_user_request() if all tags in the
tag set are in use. This ordering difference is observable when using
UBLK_U_IO_{,UN}REGISTER_IO_BUF SQEs to modify the fixed buffer table. If
the NVMe passthru operation is followed by UBLK_U_IO_UNREGISTER_IO_BUF
to unregister the fixed buffer and the NVMe passthru goes async, the
fixed buffer lookup will fail because it happens after the unregister.
Userspace should not depend on the order in which io_uring issues SQEs
submitted in parallel, but it may try submitting the SQEs together and
fall back on a slow path if the fixed buffer lookup fails. To make the
fast path more likely, do the import before nvme_alloc_user_request().
Signed-off-by: Caleb Sander Mateos <csander at purestorage.com>
---
drivers/nvme/host/ioctl.c | 45 +++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index f6576e7201c5..da0eee21ecd0 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -112,12 +112,11 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
return req;
}
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- struct io_uring_cmd *ioucmd, unsigned int flags,
- unsigned int iou_issue_flags)
+ struct iov_iter *iter, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
struct block_device *bdev = ns ? ns->disk->part0 : NULL;
bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
@@ -135,28 +134,16 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
if (!nvme_ctrl_meta_sgl_supported(ctrl))
dev_warn_once(ctrl->device,
"using unchecked metadata buffer\n");
}
- if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
- struct iov_iter iter;
-
- /* fixedbufs is only for non-vectored io */
- if (flags & NVME_IOCTL_VEC)
- return -EINVAL;
-
- ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
- rq_data_dir(req), &iter, ioucmd,
- iou_issue_flags);
- if (ret < 0)
- return ret;
- ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
- } else {
+ if (iter)
+ ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
+ else
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
0, rq_data_dir(req));
- }
if (ret)
return ret;
bio = req->bio;
@@ -194,11 +181,11 @@ static int nvme_submit_user_cmd(struct request_queue *q,
return PTR_ERR(req);
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
- meta_len, NULL, flags, 0);
+ meta_len, NULL, flags);
if (ret)
goto out_free_req;
}
bio = req->bio;
@@ -465,10 +452,12 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
+ struct iov_iter iter;
+ struct iov_iter *map_iter = NULL;
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
int ret;
@@ -500,10 +489,24 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
d.addr = READ_ONCE(cmd->addr);
d.data_len = READ_ONCE(cmd->data_len);
d.metadata_len = READ_ONCE(cmd->metadata_len);
d.timeout_ms = READ_ONCE(cmd->timeout_ms);
+ if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
+ /* fixedbufs is only for non-vectored io */
+ if (vec)
+ return -EINVAL;
+
+ ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
+ nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
+ issue_flags);
+ if (ret < 0)
+ return ret;
+
+ map_iter = &iter;
+ }
+
if (issue_flags & IO_URING_F_NONBLOCK) {
rq_flags |= REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
}
if (issue_flags & IO_URING_F_IOPOLL)
@@ -513,13 +516,13 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (IS_ERR(req))
return PTR_ERR(req);
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
if (d.data_len) {
- ret = nvme_map_user_request(req, d.addr,
- d.data_len, nvme_to_user_ptr(d.metadata),
- d.metadata_len, ioucmd, vec, issue_flags);
+ ret = nvme_map_user_request(req, d.addr, d.data_len,
+ nvme_to_user_ptr(d.metadata), d.metadata_len,
+ map_iter, vec);
if (ret)
goto out_free_req;
}
/* to free bio on completion, as req->bio will be null at that time */
--
2.45.2
More information about the Linux-nvme
mailing list