[RFC PATCH 2/6] nvme: wire-up support for async-passthru on char-device.
Christoph Hellwig
hch at lst.de
Tue Sep 7 00:46:50 PDT 2021
Looking at this in isolation:
- no need to also implement the legacy non-64 passthrough interface
- no need to overlay the block_uring_cmd structure as that makes a
complete mess
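For reference, the overlay in question packs both the user-visible
arguments and the driver-internal state into the same ->pdu bytes by
casting through block_uring_cmd, roughly like this (reconstructed from
the removed hunks below, so treat the exact layout as approximate):

        struct block_uring_cmd *bcmd = (struct block_uring_cmd *)&ioucmd->pdu;
        void __user *argp = (void __user *)bcmd->unused2[0];    /* user ABI */
        struct uring_cmd_data *ucd =
                (struct uring_cmd_data *)&bcmd->unused2[1];     /* driver state */

and then relies on a BUILD_BUG_ON to keep the driver state within the
bytes left after unused2[0].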
Below is an untested patch to fix that up a bit.
A few other notes:
- I suspect the ioctl_cmd really should move into the core uring_cmd
infrastructure
- please stick to the naming of the file operation instead of using
something different.  That being said, async_ioctl seems a better
fit than uring_cmd
- that whole mix of user space interface and internal data in the
->pdu field is a mess.  What is the problem with deferring the
request freeing into the user context, which would clean up
quite a bit of that, especially if io_uring_cmd grows a private
field?
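As a rough sketch of that last idea, assuming io_uring_cmd grew a
driver-private field (it has no such field today) and the request were
only freed from the task work, the user-context side could look
something like this (untested):

        static void nvme_pt_task_cb(struct io_uring_cmd *ioucmd)
        {
                struct request *req = ioucmd->private;  /* hypothetical field */
                int status;

                if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
                        status = -EINTR;
                else
                        status = nvme_req(req)->status;

                /*
                 * The nvme result and the metadata copy-out can be handled
                 * here as well, straight from the still-live request,
                 * instead of being stashed into ->pdu by the irq-side
                 * completion handler.
                 */

                blk_mq_free_request(req);
                io_uring_cmd_done(ioucmd, status);
        }

The end_io callback would then shrink to little more than an
io_uring_cmd_complete_in_task() call.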
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index d336e34aac410..8ceff441b6425 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -18,12 +18,12 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
ptrval = (compat_uptr_t)ptrval;
return (void __user *)ptrval;
}
-/*
- * This is carved within the io_uring_cmd, to avoid dynamic allocation.
- * Care should be taken not to grow this beyond what is available.
- * Expect build warning otherwise.
- */
-struct uring_cmd_data {
+
+/* This overlays struct io_uring_cmd pdu (40 bytes) */
+struct nvme_uring_cmd {
+ __u32 ioctl_cmd;
+ __u32 unused1;
+ void __user *argp;
union {
struct bio *bio;
u64 result; /* nvme cmd result */
@@ -32,57 +32,42 @@ struct uring_cmd_data {
int status; /* nvme cmd status */
};
-inline u64 *nvme_ioucmd_data_addr(struct io_uring_cmd *ioucmd)
+static struct nvme_uring_cmd *nvme_uring_cmd(struct io_uring_cmd *ioucmd)
{
- return &(((struct block_uring_cmd *)&ioucmd->pdu)->unused2[1]);
+ return (struct nvme_uring_cmd *)&ioucmd->pdu;
}
static void nvme_pt_task_cb(struct io_uring_cmd *ioucmd)
{
- struct uring_cmd_data *ucd;
- struct nvme_passthru_cmd64 __user *ptcmd64 = NULL;
- struct block_uring_cmd *bcmd;
+ struct nvme_uring_cmd *cmd = nvme_uring_cmd(ioucmd);
+ struct nvme_passthru_cmd64 __user *ptcmd64 = cmd->argp;
- bcmd = (struct block_uring_cmd *) &ioucmd->pdu;
- ptcmd64 = (void __user *) bcmd->unused2[0];
- ucd = (struct uring_cmd_data *) nvme_ioucmd_data_addr(ioucmd);
-
- if (ucd->meta) {
+ if (cmd->meta) {
void __user *umeta = nvme_to_user_ptr(ptcmd64->metadata);
- if (!ucd->status)
- if (copy_to_user(umeta, ucd->meta, ptcmd64->metadata_len))
- ucd->status = -EFAULT;
- kfree(ucd->meta);
+ if (!cmd->status)
+ if (copy_to_user(umeta, cmd->meta, ptcmd64->metadata_len))
+ cmd->status = -EFAULT;
+ kfree(cmd->meta);
}
- if (likely(bcmd->ioctl_cmd == NVME_IOCTL_IO64_CMD)) {
- if (put_user(ucd->result, &ptcmd64->result))
- ucd->status = -EFAULT;
- } else {
- struct nvme_passthru_cmd __user *ptcmd = (void *)bcmd->unused2[0];
- if (put_user(ucd->result, &ptcmd->result))
- ucd->status = -EFAULT;
- }
- io_uring_cmd_done(ioucmd, ucd->status);
+ if (put_user(cmd->result, &ptcmd64->result))
+ cmd->status = -EFAULT;
+ io_uring_cmd_done(ioucmd, cmd->status);
}
static void nvme_end_async_pt(struct request *req, blk_status_t err)
{
- struct io_uring_cmd *ioucmd;
- struct uring_cmd_data *ucd;
- struct bio *bio;
-
- ioucmd = req->end_io_data;
- ucd = (struct uring_cmd_data *) nvme_ioucmd_data_addr(ioucmd);
+ struct io_uring_cmd *ioucmd = req->end_io_data;
+ struct nvme_uring_cmd *cmd = nvme_uring_cmd(ioucmd);
/* extract bio before reusing the same field for status */
- bio = ucd->bio;
+ struct bio *bio = cmd->bio;
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
- ucd->status = -EINTR;
+ cmd->status = -EINTR;
else
- ucd->status = nvme_req(req)->status;
- ucd->result = le64_to_cpu(nvme_req(req)->result.u64);
+ cmd->status = nvme_req(req)->status;
+ cmd->result = le64_to_cpu(nvme_req(req)->result.u64);
/* this takes care of setting up task-work */
io_uring_cmd_complete_in_task(ioucmd, nvme_pt_task_cb);
@@ -95,14 +80,15 @@ static void nvme_end_async_pt(struct request *req, blk_status_t err)
static void nvme_setup_uring_cmd_data(struct request *rq,
struct io_uring_cmd *ioucmd, void *meta, bool write)
{
- struct uring_cmd_data *ucd;
+ struct nvme_uring_cmd *cmd = nvme_uring_cmd(ioucmd);
- ucd = (struct uring_cmd_data *) nvme_ioucmd_data_addr(ioucmd);
/* to free bio on completion, as req->bio will be null at that time */
- ucd->bio = rq->bio;
+ cmd->bio = rq->bio;
/* meta update is required only for read requests */
if (meta && !write)
- ucd->meta = meta;
+ cmd->meta = meta;
+ else
+ cmd->meta = NULL;
rq->end_io_data = ioucmd;
}
@@ -139,23 +125,19 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
out:
return ERR_PTR(ret);
}
+
static inline bool nvme_is_fixedb_passthru(struct io_uring_cmd *ioucmd)
{
- struct block_uring_cmd *bcmd;
-
if (!ioucmd)
return false;
- bcmd = (struct block_uring_cmd *)&ioucmd->pdu;
- if (bcmd && ((bcmd->ioctl_cmd == NVME_IOCTL_IO_CMD_FIXED) ||
- (bcmd->ioctl_cmd == NVME_IOCTL_IO64_CMD_FIXED)))
- return true;
- return false;
+ return nvme_uring_cmd(ioucmd)->ioctl_cmd == NVME_IOCTL_IO64_CMD_FIXED;
}
+
/*
* Unlike blk_rq_map_user () this is only for fixed-buffer async passthrough.
* And hopefully faster as well.
*/
-int nvme_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
+static int nvme_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
void __user *ubuf, unsigned long len, gfp_t gfp_mask,
struct io_uring_cmd *ioucmd)
{
@@ -345,8 +327,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd,
- struct io_uring_cmd *ioucmd)
+ struct nvme_passthru_cmd __user *ucmd)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
@@ -382,9 +363,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
nvme_to_user_ptr(cmd.addr), cmd.data_len,
nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &result, timeout, ioucmd);
+ 0, &result, timeout, NULL);
- if (!ioucmd && status >= 0) {
+ if (status >= 0) {
if (put_user(result, &ucmd->result))
return -EFAULT;
}
@@ -453,7 +434,7 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
{
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp, NULL);
+ return nvme_user_cmd(ctrl, NULL, argp);
case NVME_IOCTL_ADMIN64_CMD:
return nvme_user_cmd64(ctrl, NULL, argp, NULL);
default:
@@ -487,7 +468,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
force_successful_syscall_return();
return ns->head->ns_id;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, argp, NULL);
+ return nvme_user_cmd(ns->ctrl, ns, argp);
/*
* struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the
@@ -532,22 +513,13 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static int nvme_ns_async_ioctl(struct nvme_ns *ns, struct io_uring_cmd *ioucmd)
{
- struct block_uring_cmd *bcmd = (struct block_uring_cmd *)&ioucmd->pdu;
- void __user *argp = (void __user *) bcmd->unused2[0];
+ struct nvme_uring_cmd *cmd = nvme_uring_cmd(ioucmd);
int ret;
- BUILD_BUG_ON(sizeof(struct uring_cmd_data) >
- sizeof(struct block_uring_cmd) -
- offsetof(struct block_uring_cmd, unused2[1]));
-
- switch (bcmd->ioctl_cmd) {
- case NVME_IOCTL_IO_CMD:
- case NVME_IOCTL_IO_CMD_FIXED:
- ret = nvme_user_cmd(ns->ctrl, ns, argp, ioucmd);
- break;
+ switch (cmd->ioctl_cmd) {
case NVME_IOCTL_IO64_CMD:
case NVME_IOCTL_IO64_CMD_FIXED:
- ret = nvme_user_cmd64(ns->ctrl, ns, argp, ioucmd);
+ ret = nvme_user_cmd64(ns->ctrl, ns, cmd->argp, ioucmd);
break;
default:
ret = -ENOTTY;
@@ -674,7 +646,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp, NULL);
+ ret = nvme_user_cmd(ctrl, ns, argp);
nvme_put_ns(ns);
return ret;
@@ -691,7 +663,7 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp, NULL);
+ return nvme_user_cmd(ctrl, NULL, argp);
case NVME_IOCTL_ADMIN64_CMD:
return nvme_user_cmd64(ctrl, NULL, argp, NULL);
case NVME_IOCTL_IO_CMD:
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index fc05c6024edd6..a65e648a57928 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -78,7 +78,6 @@ struct nvme_passthru_cmd64 {
#define NVME_IOCTL_RESCAN _IO('N', 0x46)
#define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64)
#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64)
-#define NVME_IOCTL_IO_CMD_FIXED _IOWR('N', 0x49, struct nvme_passthru_cmd)
#define NVME_IOCTL_IO64_CMD_FIXED _IOWR('N', 0x50, struct nvme_passthru_cmd64)
#endif /* _UAPI_LINUX_NVME_IOCTL_H */