[PATCH v2 1/1] nvme : Add ioctl to query nvme attributes
Chaitanya Kulkarni
chaitanyak at nvidia.com
Thu Nov 10 07:13:04 PST 2022
On 11/10/22 03:05, Joel Granados wrote:
> An ioctl (NVME_IOCTL_GET_ATTR) is added to query nvme controller
> attributes needed to effectively write to the char device without needing
> to be a privileged user. They are also provided on block devices for
> convenience. The attributes here are meant to complement what you can
> already get with the allowed identify commands.
>
> New members are added to the nvme_ctrl struct: dmsl, vsl and wusl from
> the I/O Command Set Specific Identify Controller Data Structure in the nvme
> spec.
>
> We add NVME_IOCTL_GET_ATTR_{V0SZ,CURZS} in order to make the ioctl
> extensible. These serve as both size and version. The caller is expected to
> pass the structure size as the first member of the struct. For example:
>
> {...
> struct nvme_get_attr nvme_get_attr = {0};
> nvme_get_attr.argsz = sizeof(struct nvme_get_attr);
> ...}
>
> Signed-off-by: Joel Granados <j.granados at samsung.com>
> ---
> drivers/nvme/host/core.c | 5 ++-
> drivers/nvme/host/ioctl.c | 58 ++++++++++++++++++++++++++
> drivers/nvme/host/nvme.h | 11 +++++
> include/uapi/linux/nvme_ioctl.h | 74 +++++++++++++++++++++++++++++++++
> 4 files changed, 147 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 0090dc0b3ae6..267f28592b9d 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3062,9 +3062,12 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
>
> if (id->dmrl)
> ctrl->max_discard_segments = id->dmrl;
> - ctrl->dmrsl = le32_to_cpu(id->dmrsl);
> if (id->wzsl)
> ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
> + ctrl->dmrsl = le32_to_cpu(id->dmrsl);
> + ctrl->dmsl = le64_to_cpu(id->dmsl);
> + ctrl->vsl = id->vsl;
> + ctrl->wusl = id->wusl;
>
> free_data:
> kfree(id);
> diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
> index 9273db147872..ce69659f05b6 100644
> --- a/drivers/nvme/host/ioctl.c
> +++ b/drivers/nvme/host/ioctl.c
> @@ -53,6 +53,58 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
> return (void __user *)ptrval;
> }
>
> +static inline u32 nvme_sectors_to_mps(struct nvme_ctrl *ctrl, u32 unit)
> +{
> + return ilog2(unit) + 9 - 12 - NVME_CAP_MPSMIN(ctrl->cap);
please add a detailed comment here what you are doing and why you are
doing here... using magic numbers like 9 and 12 makes code hard to read
for others...
> +}
> +
> +static int nvme_ctrl_export_attrs(struct nvme_ctrl *ctrl,
> + struct nvme_get_attr __user *arg)
> +{
> + struct nvme_get_attr nvme_get_attr = {0};
following will not work ? thats what we have in the code :-
struct nvme_get_attr nvme_get_attr = { };
> + __u32 usize, retsize;
> + int ret;
> +
> + BUILD_BUG_ON(sizeof(struct nvme_get_attr) < NVME_IOCTL_GET_ATTR_V0SZ);
> + BUILD_BUG_ON(sizeof(struct nvme_get_attr) != NVME_IOCTL_GET_ATTR_CURSZ);
> +
what prevents us from moving this to _nvme_check_size() ?
> + if (copy_from_user(&nvme_get_attr, arg, 2 * sizeof(__u32)))
> + return -EFAULT;
> +
> + if (nvme_get_attr.flags != 0)
> + return -EINVAL;
> +
> + if (nvme_get_attr.argsz < NVME_IOCTL_GET_ATTR_V0SZ)
> + return -EINVAL;
> + usize = nvme_get_attr.argsz;
> +
> + /* Enforce backwards compatibility */
> + ret = copy_struct_from_user(&nvme_get_attr, NVME_IOCTL_GET_ATTR_CURSZ,
> + arg, usize);
> + if (ret)
> + return ret;
> +
> + nvme_get_attr.argsz = NVME_IOCTL_GET_ATTR_CURSZ;
> + nvme_get_attr.mpsmin = NVME_CAP_MPSMIN(ctrl->cap);
> + nvme_get_attr.vsl = ctrl->vsl;
> + nvme_get_attr.wusl = ctrl->wusl;
> + nvme_get_attr.dmrl = ctrl->max_discard_segments;
> + nvme_get_attr.dmsl = ctrl->dmsl;
> + nvme_get_attr.dmrsl = ctrl->dmrsl;
> + nvme_get_attr.oncs = ctrl->oncs;
> + if (ctrl->max_zeroes_sectors > 0)
> + nvme_get_attr.wzsl = nvme_sectors_to_mps(ctrl, ctrl->max_zeroes_sectors);
> + if (ctrl->max_hw_sectors > 0)
> + nvme_get_attr.mdts = nvme_sectors_to_mps(ctrl, ctrl->max_hw_sectors);
> +
> + retsize = min(usize, NVME_IOCTL_GET_ATTR_CURSZ);
> + ret = copy_to_user((struct nvme_get_attr __user *)arg, &nvme_get_attr, retsize);
> + if (ret)
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
> unsigned len, u32 seed)
> {
> @@ -613,6 +665,8 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
> return nvme_user_cmd(ctrl, NULL, argp, mode);
> case NVME_IOCTL_ADMIN64_CMD:
> return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
> + case NVME_IOCTL_GET_ATTR:
> + return nvme_ctrl_export_attrs(ctrl, argp);
> default:
> return sed_ioctl(ctrl->opal_dev, cmd, argp);
> }
> @@ -659,6 +713,8 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
> return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
> case NVME_IOCTL_IO64_CMD_VEC:
> return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
> + case NVME_IOCTL_GET_ATTR:
> + return nvme_ctrl_export_attrs(ns->ctrl, argp);
> default:
> return -ENOTTY;
> }
> @@ -950,6 +1006,8 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
> return -EACCES;
> nvme_queue_scan(ctrl);
> return 0;
> + case NVME_IOCTL_GET_ATTR:
> + return nvme_ctrl_export_attrs(ctrl, argp);
> default:
> return -ENOTTY;
> }
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index a29877217ee6..07c0d12747b7 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -291,6 +291,9 @@ struct nvme_ctrl {
> u16 crdt[3];
> u16 oncs;
> u32 dmrsl;
> + u64 dmsl;
> + u8 vsl;
> + u8 wusl;
> u16 oacs;
> u16 sqsize;
> u32 max_namespaces;
> @@ -815,6 +818,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
> union nvme_result *result, void *buffer, unsigned bufflen,
> int qid, int at_head,
> blk_mq_req_flags_t flags);
> +
> int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
> unsigned int dword11, void *buffer, size_t buflen,
> u32 *result);
> @@ -1072,4 +1076,11 @@ static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
> }
> #endif /* CONFIG_NVME_VERBOSE_ERRORS */
>
> +/*
> + * List of all nvme ioctl controller attribute calls. Use size of nvme_get_attr
> + * as the version which enables us to use copy_struct_from_user
> + */
> +#define NVME_IOCTL_GET_ATTR_V0SZ 32u
> +#define NVME_IOCTL_GET_ATTR_CURSZ NVME_IOCTL_GET_ATTR_V0SZ
> +
> #endif /* _NVME_H */
> diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
> index 2f76cba67166..f15d2f7363fb 100644
> --- a/include/uapi/linux/nvme_ioctl.h
> +++ b/include/uapi/linux/nvme_ioctl.h
> @@ -92,6 +92,79 @@ struct nvme_uring_cmd {
> __u32 rsvd2;
> };
>
> +
> +/*
> + * This struct is populated from the following nvme commands:
> + * [1] Identify Controller Data Structure
> + * [2] I/O Command Set Specific Identify Controller Data Structure for the
> + * NVME Command Set. Usually contained in struct nvme_id_ctrl_nvm
> + * [3] Controller capabilities on controller initialization
> + */
> +struct nvme_get_attr {
> + __u32 argsz;
> + __u32 flags;
> +
> + /*
> + * Memory Page Size MINimum : The host should not configure a page size that
> + * is larger than (2 ^ (12 + mpsmin)). Comes from [3]
> + */
we are not spelling out these fileds explicitely since description
already present in the spec, so just to make sure abbrivations match
to the fields in the spec (which I think you hv done it pretty nicely).
Please remove these comments as these are inconsistent with what
we have, see host/nvme.h:struct nvme_ctrl {}.
> + __u32 mpsmin;
> +
> + /*
> + * Verify Size Limit : Recommended or supported data size for a verify
> + * command. From [2].
> + */
same here
> + __u8 vsl;
> +
> + /*
> + * Write Zeroes Size Limit : Recommended or supported data size for a
> + * zeroes command. From [2].
> + */
> + __u8 wzsl;
> +
same here
> + /*
> + * Write Uncorrected Size Limit : Recommended or supported data size for
> + * an uncorrected command. From [2].
> + */
> + __u8 wusl;
> +
same here
> + /*
> + * Dataset Management Ranges Limit : Recommended or supported maximum
> + * number of logical block ranges for the Dataset Management Command.
> + * From [2].
> + */
> + __u8 dmrl;
> +
same here
> + /*
> + * Dataset Management Size Limit : Recommended or supported maximum of
> + * total number of logical blocks for a Dataset Management Command.
> + * From [2].
> + */
> + __u64 dmsl;
> +
same here
> + /*
> + * Dataset Management Range Size Limit : Recommended or supported maximum
> + * number of logical blocks in a range of a Dataset Management Command.
> + * From [2].
> + */
> + __u32 dmrsl;
same here
> +
> + /*
> + * Optional NVM Command Support. Is needed to make sense of attributes
> + * like vsl, wzsl, wusl... Comes from [1].
> + */
> + __u16 oncs;
> +
same here
> + /*
> + * Maximum data transfer size. Commands should not exceed this size.
> + * Comes from [1].
> + */
> + __u8 mdts;
> +
same here
> + __u8 rsvd0;
> +
reserving only 8 bits seems not so future proof, why can't we increase
the size of the rsvd to something like
NVMe command size - total size of this struct to make it future proof ?
> +};
> +
> #define nvme_admin_cmd nvme_passthru_cmd
>
> #define NVME_IOCTL_ID _IO('N', 0x40)
> @@ -104,6 +177,7 @@ struct nvme_uring_cmd {
> #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64)
> #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64)
> #define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64)
> +#define NVME_IOCTL_GET_ATTR _IOWR('N', 0x50, struct nvme_get_attr)
>
> /* io_uring async commands: */
> #define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd)
-ck
More information about the Linux-nvme
mailing list