[PATCH v2 1/1] nvme : Add ioctl to query nvme attributes
Joel Granados
j.granados at samsung.com
Sat Nov 12 01:38:27 PST 2022
On Thu, Nov 10, 2022 at 03:13:04PM +0000, Chaitanya Kulkarni wrote:
> On 11/10/22 03:05, Joel Granados wrote:
> > An ioctl (NVME_IOCTL_GET_ATTR) is added to query nvme controller
> > attributes needed to effectively write to the char device without needing
> > to be a privileged user. They are also provided on block devices for
> > convenience. The attributes here are meant to complement what you can
> > already get with the allowed identify commands.
> >
> > New members are added to the nvme_ctrl struct: dmsl, vsl and wusl from
> > the I/O Command Set Specific Identify Controller Data Structure in the nvme
> > spec.
> >
> > We add NVME_IOCTL_GET_ATTR_{V0SZ,CURZS} in order to make the ioctl
> > extensible. These serve as both size and version. The caller is expected to
> > pass the structure size as the first member of the struct. For example:
> >
> > {...
> > struct nvme_get_attr nvme_get_attr = {0};
> > nvme_get_attr.argsz = sizeof(struct nvme_get_attr);
> > ...}
> >
> > Signed-off-by: Joel Granados <j.granados at samsung.com>
> > ---
> > drivers/nvme/host/core.c | 5 ++-
> > drivers/nvme/host/ioctl.c | 58 ++++++++++++++++++++++++++
> > drivers/nvme/host/nvme.h | 11 +++++
> > include/uapi/linux/nvme_ioctl.h | 74 +++++++++++++++++++++++++++++++++
> > 4 files changed, 147 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 0090dc0b3ae6..267f28592b9d 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -3062,9 +3062,12 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
> >
> > if (id->dmrl)
> > ctrl->max_discard_segments = id->dmrl;
> > - ctrl->dmrsl = le32_to_cpu(id->dmrsl);
> > if (id->wzsl)
> > ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
> > + ctrl->dmrsl = le32_to_cpu(id->dmrsl);
> > + ctrl->dmsl = le64_to_cpu(id->dmsl);
> > + ctrl->vsl = id->vsl;
> > + ctrl->wusl = id->wusl;
> >
> > free_data:
> > kfree(id);
> > diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
> > index 9273db147872..ce69659f05b6 100644
> > --- a/drivers/nvme/host/ioctl.c
> > +++ b/drivers/nvme/host/ioctl.c
> > @@ -53,6 +53,58 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
> > return (void __user *)ptrval;
> > }
> >
> > +static inline u32 nvme_sectors_to_mps(struct nvme_ctrl *ctrl, u32 unit)
> > +{
> > + return ilog2(unit) + 9 - 12 - NVME_CAP_MPSMIN(ctrl->cap);
>
> please add a detailed comment here what you are doing and why you are
> doing here... using magic numbers like 9 and 12 makes code hard to read
> for others...
Added a comment. We have several places where we use 12 and 9
"magically" to convert from NVMe spec to kernel sector values. Would it
make sense to #define this as NVME_CONST_MPSMIN_POW2_START. We can
definitely work on the name :)
>
> > +}
> > +
> > +static int nvme_ctrl_export_attrs(struct nvme_ctrl *ctrl,
> > + struct nvme_get_attr __user *arg)
> > +{
> > + struct nvme_get_attr nvme_get_attr = {0};
>
> following will not work ? thats what we have in the code :-
> struct nvme_get_attr nvme_get_attr = { };
Sure. lets go with the way nvme driver chose to do it.
>
> > + __u32 usize, retsize;
> > + int ret;
> > +
> > + BUILD_BUG_ON(sizeof(struct nvme_get_attr) < NVME_IOCTL_GET_ATTR_V0SZ);
> > + BUILD_BUG_ON(sizeof(struct nvme_get_attr) != NVME_IOCTL_GET_ATTR_CURSZ);
> > +
>
> what prevents us from moving this to _nvme_check_size() ?
That seems like a better place. thx for pointing it out.
>
> > + if (copy_from_user(&nvme_get_attr, arg, 2 * sizeof(__u32)))
> > + return -EFAULT;
> > +
> > + if (nvme_get_attr.flags != 0)
> > + return -EINVAL;
> > +
> > + if (nvme_get_attr.argsz < NVME_IOCTL_GET_ATTR_V0SZ)
> > + return -EINVAL;
> > + usize = nvme_get_attr.argsz;
> > +
> > + /* Enforce backwards compatibility */
> > + ret = copy_struct_from_user(&nvme_get_attr, NVME_IOCTL_GET_ATTR_CURSZ,
> > + arg, usize);
> > + if (ret)
> > + return ret;
> > +
> > + nvme_get_attr.argsz = NVME_IOCTL_GET_ATTR_CURSZ;
> > + nvme_get_attr.mpsmin = NVME_CAP_MPSMIN(ctrl->cap);
> > + nvme_get_attr.vsl = ctrl->vsl;
> > + nvme_get_attr.wusl = ctrl->wusl;
> > + nvme_get_attr.dmrl = ctrl->max_discard_segments;
> > + nvme_get_attr.dmsl = ctrl->dmsl;
> > + nvme_get_attr.dmrsl = ctrl->dmrsl;
> > + nvme_get_attr.oncs = ctrl->oncs;
> > + if (ctrl->max_zeroes_sectors > 0)
> > + nvme_get_attr.wzsl = nvme_sectors_to_mps(ctrl, ctrl->max_zeroes_sectors);
> > + if (ctrl->max_hw_sectors > 0)
> > + nvme_get_attr.mdts = nvme_sectors_to_mps(ctrl, ctrl->max_hw_sectors);
> > +
> > + retsize = min(usize, NVME_IOCTL_GET_ATTR_CURSZ);
> > + ret = copy_to_user((struct nvme_get_attr __user *)arg, &nvme_get_attr, retsize);
> > + if (ret)
> > + return -EFAULT;
> > +
> > + return 0;
> > +}
> > +
> > static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
> > unsigned len, u32 seed)
> > {
> > @@ -613,6 +665,8 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
> > return nvme_user_cmd(ctrl, NULL, argp, mode);
> > case NVME_IOCTL_ADMIN64_CMD:
> > return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
> > + case NVME_IOCTL_GET_ATTR:
> > + return nvme_ctrl_export_attrs(ctrl, argp);
> > default:
> > return sed_ioctl(ctrl->opal_dev, cmd, argp);
> > }
> > @@ -659,6 +713,8 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
> > return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
> > case NVME_IOCTL_IO64_CMD_VEC:
> > return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
> > + case NVME_IOCTL_GET_ATTR:
> > + return nvme_ctrl_export_attrs(ns->ctrl, argp);
> > default:
> > return -ENOTTY;
> > }
> > @@ -950,6 +1006,8 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
> > return -EACCES;
> > nvme_queue_scan(ctrl);
> > return 0;
> > + case NVME_IOCTL_GET_ATTR:
> > + return nvme_ctrl_export_attrs(ctrl, argp);
> > default:
> > return -ENOTTY;
> > }
> > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > index a29877217ee6..07c0d12747b7 100644
> > --- a/drivers/nvme/host/nvme.h
> > +++ b/drivers/nvme/host/nvme.h
> > @@ -291,6 +291,9 @@ struct nvme_ctrl {
> > u16 crdt[3];
> > u16 oncs;
> > u32 dmrsl;
> > + u64 dmsl;
> > + u8 vsl;
> > + u8 wusl;
> > u16 oacs;
> > u16 sqsize;
> > u32 max_namespaces;
> > @@ -815,6 +818,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
> > union nvme_result *result, void *buffer, unsigned bufflen,
> > int qid, int at_head,
> > blk_mq_req_flags_t flags);
> > +
> > int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
> > unsigned int dword11, void *buffer, size_t buflen,
> > u32 *result);
> > @@ -1072,4 +1076,11 @@ static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
> > }
> > #endif /* CONFIG_NVME_VERBOSE_ERRORS */
> >
> > +/*
> > + * List of all nvme ioctl controller attribute calls. Use size of nvme_get_attr
> > + * as the version which enables us to use copy_struct_from_user
> > + */
> > +#define NVME_IOCTL_GET_ATTR_V0SZ 32u
> > +#define NVME_IOCTL_GET_ATTR_CURSZ NVME_IOCTL_GET_ATTR_V0SZ
> > +
> > #endif /* _NVME_H */
> > diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
> > index 2f76cba67166..f15d2f7363fb 100644
> > --- a/include/uapi/linux/nvme_ioctl.h
> > +++ b/include/uapi/linux/nvme_ioctl.h
> > @@ -92,6 +92,79 @@ struct nvme_uring_cmd {
> > __u32 rsvd2;
> > };
> >
> > +
> > +/*
> > + * This struct is populated from the following nvme commands:
> > + * [1] Identify Controller Data Structure
> > + * [2] I/O Command Set Specific Identify Controller Data Structure for the
> > + * NVME Command Set. Usually contained in struct nvme_id_ctrl_nvm
> > + * [3] Controller capabilities on controller initialization
> > + */
> > +struct nvme_get_attr {
> > + __u32 argsz;
> > + __u32 flags;
> > +
> > + /*
> > + * Memory Page Size MINimum : The host should not configure a page size that
> > + * is larger than (2 ^ (12 + mpsmin)). Comes from [3]
> > + */
>
> we are not spelling out these fileds explicitely since description
> already present in the spec, so just to make sure abbrivations match
> to the fields in the spec (which I think you hv done it pretty nicely).
>
> Please remove these comments as these are inconsistent with what
> we have, see host/nvme.h:struct nvme_ctrl {}.
>
>
> > + __u32 mpsmin;
> > +
> > + /*
> > + * Verify Size Limit : Recommended or supported data size for a verify
> > + * command. From [2].
> > + */
>
> same here
>
> > + __u8 vsl;
> > +
> > + /*
> > + * Write Zeroes Size Limit : Recommended or supported data size for a
> > + * zeroes command. From [2].
> > + */
> > + __u8 wzsl;
> > +
>
> same here
> > + /*
> > + * Write Uncorrected Size Limit : Recommended or supported data size for
> > + * an uncorrected command. From [2].
> > + */
> > + __u8 wusl;
> > +
>
> same here
>
> > + /*
> > + * Dataset Management Ranges Limit : Recommended or supported maximum
> > + * number of logical block ranges for the Dataset Management Command.
> > + * From [2].
> > + */
> > + __u8 dmrl;
> > +
>
> same here
>
> > + /*
> > + * Dataset Management Size Limit : Recommended or supported maximum of
> > + * total number of logical blocks for a Dataset Management Command.
> > + * From [2].
> > + */
> > + __u64 dmsl;
> > +
>
> same here
>
> > + /*
> > + * Dataset Management Range Size Limit : Recommended or supported maximum
> > + * number of logical blocks in a range of a Dataset Management Command.
> > + * From [2].
> > + */
> > + __u32 dmrsl;
>
> same here
>
> > +
> > + /*
> > + * Optional NVM Command Support. Is needed to make sense of attributes
> > + * like vsl, wzsl, wusl... Comes from [1].
> > + */
> > + __u16 oncs;
> > +
>
> same here
>
>
> > + /*
> > + * Maximum data transfer size. Commands should not exceed this size.
> > + * Comes from [1].
> > + */
> > + __u8 mdts;
> > +
>
> same here
>
>
> > + __u8 rsvd0;
> > +
>
> reserving only 8 bits seems not so future proof, why can't we increase
> the size of the rsvd to something like
> NVMe command size - total size of this struct to make it future proof ?
I actually added this to plug a hole that I saw in pahole on my 64 bit
machine, It has nothing to do with future proofing the ioctl.
Future proofing is implemented in the nvme_ctrl_export_attrs function
by handling different struct sizes with copy_struct_from_user.
Will remove it.
Thx again for the feedback
>
> > +};
> > +
> > #define nvme_admin_cmd nvme_passthru_cmd
> >
> > #define NVME_IOCTL_ID _IO('N', 0x40)
> > @@ -104,6 +177,7 @@ struct nvme_uring_cmd {
> > #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64)
> > #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64)
> > #define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64)
> > +#define NVME_IOCTL_GET_ATTR _IOWR('N', 0x50, struct nvme_get_attr)
> >
> > /* io_uring async commands: */
> > #define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd)
>
>
> -ck
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 659 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-nvme/attachments/20221112/3ec759db/attachment.sig>
More information about the Linux-nvme
mailing list