[PATCH v2 1/1] nvme : Add ioctl to query nvme attributes

Joel Granados j.granados at samsung.com
Sat Nov 12 01:38:27 PST 2022


On Thu, Nov 10, 2022 at 03:13:04PM +0000, Chaitanya Kulkarni wrote:
> On 11/10/22 03:05, Joel Granados wrote:
> > An ioctl (NVME_IOCTL_GET_ATTR) is added to query nvme controller
> > attributes needed to effectively write to the char device without needing
> > to be a privileged user.  They are also provided on block devices for
> > convenience. The attributes here are meant to complement what you can
> > already get with the allowed identify commands.
> > 
> > New members are added to the nvme_ctrl struct: dmsl, vsl and wusl from
> > the I/O Command Set Specific Identify Controller Data Structure in the nvme
> > spec.
> > 
> > We add NVME_IOCTL_GET_ATTR_{V0SZ,CURZS} in order to make the ioctl
> > extensible. These serve as both size and version. The caller is expected to
> > pass the structure size as the first member of the struct. For example:
> > 
> > 	{...
> > 	struct nvme_get_attr nvme_get_attr = {0};
> > 	nvme_get_attr.argsz = sizeof(struct nvme_get_attr);
> > 	...}
> > 
> > Signed-off-by: Joel Granados <j.granados at samsung.com>
> > ---
> >   drivers/nvme/host/core.c        |  5 ++-
> >   drivers/nvme/host/ioctl.c       | 58 ++++++++++++++++++++++++++
> >   drivers/nvme/host/nvme.h        | 11 +++++
> >   include/uapi/linux/nvme_ioctl.h | 74 +++++++++++++++++++++++++++++++++
> >   4 files changed, 147 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 0090dc0b3ae6..267f28592b9d 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -3062,9 +3062,12 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
> >   
> >   	if (id->dmrl)
> >   		ctrl->max_discard_segments = id->dmrl;
> > -	ctrl->dmrsl = le32_to_cpu(id->dmrsl);
> >   	if (id->wzsl)
> >   		ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
> > +	ctrl->dmrsl = le32_to_cpu(id->dmrsl);
> > +	ctrl->dmsl = le64_to_cpu(id->dmsl);
> > +	ctrl->vsl = id->vsl;
> > +	ctrl->wusl = id->wusl;
> >   
> >   free_data:
> >   	kfree(id);
> > diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
> > index 9273db147872..ce69659f05b6 100644
> > --- a/drivers/nvme/host/ioctl.c
> > +++ b/drivers/nvme/host/ioctl.c
> > @@ -53,6 +53,58 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
> >   	return (void __user *)ptrval;
> >   }
> >   
> > +static inline u32 nvme_sectors_to_mps(struct nvme_ctrl *ctrl, u32 unit)
> > +{
> > +	return ilog2(unit) + 9 - 12 - NVME_CAP_MPSMIN(ctrl->cap);
> 
> please add a detailed comment here what you are doing and why you are
> doing here... using magic numbers like 9 and 12 makes code hard to read
> for others...

Added a comment. We have several places where we use 12 and 9
"magically" to convert from NVMe spec to kernel sector values. Would it
make sense to #define this as NVME_CONST_MPSMIN_POW2_START. We can
definitely work on the name :)

> 
> > +}
> > +
> > +static int nvme_ctrl_export_attrs(struct nvme_ctrl *ctrl,
> > +				  struct nvme_get_attr __user *arg)
> > +{
> > +	struct nvme_get_attr nvme_get_attr = {0};
> 
> following will not work ? thats what we have in the code :-
> struct nvme_get_attr nvme_get_attr = { };
Sure. lets go with the way nvme driver chose to do it.

> 
> > +	__u32 usize, retsize;
> > +	int ret;
> > +
> > +	BUILD_BUG_ON(sizeof(struct nvme_get_attr) < NVME_IOCTL_GET_ATTR_V0SZ);
> > +	BUILD_BUG_ON(sizeof(struct nvme_get_attr) != NVME_IOCTL_GET_ATTR_CURSZ);
> > +
> 
> what prevents us from moving this to _nvme_check_size() ?
That seems like a better place. thx for pointing it out.

> 
> > +	if (copy_from_user(&nvme_get_attr, arg, 2 * sizeof(__u32)))
> > +		return -EFAULT;
> > +
> > +	if (nvme_get_attr.flags != 0)
> > +		return -EINVAL;
> > +
> > +	if (nvme_get_attr.argsz < NVME_IOCTL_GET_ATTR_V0SZ)
> > +		return -EINVAL;
> > +	usize = nvme_get_attr.argsz;
> > +
> > +	/* Enforce backwards compatibility */
> > +	ret = copy_struct_from_user(&nvme_get_attr, NVME_IOCTL_GET_ATTR_CURSZ,
> > +				    arg, usize);
> > +	if (ret)
> > +		return ret;
> > +
> > +	nvme_get_attr.argsz = NVME_IOCTL_GET_ATTR_CURSZ;
> > +	nvme_get_attr.mpsmin = NVME_CAP_MPSMIN(ctrl->cap);
> > +	nvme_get_attr.vsl = ctrl->vsl;
> > +	nvme_get_attr.wusl = ctrl->wusl;
> > +	nvme_get_attr.dmrl = ctrl->max_discard_segments;
> > +	nvme_get_attr.dmsl = ctrl->dmsl;
> > +	nvme_get_attr.dmrsl = ctrl->dmrsl;
> > +	nvme_get_attr.oncs = ctrl->oncs;
> > +	if (ctrl->max_zeroes_sectors > 0)
> > +		nvme_get_attr.wzsl = nvme_sectors_to_mps(ctrl, ctrl->max_zeroes_sectors);
> > +	if (ctrl->max_hw_sectors > 0)
> > +		nvme_get_attr.mdts = nvme_sectors_to_mps(ctrl, ctrl->max_hw_sectors);
> > +
> > +	retsize = min(usize, NVME_IOCTL_GET_ATTR_CURSZ);
> > +	ret = copy_to_user((struct nvme_get_attr __user *)arg, &nvme_get_attr, retsize);
> > +	if (ret)
> > +		return -EFAULT;
> > +
> > +	return 0;
> > +}
> > +
> >   static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
> >   		unsigned len, u32 seed)
> >   {
> > @@ -613,6 +665,8 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
> >   		return nvme_user_cmd(ctrl, NULL, argp, mode);
> >   	case NVME_IOCTL_ADMIN64_CMD:
> >   		return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
> > +	case NVME_IOCTL_GET_ATTR:
> > +		return nvme_ctrl_export_attrs(ctrl, argp);
> >   	default:
> >   		return sed_ioctl(ctrl->opal_dev, cmd, argp);
> >   	}
> > @@ -659,6 +713,8 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
> >   		return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
> >   	case NVME_IOCTL_IO64_CMD_VEC:
> >   		return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
> > +	case NVME_IOCTL_GET_ATTR:
> > +		return nvme_ctrl_export_attrs(ns->ctrl, argp);
> >   	default:
> >   		return -ENOTTY;
> >   	}
> > @@ -950,6 +1006,8 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
> >   			return -EACCES;
> >   		nvme_queue_scan(ctrl);
> >   		return 0;
> > +	case NVME_IOCTL_GET_ATTR:
> > +		return nvme_ctrl_export_attrs(ctrl, argp);
> >   	default:
> >   		return -ENOTTY;
> >   	}
> > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > index a29877217ee6..07c0d12747b7 100644
> > --- a/drivers/nvme/host/nvme.h
> > +++ b/drivers/nvme/host/nvme.h
> > @@ -291,6 +291,9 @@ struct nvme_ctrl {
> >   	u16 crdt[3];
> >   	u16 oncs;
> >   	u32 dmrsl;
> > +	u64 dmsl;
> > +	u8 vsl;
> > +	u8 wusl;
> >   	u16 oacs;
> >   	u16 sqsize;
> >   	u32 max_namespaces;
> > @@ -815,6 +818,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
> >   		union nvme_result *result, void *buffer, unsigned bufflen,
> >   		int qid, int at_head,
> >   		blk_mq_req_flags_t flags);
> > +
> >   int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
> >   		      unsigned int dword11, void *buffer, size_t buflen,
> >   		      u32 *result);
> > @@ -1072,4 +1076,11 @@ static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
> >   }
> >   #endif /* CONFIG_NVME_VERBOSE_ERRORS */
> >   
> > +/*
> > + * List of all nvme ioctl controller attribute calls. Use size of nvme_get_attr
> > + * as the version which enables us to use copy_struct_from_user
> > + */
> > +#define NVME_IOCTL_GET_ATTR_V0SZ		32u
> > +#define NVME_IOCTL_GET_ATTR_CURSZ		NVME_IOCTL_GET_ATTR_V0SZ
> > +
> >   #endif /* _NVME_H */
> > diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
> > index 2f76cba67166..f15d2f7363fb 100644
> > --- a/include/uapi/linux/nvme_ioctl.h
> > +++ b/include/uapi/linux/nvme_ioctl.h
> > @@ -92,6 +92,79 @@ struct nvme_uring_cmd {
> >   	__u32   rsvd2;
> >   };
> >   
> > +
> > +/*
> > + * This struct is populated from the following nvme commands:
> > + * [1] Identify Controller Data Structure
> > + * [2] I/O Command Set Specific Identify Controller Data Structure for the
> > + *     NVME Command Set. Usually contained in struct nvme_id_ctrl_nvm
> > + * [3] Controller capabilities on controller initialization
> > + */
> > +struct nvme_get_attr {
> > +	__u32	argsz;
> > +	__u32	flags;
> > +
> > +	/*
> > +	 * Memory Page Size MINimum : The host should not configure a page size that
> > +	 * is larger than (2 ^ (12 + mpsmin)). Comes from [3]
> > +	 */
> 
> we are not spelling out these fileds explicitely since description
> already present in the spec, so just to make sure abbrivations match
> to the fields in the spec (which I think you hv done it pretty nicely).
> 
> Please remove these comments as these are inconsistent with what
> we have, see host/nvme.h:struct nvme_ctrl {}.
> 
> 
> > +	__u32	mpsmin;
> > +
> > +	/*
> > +	 * Verify Size Limit : Recommended or supported data size for a verify
> > +	 * command. From [2].
> > +	 */
> 
> same here
> 
> > +	__u8	vsl;
> > +
> > +	/*
> > +	 * Write Zeroes Size Limit : Recommended or supported data size for a
> > +	 * zeroes command. From [2].
> > +	 */
> > +	__u8	wzsl;
> > +
> 
> same here
> > +	/*
> > +	 * Write Uncorrected Size Limit : Recommended or supported data size for
> > +	 * an uncorrected command. From [2].
> > +	 */
> > +	__u8	wusl;
> > +
> 
> same here
> 
> > +	/*
> > +	 * Dataset Management Ranges Limit : Recommended or supported maximum
> > +	 * number of logical block ranges for the Dataset Management Command.
> > +	 * From [2].
> > +	 */
> > +	__u8	dmrl;
> > +
> 
> same here
> 
> > +	/*
> > +	 * Dataset Management Size Limit : Recommended or supported maximum of
> > +	 * total number of logical blocks for a Dataset Management Command.
> > +	 * From [2].
> > +	 */
> > +	__u64	dmsl;
> > +
> 
> same here
> 
> > +	/*
> > +	 * Dataset Management Range Size Limit : Recommended or supported maximum
> > +	 * number of logical blocks in a range of a Dataset Management Command.
> > +	 * From [2].
> > +	 */
> > +	__u32	dmrsl;
> 
> same here
> 
> > +
> > +	/*
> > +	 * Optional NVM Command Support. Is needed to make sense of attributes
> > +	 * like vsl, wzsl, wusl... Comes from [1].
> > +	 */
> > +	__u16	oncs;
> > +
> 
> same here
> 
> 
> > +	/*
> > +	 * Maximum data transfer size. Commands should not exceed this size.
> > +	 * Comes from [1].
> > +	 */
> > +	__u8	mdts;
> > +
> 
> same here
> 
> 
> > +	__u8	rsvd0;
> > +
> 
> reserving only 8 bits seems not so future proof, why can't we increase
> the size of the rsvd to something like
> NVMe command size - total size of this struct to make it future proof ?

I actually added this to plug a hole that I saw in pahole on my 64 bit
machine, It has nothing to do with future proofing the ioctl.
Future proofing is implemented in the nvme_ctrl_export_attrs function
by handling different struct sizes with copy_struct_from_user.

Will remove it.

Thx again for the feedback
> 
> > +};
> > +
> >   #define nvme_admin_cmd nvme_passthru_cmd
> >   
> >   #define NVME_IOCTL_ID		_IO('N', 0x40)
> > @@ -104,6 +177,7 @@ struct nvme_uring_cmd {
> >   #define NVME_IOCTL_ADMIN64_CMD	_IOWR('N', 0x47, struct nvme_passthru_cmd64)
> >   #define NVME_IOCTL_IO64_CMD	_IOWR('N', 0x48, struct nvme_passthru_cmd64)
> >   #define NVME_IOCTL_IO64_CMD_VEC	_IOWR('N', 0x49, struct nvme_passthru_cmd64)
> > +#define NVME_IOCTL_GET_ATTR	_IOWR('N', 0x50, struct nvme_get_attr)
> >   
> >   /* io_uring async commands: */
> >   #define NVME_URING_CMD_IO	_IOWR('N', 0x80, struct nvme_uring_cmd)
> 
> 
> -ck
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 659 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-nvme/attachments/20221112/3ec759db/attachment.sig>


More information about the Linux-nvme mailing list