[PATCH v2 1/1] nvme : Add ioctl to query nvme attributes

Chaitanya Kulkarni chaitanyak at nvidia.com
Thu Nov 10 07:13:04 PST 2022


On 11/10/22 03:05, Joel Granados wrote:
> An ioctl (NVME_IOCTL_GET_ATTR) is added to query nvme controller
> attributes needed to effectively write to the char device without needing
> to be a privileged user.  They are also provided on block devices for
> convenience. The attributes here are meant to complement what you can
> already get with the allowed identify commands.
> 
> New members are added to the nvme_ctrl struct: dmsl, vsl and wusl from
> the I/O Command Set Specific Identify Controller Data Structure in the nvme
> spec.
> 
> We add NVME_IOCTL_GET_ATTR_{V0SZ,CURZS} in order to make the ioctl
> extensible. These serve as both size and version. The caller is expected to
> pass the structure size as the first member of the struct. For example:
> 
> 	{...
> 	struct nvme_get_attr nvme_get_attr = {0};
> 	nvme_get_attr.argsz = sizeof(struct nvme_get_attr);
> 	...}
> 
> Signed-off-by: Joel Granados <j.granados at samsung.com>
> ---
>   drivers/nvme/host/core.c        |  5 ++-
>   drivers/nvme/host/ioctl.c       | 58 ++++++++++++++++++++++++++
>   drivers/nvme/host/nvme.h        | 11 +++++
>   include/uapi/linux/nvme_ioctl.h | 74 +++++++++++++++++++++++++++++++++
>   4 files changed, 147 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 0090dc0b3ae6..267f28592b9d 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3062,9 +3062,12 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
>   
>   	if (id->dmrl)
>   		ctrl->max_discard_segments = id->dmrl;
> -	ctrl->dmrsl = le32_to_cpu(id->dmrsl);
>   	if (id->wzsl)
>   		ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
> +	ctrl->dmrsl = le32_to_cpu(id->dmrsl);
> +	ctrl->dmsl = le64_to_cpu(id->dmsl);
> +	ctrl->vsl = id->vsl;
> +	ctrl->wusl = id->wusl;
>   
>   free_data:
>   	kfree(id);
> diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
> index 9273db147872..ce69659f05b6 100644
> --- a/drivers/nvme/host/ioctl.c
> +++ b/drivers/nvme/host/ioctl.c
> @@ -53,6 +53,58 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
>   	return (void __user *)ptrval;
>   }
>   
> +static inline u32 nvme_sectors_to_mps(struct nvme_ctrl *ctrl, u32 unit)
> +{
> +	return ilog2(unit) + 9 - 12 - NVME_CAP_MPSMIN(ctrl->cap);

please add a detailed comment here what you are doing and why you are
doing here... using magic numbers like 9 and 12 makes code hard to read
for others...

> +}
> +
> +static int nvme_ctrl_export_attrs(struct nvme_ctrl *ctrl,
> +				  struct nvme_get_attr __user *arg)
> +{
> +	struct nvme_get_attr nvme_get_attr = {0};

following will not work ? thats what we have in the code :-
struct nvme_get_attr nvme_get_attr = { };

> +	__u32 usize, retsize;
> +	int ret;
> +
> +	BUILD_BUG_ON(sizeof(struct nvme_get_attr) < NVME_IOCTL_GET_ATTR_V0SZ);
> +	BUILD_BUG_ON(sizeof(struct nvme_get_attr) != NVME_IOCTL_GET_ATTR_CURSZ);
> +

what prevents us from moving this to _nvme_check_size() ?

> +	if (copy_from_user(&nvme_get_attr, arg, 2 * sizeof(__u32)))
> +		return -EFAULT;
> +
> +	if (nvme_get_attr.flags != 0)
> +		return -EINVAL;
> +
> +	if (nvme_get_attr.argsz < NVME_IOCTL_GET_ATTR_V0SZ)
> +		return -EINVAL;
> +	usize = nvme_get_attr.argsz;
> +
> +	/* Enforce backwards compatibility */
> +	ret = copy_struct_from_user(&nvme_get_attr, NVME_IOCTL_GET_ATTR_CURSZ,
> +				    arg, usize);
> +	if (ret)
> +		return ret;
> +
> +	nvme_get_attr.argsz = NVME_IOCTL_GET_ATTR_CURSZ;
> +	nvme_get_attr.mpsmin = NVME_CAP_MPSMIN(ctrl->cap);
> +	nvme_get_attr.vsl = ctrl->vsl;
> +	nvme_get_attr.wusl = ctrl->wusl;
> +	nvme_get_attr.dmrl = ctrl->max_discard_segments;
> +	nvme_get_attr.dmsl = ctrl->dmsl;
> +	nvme_get_attr.dmrsl = ctrl->dmrsl;
> +	nvme_get_attr.oncs = ctrl->oncs;
> +	if (ctrl->max_zeroes_sectors > 0)
> +		nvme_get_attr.wzsl = nvme_sectors_to_mps(ctrl, ctrl->max_zeroes_sectors);
> +	if (ctrl->max_hw_sectors > 0)
> +		nvme_get_attr.mdts = nvme_sectors_to_mps(ctrl, ctrl->max_hw_sectors);
> +
> +	retsize = min(usize, NVME_IOCTL_GET_ATTR_CURSZ);
> +	ret = copy_to_user((struct nvme_get_attr __user *)arg, &nvme_get_attr, retsize);
> +	if (ret)
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
>   		unsigned len, u32 seed)
>   {
> @@ -613,6 +665,8 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
>   		return nvme_user_cmd(ctrl, NULL, argp, mode);
>   	case NVME_IOCTL_ADMIN64_CMD:
>   		return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
> +	case NVME_IOCTL_GET_ATTR:
> +		return nvme_ctrl_export_attrs(ctrl, argp);
>   	default:
>   		return sed_ioctl(ctrl->opal_dev, cmd, argp);
>   	}
> @@ -659,6 +713,8 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
>   		return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
>   	case NVME_IOCTL_IO64_CMD_VEC:
>   		return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
> +	case NVME_IOCTL_GET_ATTR:
> +		return nvme_ctrl_export_attrs(ns->ctrl, argp);
>   	default:
>   		return -ENOTTY;
>   	}
> @@ -950,6 +1006,8 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
>   			return -EACCES;
>   		nvme_queue_scan(ctrl);
>   		return 0;
> +	case NVME_IOCTL_GET_ATTR:
> +		return nvme_ctrl_export_attrs(ctrl, argp);
>   	default:
>   		return -ENOTTY;
>   	}
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index a29877217ee6..07c0d12747b7 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -291,6 +291,9 @@ struct nvme_ctrl {
>   	u16 crdt[3];
>   	u16 oncs;
>   	u32 dmrsl;
> +	u64 dmsl;
> +	u8 vsl;
> +	u8 wusl;
>   	u16 oacs;
>   	u16 sqsize;
>   	u32 max_namespaces;
> @@ -815,6 +818,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
>   		union nvme_result *result, void *buffer, unsigned bufflen,
>   		int qid, int at_head,
>   		blk_mq_req_flags_t flags);
> +
>   int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
>   		      unsigned int dword11, void *buffer, size_t buflen,
>   		      u32 *result);
> @@ -1072,4 +1076,11 @@ static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
>   }
>   #endif /* CONFIG_NVME_VERBOSE_ERRORS */
>   
> +/*
> + * List of all nvme ioctl controller attribute calls. Use size of nvme_get_attr
> + * as the version which enables us to use copy_struct_from_user
> + */
> +#define NVME_IOCTL_GET_ATTR_V0SZ		32u
> +#define NVME_IOCTL_GET_ATTR_CURSZ		NVME_IOCTL_GET_ATTR_V0SZ
> +
>   #endif /* _NVME_H */
> diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
> index 2f76cba67166..f15d2f7363fb 100644
> --- a/include/uapi/linux/nvme_ioctl.h
> +++ b/include/uapi/linux/nvme_ioctl.h
> @@ -92,6 +92,79 @@ struct nvme_uring_cmd {
>   	__u32   rsvd2;
>   };
>   
> +
> +/*
> + * This struct is populated from the following nvme commands:
> + * [1] Identify Controller Data Structure
> + * [2] I/O Command Set Specific Identify Controller Data Structure for the
> + *     NVME Command Set. Usually contained in struct nvme_id_ctrl_nvm
> + * [3] Controller capabilities on controller initialization
> + */
> +struct nvme_get_attr {
> +	__u32	argsz;
> +	__u32	flags;
> +
> +	/*
> +	 * Memory Page Size MINimum : The host should not configure a page size that
> +	 * is larger than (2 ^ (12 + mpsmin)). Comes from [3]
> +	 */

we are not spelling out these fileds explicitely since description
already present in the spec, so just to make sure abbrivations match
to the fields in the spec (which I think you hv done it pretty nicely).

Please remove these comments as these are inconsistent with what
we have, see host/nvme.h:struct nvme_ctrl {}.


> +	__u32	mpsmin;
> +
> +	/*
> +	 * Verify Size Limit : Recommended or supported data size for a verify
> +	 * command. From [2].
> +	 */

same here

> +	__u8	vsl;
> +
> +	/*
> +	 * Write Zeroes Size Limit : Recommended or supported data size for a
> +	 * zeroes command. From [2].
> +	 */
> +	__u8	wzsl;
> +

same here
> +	/*
> +	 * Write Uncorrected Size Limit : Recommended or supported data size for
> +	 * an uncorrected command. From [2].
> +	 */
> +	__u8	wusl;
> +

same here

> +	/*
> +	 * Dataset Management Ranges Limit : Recommended or supported maximum
> +	 * number of logical block ranges for the Dataset Management Command.
> +	 * From [2].
> +	 */
> +	__u8	dmrl;
> +

same here

> +	/*
> +	 * Dataset Management Size Limit : Recommended or supported maximum of
> +	 * total number of logical blocks for a Dataset Management Command.
> +	 * From [2].
> +	 */
> +	__u64	dmsl;
> +

same here

> +	/*
> +	 * Dataset Management Range Size Limit : Recommended or supported maximum
> +	 * number of logical blocks in a range of a Dataset Management Command.
> +	 * From [2].
> +	 */
> +	__u32	dmrsl;

same here

> +
> +	/*
> +	 * Optional NVM Command Support. Is needed to make sense of attributes
> +	 * like vsl, wzsl, wusl... Comes from [1].
> +	 */
> +	__u16	oncs;
> +

same here


> +	/*
> +	 * Maximum data transfer size. Commands should not exceed this size.
> +	 * Comes from [1].
> +	 */
> +	__u8	mdts;
> +

same here


> +	__u8	rsvd0;
> +

reserving only 8 bits seems not so future proof, why can't we increase
the size of the rsvd to something like
NVMe command size - total size of this struct to make it future proof ?

> +};
> +
>   #define nvme_admin_cmd nvme_passthru_cmd
>   
>   #define NVME_IOCTL_ID		_IO('N', 0x40)
> @@ -104,6 +177,7 @@ struct nvme_uring_cmd {
>   #define NVME_IOCTL_ADMIN64_CMD	_IOWR('N', 0x47, struct nvme_passthru_cmd64)
>   #define NVME_IOCTL_IO64_CMD	_IOWR('N', 0x48, struct nvme_passthru_cmd64)
>   #define NVME_IOCTL_IO64_CMD_VEC	_IOWR('N', 0x49, struct nvme_passthru_cmd64)
> +#define NVME_IOCTL_GET_ATTR	_IOWR('N', 0x50, struct nvme_get_attr)
>   
>   /* io_uring async commands: */
>   #define NVME_URING_CMD_IO	_IOWR('N', 0x80, struct nvme_uring_cmd)


-ck



More information about the Linux-nvme mailing list