[PATCH 4/6] block: introduce IOCTL to report dev properties

Javier González javier at javigon.com
Fri Jun 26 02:22:19 EDT 2020


On 26.06.2020 01:38, Damien Le Moal wrote:
>On 2020/06/25 21:22, Javier González wrote:
>> From: Javier González <javier.gonz at samsung.com>
>>
>> With the addition of ZNS, a new set of properties have been added to the
>> zoned block device. This patch introduces a new IOCTL to expose these
>> rroperties to user space.
>>
>> Signed-off-by: Javier González <javier.gonz at samsung.com>
>> Signed-off-by: SelvaKumar S <selvakuma.s1 at samsung.com>
>> Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
>> Signed-off-by: Nitesh Shetty <nj.shetty at samsung.com>
>> ---
>>  block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>>  block/ioctl.c                 |  2 ++
>>  drivers/nvme/host/core.c      |  2 ++
>>  drivers/nvme/host/nvme.h      | 11 +++++++
>>  drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>>  include/linux/blkdev.h        |  9 ++++++
>>  include/uapi/linux/blkzoned.h | 13 ++++++++
>>  7 files changed, 144 insertions(+)
>>
>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>> index 704fc15813d1..39ec72af9537 100644
>> --- a/block/blk-zoned.c
>> +++ b/block/blk-zoned.c
>> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>>  }
>>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>
>> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
>> +				      struct blk_zone_dev *zprop)
>> +{
>> +	struct gendisk *disk = bdev->bd_disk;
>> +
>> +	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>> +		return -EOPNOTSUPP;
>> +
>> +	return disk->fops->report_zone_p(disk, zprop);
>> +}
>> +
>>  static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
>>  						sector_t sector,
>>  						sector_t nr_sectors)
>> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>  				GFP_KERNEL);
>>  }
>>
>> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>> +			unsigned int cmd, unsigned long arg)
>> +{
>> +	void __user *argp = (void __user *)arg;
>> +	struct request_queue *q;
>> +	struct blk_zone_dev zprop;
>> +	int ret;
>> +
>> +	if (!argp)
>> +		return -EINVAL;
>> +
>> +	q = bdev_get_queue(bdev);
>> +	if (!q)
>> +		return -ENXIO;
>> +
>> +	if (!blk_queue_is_zoned(q))
>> +		return -ENOTTY;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EACCES;
>> +
>> +	if (!(mode & FMODE_WRITE))
>> +		return -EBADF;
>> +
>> +	ret = blkdev_report_zonedev_prop(bdev, &zprop);
>> +	if (ret)
>> +		goto out;
>> +
>> +	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>> +		return -EFAULT;
>> +
>> +out:
>> +	return ret;
>> +}
>> +
>>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>  						   unsigned int nr_zones)
>>  {
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 0ea29754e7dd..f7b4e0f2dd4c 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>>  		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>  	case BLKMGMTZONE:
>>  		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>> +	case BLKZONEDEVPROP:
>> +		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>  	case BLKGETZONESZ:
>>  		return put_uint(argp, bdev_zone_sectors(bdev));
>>  	case BLKGETNRZONES:
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 5b95c81d2a2d..a32c909a915f 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -2254,6 +2254,7 @@ static const struct block_device_operations nvme_fops = {
>>  	.getgeo		= nvme_getgeo,
>>  	.revalidate_disk= nvme_revalidate_disk,
>>  	.report_zones	= nvme_report_zones,
>> +	.report_zone_p	= nvme_report_zone_prop,
>>  	.pr_ops		= &nvme_pr_ops,
>>  };
>>
>> @@ -2280,6 +2281,7 @@ const struct block_device_operations nvme_ns_head_ops = {
>>  	.compat_ioctl	= nvme_compat_ioctl,
>>  	.getgeo		= nvme_getgeo,
>>  	.report_zones	= nvme_report_zones,
>> +	.report_zone_p	= nvme_report_zone_prop,
>>  	.pr_ops		= &nvme_pr_ops,
>>  };
>>  #endif /* CONFIG_NVME_MULTIPATH */
>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>> index ecf443efdf91..172e0531f37f 100644
>> --- a/drivers/nvme/host/nvme.h
>> +++ b/drivers/nvme/host/nvme.h
>> @@ -407,6 +407,14 @@ struct nvme_ns {
>>  	u8 pi_type;
>>  #ifdef CONFIG_BLK_DEV_ZONED
>>  	u64 zsze;
>> +
>> +	u32 nr_zones;
>> +	u32 mar;
>> +	u32 mor;
>> +	u32 rrl;
>> +	u32 frl;
>> +	u16 zoc;
>> +	u16 ozcs;
>>  #endif
>>  	unsigned long features;
>>  	unsigned long flags;
>> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>  		      unsigned int nr_zones, report_zones_cb cb, void *data);
>>
>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
>> +
>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>  				       struct nvme_command *cmnd,
>>  				       enum nvme_zone_mgmt_action action);
>>  #else
>>  #define nvme_report_zones NULL
>> +#define nvme_report_zone_prop NULL
>>
>>  static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
>>  		struct request *req, struct nvme_command *cmnd,
>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>> index 2e6512ac6f01..258d03610cc0 100644
>> --- a/drivers/nvme/host/zns.c
>> +++ b/drivers/nvme/host/zns.c
>> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
>>  	return 0;
>>  }
>>
>> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>> +{
>> +	struct nvme_command c = { };
>> +	struct nvme_zone_report report;
>> +	int buflen = sizeof(struct nvme_zone_report);
>> +	int ret;
>> +
>> +	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>> +	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>> +	c.zmr.slba = cpu_to_le64(0);
>> +	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>> +	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>> +	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>> +	c.zmr.pr = 0;
>> +
>> +	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return le64_to_cpu(report.nr_zones);
>> +}
>> +
>>  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  			  unsigned lbaf)
>>  {
>> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  		goto free_data;
>>  	}
>>
>> +	ns->nr_zones = nvme_zns_nr_zones(ns);
>> +	ns->mar = le32_to_cpu(id->mar);
>> +	ns->mor = le32_to_cpu(id->mor);
>> +	ns->rrl = le32_to_cpu(id->rrl);
>> +	ns->frl = le32_to_cpu(id->frl);
>> +	ns->zoc = le16_to_cpu(id->zoc);
>> +
>>  	q->limits.zoned = BLK_ZONED_HM;
>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>  free_data:
>> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>  	return ret;
>>  }
>>
>> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
>> +{
>> +	zprop->nr_zones = ns->nr_zones;
>> +	zprop->zoc = ns->zoc;
>> +	zprop->ozcs = ns->ozcs;
>> +	zprop->mar = ns->mar;
>> +	zprop->mor = ns->mor;
>> +	zprop->rrl = ns->rrl;
>> +	zprop->frl = ns->frl;
>> +
>> +	return 0;
>> +}
>> +
>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
>> +{
>> +	struct nvme_ns_head *head = NULL;
>> +	struct nvme_ns *ns;
>> +	int srcu_idx, ret;
>> +
>> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>> +	if (unlikely(!ns))
>> +		return -EWOULDBLOCK;
>> +
>> +	if (ns->head->ids.csi == NVME_CSI_ZNS)
>> +		ret = nvme_ns_report_zone_prop(ns, zprop);
>> +	else
>> +		ret = -EINVAL;
>> +	nvme_put_ns_from_disk(head, srcu_idx);
>> +
>> +	return ret;
>> +}
>> +
>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>  		struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>  {
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 8308d8a3720b..0c0faa58b7f4 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>  				  unsigned int cmd, unsigned long arg);
>>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>  				  unsigned int cmd, unsigned long arg);
>> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>> +			unsigned int cmd, unsigned long arg);
>>  #else /* CONFIG_BLK_DEV_ZONED */
>>
>>  static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>  	return -ENOTTY;
>>  }
>>
>> +static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>> +				      unsigned int cmd, unsigned long arg)
>> +{
>> +	return -ENOTTY;
>> +}
>> +
>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>
>>  struct request_queue {
>> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>>  	int (*report_zones)(struct gendisk *, sector_t sector,
>>  			unsigned int nr_zones, report_zones_cb cb, void *data);
>>  	char *(*devnode)(struct gendisk *disk, umode_t *mode);
>> +	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
>>  	struct module *owner;
>>  	const struct pr_ops *pr_ops;
>>  };
>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>> index d0978ee10fc7..0c49a4b2ce5d 100644
>> --- a/include/uapi/linux/blkzoned.h
>> +++ b/include/uapi/linux/blkzoned.h
>> @@ -142,6 +142,18 @@ struct blk_zone_range {
>>  	__u64		nr_sectors;
>>  };
>>
>> +struct blk_zone_dev {
>> +	__u32	nr_zones;
>> +	__u32	mar;
>> +	__u32	mor;
>> +	__u32	rrl;
>> +	__u32	frl;
>> +	__u16	zoc;
>> +	__u16	ozcs;
>> +	__u32	rsv31[2];
>> +	__u64	rsv63[4];
>> +};
>> +
>>  /**
>>   * enum blk_zone_action - Zone state transitions managed from user-space
>>   *
>> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>  #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>>  #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>>  #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
>> +#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
>>
>>  #endif /* _UAPI_BLKZONED_H */
>>
>
>As commented already, NVMe passthrough or sysfs device attributes would be much
>better. See scsi: there is no IOCTL defined to obtain every single log page or
>mode page defined. Passthrough is the interface to do that. For frequently used
>log pages giving device information, sysfs is used as a cache. See all the
>"vpd_pgXX" entries under /sys/block/sdX/device. All of this is done by the
>driver. Not the block layer.

Ok. Let me look into that. I was hesitant to do this as (i) it is a lot
of parameters and (ii) naming ATM is a bit inconsistent. Guess we could
group them all under zone_* prefix for new parameters.

Javier



More information about the Linux-nvme mailing list