[PATCH 1/6] block: introduce IOCTL for zone mgmt

Damien Le Moal Damien.LeMoal at wdc.com
Fri Jun 26 02:37:33 EDT 2020


On 2020/06/26 15:01, Javier González wrote:
> On 26.06.2020 01:17, Damien Le Moal wrote:
>> On 2020/06/25 21:22, Javier González wrote:
>>> From: Javier González <javier.gonz at samsung.com>
>>>
>>> The current IOCTL interface for zone management is limited by struct
>>> blk_zone_range, which is unfortunately not extensible. Specially, the
>>> lack of flags is problematic for ZNS zoned devices.
>>>
>>> This new IOCTL is designed to be a superset of the current one, with
>>> support for flags and bits for extensibility.
>>>
>>> Signed-off-by: Javier González <javier.gonz at samsung.com>
>>> Signed-off-by: SelvaKumar S <selvakuma.s1 at samsung.com>
>>> Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
>>> Signed-off-by: Nitesh Shetty <nj.shetty at samsung.com>
>>> ---
>>>  block/blk-zoned.c             | 56 ++++++++++++++++++++++++++++++++++-
>>>  block/ioctl.c                 |  2 ++
>>>  include/linux/blkdev.h        |  9 ++++++
>>>  include/uapi/linux/blkzoned.h | 33 +++++++++++++++++++++
>>>  4 files changed, 99 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>> index 81152a260354..e87c60004dc5 100644
>>> --- a/block/blk-zoned.c
>>> +++ b/block/blk-zoned.c
>>> @@ -322,7 +322,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>>>   * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
>>>   * Called from blkdev_ioctl.
>>>   */
>>> -int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>> +int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>>  			   unsigned int cmd, unsigned long arg)
>>>  {
>>>  	void __user *argp = (void __user *)arg;
>>> @@ -370,6 +370,60 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>  				GFP_KERNEL);
>>>  }
>>>
>>> +/*
>>> + * Zone management ioctl processing. Extension of blkdev_zone_ops_ioctl(), with
>>> + * blk_zone_mgmt structure.
>>> + *
>>> + * Called from blkdev_ioctl.
>>> + */
>>> +int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>> +			   unsigned int cmd, unsigned long arg)
>>> +{
>>> +	void __user *argp = (void __user *)arg;
>>> +	struct request_queue *q;
>>> +	struct blk_zone_mgmt zmgmt;
>>> +	enum req_opf op;
>>> +
>>> +	if (!argp)
>>> +		return -EINVAL;
>>> +
>>> +	q = bdev_get_queue(bdev);
>>> +	if (!q)
>>> +		return -ENXIO;
>>> +
>>> +	if (!blk_queue_is_zoned(q))
>>> +		return -ENOTTY;
>>> +
>>> +	if (!capable(CAP_SYS_ADMIN))
>>> +		return -EACCES;
>>> +
>>> +	if (!(mode & FMODE_WRITE))
>>> +		return -EBADF;
>>> +
>>> +	if (copy_from_user(&zmgmt, argp, sizeof(struct blk_zone_mgmt)))
>>> +		return -EFAULT;
>>> +
>>> +	switch (zmgmt.action) {
>>> +	case BLK_ZONE_MGMT_CLOSE:
>>> +		op = REQ_OP_ZONE_CLOSE;
>>> +		break;
>>> +	case BLK_ZONE_MGMT_FINISH:
>>> +		op = REQ_OP_ZONE_FINISH;
>>> +		break;
>>> +	case BLK_ZONE_MGMT_OPEN:
>>> +		op = REQ_OP_ZONE_OPEN;
>>> +		break;
>>> +	case BLK_ZONE_MGMT_RESET:
>>> +		op = REQ_OP_ZONE_RESET;
>>> +		break;
>>> +	default:
>>> +		return -ENOTTY;
>>> +	}
>>> +
>>> +	return blkdev_zone_mgmt(bdev, op, zmgmt.sector, zmgmt.nr_sectors,
>>> +				GFP_KERNEL);
>>> +}
>>> +
>>>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>>  						   unsigned int nr_zones)
>>>  {
>>> diff --git a/block/ioctl.c b/block/ioctl.c
>>> index bdb3bbb253d9..0ea29754e7dd 100644
>>> --- a/block/ioctl.c
>>> +++ b/block/ioctl.c
>>> @@ -514,6 +514,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>>>  	case BLKOPENZONE:
>>>  	case BLKCLOSEZONE:
>>>  	case BLKFINISHZONE:
>>> +		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>> +	case BLKMGMTZONE:
>>>  		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>>>  	case BLKGETZONESZ:
>>>  		return put_uint(argp, bdev_zone_sectors(bdev));
>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>> index 8fd900998b4e..bd8521f94dc4 100644
>>> --- a/include/linux/blkdev.h
>>> +++ b/include/linux/blkdev.h
>>> @@ -368,6 +368,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
>>>
>>>  extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>>>  				     unsigned int cmd, unsigned long arg);
>>> +extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>> +				  unsigned int cmd, unsigned long arg);
>>>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>  				  unsigned int cmd, unsigned long arg);
>>>
>>> @@ -385,6 +387,13 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
>>>  	return -ENOTTY;
>>>  }
>>>
>>> +
>>> +static inline int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>> +					unsigned int cmd, unsigned long arg)
>>> +{
>>> +	return -ENOTTY;
>>> +}
>>> +
>>>  static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>>  					 fmode_t mode, unsigned int cmd,
>>>  					 unsigned long arg)
>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>>> index 42c3366cc25f..07b5fde21d9f 100644
>>> --- a/include/uapi/linux/blkzoned.h
>>> +++ b/include/uapi/linux/blkzoned.h
>>> @@ -142,6 +142,38 @@ struct blk_zone_range {
>>>  	__u64		nr_sectors;
>>>  };
>>>
>>> +/**
>>> + * enum blk_zone_action - Zone state transitions managed from user-space
>>> + *
>>> + * @BLK_ZONE_MGMT_CLOSE: Transition to Closed state
>>> + * @BLK_ZONE_MGMT_FINISH: Transition to Finish state
>>> + * @BLK_ZONE_MGMT_OPEN: Transition to Open state
>>> + * @BLK_ZONE_MGMT_RESET: Transition to Reset state
>>> + */
>>> +enum blk_zone_action {
>>> +	BLK_ZONE_MGMT_CLOSE	= 0x1,
>>> +	BLK_ZONE_MGMT_FINISH	= 0x2,
>>> +	BLK_ZONE_MGMT_OPEN	= 0x3,
>>> +	BLK_ZONE_MGMT_RESET	= 0x4,
>>> +};
>>> +
>>> +/**
>>> + * struct blk_zone_mgmt - Extended zoned management
>>> + *
>>> + * @action: Zone action as in described in enum blk_zone_action
>>> + * @flags: Flags for the action
>>> + * @sector: Starting sector of the first zone to operate on
>>> + * @nr_sectors: Total number of sectors of all zones to operate on
>>> + */
>>> +struct blk_zone_mgmt {
>>> +	__u8		action;
>>> +	__u8		resv3[3];
>>> +	__u32		flags;
>>> +	__u64		sector;
>>> +	__u64		nr_sectors;
>>> +	__u64		resv31;
>>> +};
>>> +
>>>  /**
>>>   * Zoned block device ioctl's:
>>>   *
>>> @@ -166,5 +198,6 @@ struct blk_zone_range {
>>>  #define BLKOPENZONE	_IOW(0x12, 134, struct blk_zone_range)
>>>  #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>>>  #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>>> +#define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
>>>
>>>  #endif /* _UAPI_BLKZONED_H */
>>>
>>
>> Without defining what the flags can be, it is hard to judge what will change
>>from the current distinct ioctls.
>>
> 
> The first flag is the one to select all. Down the line we have other
> modifiers that make sense, but it is true that it is public yet.

You mean *not* public ?

> 
> Would you like to wait until then or is it an option to revise the IOCTL
> now?

Yes. Wait until it is actually needed. Adding code that has no users makes it
impossible to test so not acceptable. As for the "all zones" flag, I already
commented about it.

> 
> Javier
> 


-- 
Damien Le Moal
Western Digital Research



More information about the Linux-nvme mailing list