[PATCH 2/4] block: add support for zone offline transition
Damien Le Moal
Damien.LeMoal at wdc.com
Thu Jul 2 04:10:53 EDT 2020
On 2020/07/02 15:55, Javier González wrote:
> From: Javier González <javier.gonz at samsung.com>
>
> Add support for offline transition on the zoned block device. Use the
> existing feature flags for the underlying driver to report support for
> the feature, as currently this transition is only supported in ZNS and
> not in ZAC/ZBC
>
> Signed-off-by: Javier González <javier.gonz at samsung.com>
> Signed-off-by: SelvaKumar S <selvakuma.s1 at samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty at samsung.com>
> ---
> block/blk-core.c | 2 ++
> block/blk-zoned.c | 8 +++++++-
> drivers/nvme/host/core.c | 3 +++
> drivers/nvme/host/zns.c | 2 +-
> include/linux/blk_types.h | 3 +++
> include/linux/blkdev.h | 1 -
> include/uapi/linux/blkzoned.h | 3 +++
> 7 files changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 03252af8c82c..589cbdacc5ec 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -140,6 +140,7 @@ static const char *const blk_op_name[] = {
> REQ_OP_NAME(ZONE_CLOSE),
> REQ_OP_NAME(ZONE_FINISH),
> REQ_OP_NAME(ZONE_APPEND),
> + REQ_OP_NAME(ZONE_OFFLINE),
> REQ_OP_NAME(WRITE_SAME),
> REQ_OP_NAME(WRITE_ZEROES),
> REQ_OP_NAME(SCSI_IN),
> @@ -1030,6 +1031,7 @@ generic_make_request_checks(struct bio *bio)
> case REQ_OP_ZONE_OPEN:
> case REQ_OP_ZONE_CLOSE:
> case REQ_OP_ZONE_FINISH:
> + case REQ_OP_ZONE_OFFLINE:
> if (!blk_queue_is_zoned(q))
> goto not_supported;
> break;
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 0f156e96e48f..b97f67f462b4 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -320,7 +320,8 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
> }
>
> /*
> - * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
> + * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE, BLKFINISHZONE and BLKOFFLINEZONE
> + * ioctl processing.
> * Called from blkdev_ioctl.
> */
> int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
> @@ -363,6 +364,11 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
> case BLKFINISHZONE:
> op = REQ_OP_ZONE_FINISH;
> break;
> + case BLKOFFLINEZONE:
> + if (!(q->zone_flags & BLK_ZONE_REP_OFFLINE))
> + return -EINVAL;
return -ENOTTY here.
That is the error returned for regular block devices when a zone ioctl is
received, indicating the lack of support for these ioctls. Since this is also a
lack of support by the device here too, we may as well keep the same error
code. Returning -EINVAL should be reserved for cases where the device can accept
the ioctl but start sector or number of sectors is invalid.
> + op = REQ_OP_ZONE_OFFLINE;
> + break;
> default:
> return -ENOTTY;
> }
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index e5f754889234..1f5c7fc3d2c9 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -776,6 +776,9 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
> case REQ_OP_ZONE_FINISH:
> ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_FINISH);
> break;
> + case REQ_OP_ZONE_OFFLINE:
> + ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_OFFLINE);
> + break;
> case REQ_OP_WRITE_ZEROES:
> ret = nvme_setup_write_zeroes(ns, req, cmd);
> break;
> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
> index 888264261ba3..b34d2ed13825 100644
> --- a/drivers/nvme/host/zns.c
> +++ b/drivers/nvme/host/zns.c
> @@ -81,7 +81,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
> }
>
> q->limits.zoned = BLK_ZONED_HM;
> - q->zone_flags = BLK_ZONE_REP_CAPACITY;
> + q->zone_flags = BLK_ZONE_REP_CAPACITY | BLK_ZONE_REP_OFFLINE;
The name BLK_ZONE_REP_OFFLINE is not ideal. This flag is not about if offline
condition will be reported or not. It is about the drive supporting an explicit
offlining zone operation.
> blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> free_data:
> kfree(id);
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index ccb895f911b1..c0123c643e2f 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -316,6 +316,8 @@ enum req_opf {
> REQ_OP_ZONE_FINISH = 12,
> /* write data at the current zone write pointer */
> REQ_OP_ZONE_APPEND = 13,
> + /* Transition a zone to offline */
> + REQ_OP_ZONE_OFFLINE = 14,
>
> /* SCSI passthrough using struct scsi_request */
> REQ_OP_SCSI_IN = 32,
> @@ -455,6 +457,7 @@ static inline bool op_is_zone_mgmt(enum req_opf op)
> case REQ_OP_ZONE_OPEN:
> case REQ_OP_ZONE_CLOSE:
> case REQ_OP_ZONE_FINISH:
> + case REQ_OP_ZONE_OFFLINE:
> return true;
> default:
> return false;
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 3f2e3425fa53..e489b646486d 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -370,7 +370,6 @@ extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
> unsigned int cmd, unsigned long arg);
> extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
> unsigned int cmd, unsigned long arg);
> -
> #else /* CONFIG_BLK_DEV_ZONED */
>
> static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
> index 42c3366cc25f..e5adf4a9f4b0 100644
> --- a/include/uapi/linux/blkzoned.h
> +++ b/include/uapi/linux/blkzoned.h
> @@ -77,9 +77,11 @@ enum blk_zone_cond {
> * enum blk_zone_report_flags - Feature flags of reported zone descriptors.
> *
> * @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field.
> + * @BLK_ZONE_REP_OFFLINE : Zone device supports offline transition.
The device supports explicit zone offline transition
Since the implicit transition by the device may happen, even on SMR disks.
But I am not sure this flags is very useful. Or rather, isn't it out of place
here ? Device features are normally reported through sysfs (e.g. discard, etc).
It is certainly confusing and not matching the user doc for rep.flag which
states that the flags are about the zone descriptors, not what the device can
do. So at the very least, the comments need to change.
The other thing is that the implementation does not consider device mapper case
again: if a DM target is built on one or more ZNS drives all supporting zone
offline, then the target should be allowed to report zone offline support too,
no ? dm-linear and dm-flakey certainly should be allowed to do that. Exporting a
"zone_offline" (or something like named that) sysfs limit would allow that to be
supported easily through limit stacking and avoid the need for the report flag.
Happy to here others opinion about this one though.
> */
> enum blk_zone_report_flags {
> BLK_ZONE_REP_CAPACITY = (1 << 0),
> + BLK_ZONE_REP_OFFLINE = (1 << 1),
> };
>
> /**
> @@ -166,5 +168,6 @@ struct blk_zone_range {
> #define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
> #define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
> #define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
> +#define BLKOFFLINEZONE _IOW(0x12, 137, struct blk_zone_range)
>
> #endif /* _UAPI_BLKZONED_H */
>
--
Damien Le Moal
Western Digital Research
More information about the Linux-nvme
mailing list