[PATCH V5 4/6] nvmet: add ZBD over ZNS backend support
Damien Le Moal
Damien.LeMoal at wdc.com
Sat Dec 12 04:25:38 EST 2020
On 2020/12/12 15:54, Chaitanya Kulkarni wrote:
[...]
>> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
>> +{
>> + if (ns->bdev->bd_disk->queue->conv_zones_bitmap) {
>> Hmm... BIO based DM devices do not have this bitmap set since they do not have a
>> scheduler. So if one sets up a dm-linear device on top of an SMR disk and exports
>> the DM device through fabric, then this check will fail to verify if
>> conventional zones are present. There may be no other option than to do a full
>> report zones here if queue->seq_zones_wlock is NULL (meaning the queue is for a
>> stacked device).
>
> If I'm not wrong, each LLD already calls report zones at disk revalidation,
> so we should be able to reuse that result instead of repeating it for each
> zbd ns, especially for a static property:-
I did say BIO based DM... If the backend is a dm-linear device, the bdev and
request queue that this driver sees is the DM device, not the bdev and request
queue of the DM backend. And DM code does *not* call
blk_revalidate_disk_zones(). In that function, you can see:
if (WARN_ON_ONCE(!queue_is_mq(q)))
return -EIO;
to check that.
So the zone bitmaps are *not* set for a DM device. Which means that this driver
needs to do a report zones to determine if there are conventional zones.
>
> 1. drivers/block/null_blk_zoned.c:-
> null_register_zoned_dev int
> ret = blk_revalidate_disk_zones(nullb->disk, NULL);
> 2. drivers/nvme/host/zns.c:-
> nvme_revalidate_zones
> ret = blk_revalidate_disk_zones(ns->disk, NULL);
> 3. drivers/scsi/sd_zbc.c:-
> sd_zbc_revalidate_zones
> ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
>
> Calling report again is a duplication of the work consuming cpu cycles for
> each zbd ns.
>
> Unless something is wrong, we can get away with the following prep patch
> with one call in zns.c :-
No. That will not work if the backend is a DM device. You will hit the warning
mentioned above. DM sets the number of zones manually. See dm-table.c, function
dm_table_set_restrictions().
We could get blk_revalidate_disk_zones() working on a DM device, but
that is not very useful since the backend was validated already, and the bitmaps
are useless since there is no scheduling of BIO/req done at DM level.
>
> From abceef7bfdf9b278c492c755bf5f242159ef51e5 Mon Sep 17 00:00:00 2001
> From: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
> Date: Fri, 11 Dec 2020 21:21:44 -0800
> Subject: [PATCH V6 2/7] block: add nr_conv_zones and nr_seq_zones helpers
>
> Add two request queue members that are needed to implement the NVMeOF ZBD
> backend, which exports a number of conventional zones and a number of
> sequential zones, so we don't have to repeat the work that
> blk_revalidate_disk_zones() already does.
>
> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
> ---
> block/blk-sysfs.c | 14 ++++++++++++++
> block/blk-zoned.c | 9 +++++++++
> include/linux/blkdev.h | 13 +++++++++++++
> 3 files changed, 36 insertions(+)
>
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index b513f1683af0..f10cf45ae177 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -307,6 +307,16 @@ static ssize_t queue_nr_zones_show(struct
> request_queue *q, char *page)
> return queue_var_show(blk_queue_nr_zones(q), page);
> }
>
> +static ssize_t queue_nr_conv_zones_show(struct request_queue *q, char
> *page)
> +{
> + return queue_var_show(blk_queue_nr_conv_zones(q), page);
> +}
> +
> +static ssize_t queue_nr_seq_zones_show(struct request_queue *q, char *page)
> +{
> + return queue_var_show(blk_queue_nr_seq_zones(q), page);
> +}
> +
> static ssize_t queue_max_open_zones_show(struct request_queue *q, char
> *page)
> {
> return queue_var_show(queue_max_open_zones(q), page);
> @@ -588,6 +598,8 @@ QUEUE_RO_ENTRY(queue_zone_append_max,
> "zone_append_max_bytes");
>
> QUEUE_RO_ENTRY(queue_zoned, "zoned");
> QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
> +QUEUE_RO_ENTRY(queue_nr_conv_zones, "nr_conv_zones");
> +QUEUE_RO_ENTRY(queue_nr_seq_zones, "nr_seq_zones");
> QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
> QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
>
> @@ -642,6 +654,8 @@ static struct attribute *queue_attrs[] = {
> &queue_nonrot_entry.attr,
> &queue_zoned_entry.attr,
> &queue_nr_zones_entry.attr,
> + &queue_nr_conv_zones_entry.attr,
> + &queue_nr_seq_zones_entry.attr,
> &queue_max_open_zones_entry.attr,
> &queue_max_active_zones_entry.attr,
> &queue_nomerges_entry.attr,
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 6817a673e5ce..ea38c7928e41 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -390,6 +390,8 @@ struct blk_revalidate_zone_args {
> unsigned long *conv_zones_bitmap;
> unsigned long *seq_zones_wlock;
> unsigned int nr_zones;
> + unsigned int nr_conv_zones;
> + unsigned int nr_seq_zones;
> sector_t zone_sectors;
> sector_t sector;
> };
> @@ -449,6 +451,7 @@ static int blk_revalidate_zone_cb(struct blk_zone
> *zone, unsigned int idx,
> return -ENOMEM;
> }
> set_bit(idx, args->conv_zones_bitmap);
> + args->nr_conv_zones++;
> break;
> case BLK_ZONE_TYPE_SEQWRITE_REQ:
> case BLK_ZONE_TYPE_SEQWRITE_PREF:
> @@ -458,6 +461,7 @@ static int blk_revalidate_zone_cb(struct blk_zone
> *zone, unsigned int idx,
> if (!args->seq_zones_wlock)
> return -ENOMEM;
> }
> + args->nr_seq_zones++;
> break;
> default:
> pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
> @@ -489,6 +493,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
> struct request_queue *q = disk->queue;
> struct blk_revalidate_zone_args args = {
> .disk = disk,
> + /* just for readability */
> + .nr_conv_zones = 0,
> + .nr_seq_zones = 0,
> };
> unsigned int noio_flag;
> int ret;
> @@ -519,6 +526,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
> if (ret >= 0) {
> blk_queue_chunk_sectors(q, args.zone_sectors);
> q->nr_zones = args.nr_zones;
> + q->nr_conv_zones = args.nr_conv_zones;
> + q->nr_seq_zones = args.nr_seq_zones;
> swap(q->seq_zones_wlock, args.seq_zones_wlock);
> swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
> if (update_driver_data)
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 2bdaa7cacfa3..697ded01e049 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -526,6 +526,9 @@ struct request_queue {
> unsigned long *seq_zones_wlock;
> unsigned int max_open_zones;
> unsigned int max_active_zones;
> + unsigned int nr_conv_zones;
> + unsigned int nr_seq_zones;
> +
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> /*
> @@ -726,6 +729,16 @@ static inline unsigned int
> blk_queue_nr_zones(struct request_queue *q)
> return blk_queue_is_zoned(q) ? q->nr_zones : 0;
> }
>
> +static inline unsigned int blk_queue_nr_conv_zones(struct request_queue *q)
> +{
> + return blk_queue_is_zoned(q) ? q->nr_conv_zones : 0;
> +}
> +
> +static inline unsigned int blk_queue_nr_seq_zones(struct request_queue *q)
> +{
> + return blk_queue_is_zoned(q) ? q->nr_seq_zones : 0;
> +}
> +
> static inline unsigned int blk_queue_zone_no(struct request_queue *q,
> sector_t sector)
> {
>
--
Damien Le Moal
Western Digital Research
More information about the Linux-nvme
mailing list