[PATCH V5 4/6] nvmet: add ZBD over ZNS backend support

Damien Le Moal Damien.LeMoal at wdc.com
Sat Dec 12 04:25:38 EST 2020


On 2020/12/12 15:54, Chaitanya Kulkarni wrote:
[...]
>> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
>> +{
>> +	if (ns->bdev->bd_disk->queue->conv_zones_bitmap) {
>> Hmm... BIO based DM devices do not have this bitmap set since they do not have a
>> scheduler. So if one sets up a dm-linear device on top of an SMR disk and exports
>> the DM device through fabric, then this check will fail to verify if
>> conventional zones are present. There may be no other option than to do a full
>> report zones here if queue->seq_zones_wlock is NULL (meaning the queue is for a
>> stacked device).
> 
> If I'm not wrong, each LLD already calls report zones at disk revalidation,
> so we should be able to reuse that result instead of repeating it for each
> zbd ns, especially for a static property:-

I did say BIO based DM... If the backend is a dm-linear device, the bdev and
request queue that this driver sees is the DM device, not the bdev and request
queue of the DM backend. And DM code does *not* call
blk_revalidate_disk_zones(). In that function, you can see:

	if (WARN_ON_ONCE(!queue_is_mq(q)))
		return -EIO;

to check that.

So the zone bitmaps are *not* set for a DM device. Which means that this driver
needs to do a report zones to determine if there are conventional zones.

> 
> 1. drivers/block/null_blk_zoned.c:-
>     null_register_zoned_dev int
>         ret = blk_revalidate_disk_zones(nullb->disk, NULL);
> 2. drivers/nvme/host/zns.c:-
>     nvme_revalidate_zones
>         ret = blk_revalidate_disk_zones(ns->disk, NULL);
> 3. drivers/scsi/sd_zbc.c:-
>     sd_zbc_revalidate_zones
>         ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
> 
> Calling report again is a duplication of the work consuming cpu cycles for
> each zbd ns.
> 
> Unless something is wrong, we can get away with the following prep patch
> with one call in zns.c :-

No. That will not work if the backend is a DM device. You will hit the warning
mentioned above. DM sets the number of zones manually. See dm-table.c, function
dm_table_set_restrictions().

We could make blk_revalidate_disk_zones() work on a DM device, but that would
not be very useful since the backend was already validated, and the bitmaps
are useless since no scheduling of BIOs/requests is done at the DM level.

> 
> From abceef7bfdf9b278c492c755bf5f242159ef51e5 Mon Sep 17 00:00:00 2001
> From: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
> Date: Fri, 11 Dec 2020 21:21:44 -0800
> Subject: [PATCH V6 2/7] block: add nr_conv_zones and nr_seq_zones helpers
> 
> Add two request queue members that are needed to implement the NVMeOF ZBD
> backend which exports a number of conventional zones and a number of
> sequential zones so we don't have to repeat the work that
> blk_revalidate_disk_zones() already does.
> 
> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
> ---
>  block/blk-sysfs.c      | 14 ++++++++++++++
>  block/blk-zoned.c      |  9 +++++++++
>  include/linux/blkdev.h | 13 +++++++++++++
>  3 files changed, 36 insertions(+)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index b513f1683af0..f10cf45ae177 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -307,6 +307,16 @@ static ssize_t queue_nr_zones_show(struct
> request_queue *q, char *page)
>      return queue_var_show(blk_queue_nr_zones(q), page);
>  }
>  
> +static ssize_t queue_nr_conv_zones_show(struct request_queue *q, char
> *page)
> +{
> +    return queue_var_show(blk_queue_nr_conv_zones(q), page);
> +}
> +
> +static ssize_t queue_nr_seq_zones_show(struct request_queue *q, char *page)
> +{
> +    return queue_var_show(blk_queue_nr_seq_zones(q), page);
> +}
> +
>  static ssize_t queue_max_open_zones_show(struct request_queue *q, char
> *page)
>  {
>      return queue_var_show(queue_max_open_zones(q), page);
> @@ -588,6 +598,8 @@ QUEUE_RO_ENTRY(queue_zone_append_max,
> "zone_append_max_bytes");
>  
>  QUEUE_RO_ENTRY(queue_zoned, "zoned");
>  QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
> +QUEUE_RO_ENTRY(queue_nr_conv_zones, "nr_conv_zones");
> +QUEUE_RO_ENTRY(queue_nr_seq_zones, "nr_seq_zones");
>  QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
>  QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
>  
> @@ -642,6 +654,8 @@ static struct attribute *queue_attrs[] = {
>      &queue_nonrot_entry.attr,
>      &queue_zoned_entry.attr,
>      &queue_nr_zones_entry.attr,
> +    &queue_nr_conv_zones_entry.attr,
> +    &queue_nr_seq_zones_entry.attr,
>      &queue_max_open_zones_entry.attr,
>      &queue_max_active_zones_entry.attr,
>      &queue_nomerges_entry.attr,
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 6817a673e5ce..ea38c7928e41 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -390,6 +390,8 @@ struct blk_revalidate_zone_args {
>      unsigned long    *conv_zones_bitmap;
>      unsigned long    *seq_zones_wlock;
>      unsigned int    nr_zones;
> +    unsigned int    nr_conv_zones;
> +    unsigned int    nr_seq_zones;
>      sector_t    zone_sectors;
>      sector_t    sector;
>  };
> @@ -449,6 +451,7 @@ static int blk_revalidate_zone_cb(struct blk_zone
> *zone, unsigned int idx,
>                  return -ENOMEM;
>          }
>          set_bit(idx, args->conv_zones_bitmap);
> +        args->nr_conv_zones++;
>          break;
>      case BLK_ZONE_TYPE_SEQWRITE_REQ:
>      case BLK_ZONE_TYPE_SEQWRITE_PREF:
> @@ -458,6 +461,7 @@ static int blk_revalidate_zone_cb(struct blk_zone
> *zone, unsigned int idx,
>              if (!args->seq_zones_wlock)
>                  return -ENOMEM;
>          }
> +        args->nr_seq_zones++;
>          break;
>      default:
>          pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
> @@ -489,6 +493,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
>      struct request_queue *q = disk->queue;
>      struct blk_revalidate_zone_args args = {
>          .disk        = disk,
> +        /* just for readability */
> +        .nr_conv_zones    = 0,
> +        .nr_seq_zones    = 0,
>      };
>      unsigned int noio_flag;
>      int ret;
> @@ -519,6 +526,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
>      if (ret >= 0) {
>          blk_queue_chunk_sectors(q, args.zone_sectors);
>          q->nr_zones = args.nr_zones;
> +        q->nr_conv_zones = args.nr_conv_zones;
> +        q->nr_seq_zones = args.nr_seq_zones;
>          swap(q->seq_zones_wlock, args.seq_zones_wlock);
>          swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
>          if (update_driver_data)
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 2bdaa7cacfa3..697ded01e049 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -526,6 +526,9 @@ struct request_queue {
>      unsigned long        *seq_zones_wlock;
>      unsigned int        max_open_zones;
>      unsigned int        max_active_zones;
> +    unsigned int        nr_conv_zones;
> +    unsigned int        nr_seq_zones;
> +
>  #endif /* CONFIG_BLK_DEV_ZONED */
>  
>      /*
> @@ -726,6 +729,16 @@ static inline unsigned int
> blk_queue_nr_zones(struct request_queue *q)
>      return blk_queue_is_zoned(q) ? q->nr_zones : 0;
>  }
>  
> +static inline unsigned int blk_queue_nr_conv_zones(struct request_queue *q)
> +{
> +    return blk_queue_is_zoned(q) ? q->nr_conv_zones : 0;
> +}
> +
> +static inline unsigned int blk_queue_nr_seq_zones(struct request_queue *q)
> +{
> +    return blk_queue_is_zoned(q) ? q->nr_seq_zones : 0;
> +}
> +
>  static inline unsigned int blk_queue_zone_no(struct request_queue *q,
>                           sector_t sector)
>  {
> 


-- 
Damien Le Moal
Western Digital Research



More information about the Linux-nvme mailing list