[PATCH V6 4/6] nvmet: add ZBD over ZNS backend support

Damien Le Moal Damien.LeMoal at wdc.com
Sun Dec 13 18:41:09 EST 2020


On 2020/12/13 19:31, Damien Le Moal wrote:
> On Sat, 2020-12-12 at 21:50 -0800, Chaitanya Kulkarni wrote:
> [...]
>> +static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z,
>> +		unsigned int idx, void *data)
>> +{
>> +	struct blk_zone *zone = data;
>> +
>> +	memcpy(zone, z, sizeof(struct blk_zone));
> 
> See below. This is not necessary.
> 
>> +
>> +	return 0;
>> +}
>> +
>> +static bool nvmet_bdev_has_conv_zones(struct block_device *bdev)
>> +{
>> +	struct blk_zone zone;
>> +	int reported_zones;
>> +	unsigned int zno;
>> +
>> +	if (bdev->bd_disk->queue->conv_zones_bitmap)
>> +		return false;
> 
> Bug.
> 
>> +
>> +	for (zno = 0; zno < blk_queue_nr_zones(bdev->bd_disk->queue); zno++) {
> 
> Large capacity SMR drives have over 75,000 zones these days. Doing a report
> zones one zone at a time will take forever. This needs to be optimized: see
> below.
> 
>> +		reported_zones = blkdev_report_zones(bdev,
>> +				zno * bdev_zone_sectors(bdev), 1,
>> +				nvmet_bdev_validate_zns_zones_cb,
>> +				&zone);
>> +
>> +		if (reported_zones != 1)
>> +			return true;
>> +
>> +		if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL)
>> +			return true;
> 
> This test should be in the nvmet_bdev_validate_zns_zones_cb() callback. That
> callback can return an error if it finds a conventional zone. That will stop
> blkdev_report_zones().
> 
> 
>> +	}
>> +
>> +	return false;
>> +}
> 
> What about this:
> 
> static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z,
> 					    unsigned int idx, void *data)
> {
> 	if (z->type == BLK_ZONE_TYPE_CONVENTIONAL)
> 		return -ENOTSUPP;
> 	return 0;
> }
> 
> static bool nvmet_bdev_has_conv_zones(struct block_device *bdev)
> {
> 	int ret;
> 
> 	if (bdev->bd_disk->queue->conv_zones_bitmap)
> 		return true;
> 
> 	ret = blkdev_report_zones(bdev,
> 			get_capacity(bdev->bd_disk), blkdev_nr_zones(bdev),
> 			nvmet_bdev_validate_zns_zones_cb, NULL);

Oops. This is wrong. This should be:

	ret = blkdev_report_zones(bdev, 0, blkdev_nr_zones(bdev->bd_disk),
				  nvmet_bdev_validate_zns_zones_cb, NULL);

> 	if (ret < 1)
> 		return true;
> 
> 	return false;
> }
> 
> All zones are checked using the callback with the loop in
> blkdev_report_zones().
> 
> [...]
>> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
>> +{
>> +	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
>> +	struct request_queue *q = req->ns->bdev->bd_disk->queue;
>> +	unsigned int max_sects = queue_max_zone_append_sectors(q);
>> +	u16 status = NVME_SC_SUCCESS;
>> +	unsigned int total_len = 0;
>> +	struct scatterlist *sg;
>> +	int ret = 0, sg_cnt;
>> +	struct bio *bio;
>> +
>> +	if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
>> +		return;
>> +
>> +	if (!req->sg_cnt) {
>> +		nvmet_req_complete(req, 0);
>> +		return;
>> +	}
>> +
>> +	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
>> +		bio = &req->b.inline_bio;
>> +		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
>> +	} else {
>> +		bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
>> +	}
>> +
>> +	bio_set_dev(bio, req->ns->bdev);
>> +	bio->bi_iter.bi_sector = sect;
>> +	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
>> +	if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
>> +		bio->bi_opf |= REQ_FUA;
>> +
>> +	for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
>> +		struct page *p = sg_page(sg);
>> +		unsigned int l = sg->length;
>> +		unsigned int o = sg->offset;
>> +		bool same_page = false;
>> +
>> +		ret = bio_add_hw_page(q, bio, p, l, o, max_sects, &same_page);
>> +		if (ret != sg->length) {
>> +			status = NVME_SC_INTERNAL;
>> +			goto out_bio_put;
>> +		}
>> +		if (same_page)
>> +			put_page(p);
>> +
>> +		total_len += sg->length;
>> +	}
>> +
>> +	if (total_len != nvmet_rw_data_len(req)) {
>> +		status = NVME_SC_INTERNAL | NVME_SC_DNR;
>> +		goto out_bio_put;
>> +	}
>> +
>> +	ret = submit_bio_wait(bio);
>> +	status = ret < 0 ? NVME_SC_INTERNAL : status;
>> +
>> +	req->cqe->result.u64 = nvmet_sect_to_lba(req->ns,
>> +						 bio->bi_iter.bi_sector);
> 
> Why set this if the BIO failed ? There may be no problems doing so, but I do
> not see the point either.
> 
>> +
>> +out_bio_put:
>> +	if (bio != &req->b.inline_bio)
>> +		bio_put(bio);
>> +	nvmet_req_complete(req, status);
>> +}
> 


-- 
Damien Le Moal
Western Digital Research



More information about the Linux-nvme mailing list