[PATCH V9 4/9] nvmet: add ZBD over ZNS backend support

Chaitanya Kulkarni Chaitanya.Kulkarni at wdc.com
Tue Jan 12 01:11:02 EST 2021


On 1/11/21 21:32, Damien Le Moal wrote:
> On 2021/01/12 13:27, Chaitanya Kulkarni wrote:
>> NVMe TP 4053 – Zoned Namespaces (ZNS) allows host software to
>> communicate with a non-volatile memory subsystem using zones for
>> NVMe protocol based controllers. NVMeOF already supports the ZNS NVMe
>> Protocol compliant devices on the target in the passthru mode. There
>> are generic zoned block devices like Shingled Magnetic Recording (SMR)
>> HDDs that are not based on the NVMe protocol.
>>
>> This patch adds ZNS backend to support the ZBDs for NVMeOF target.
>>
>> This support includes implementing the new command set NVME_CSI_ZNS,
>> adding different command handlers for ZNS command set such as
>> NVMe Identify Controller, NVMe Identify Namespace, NVMe Zone Append,
>> NVMe Zone Management Send and NVMe Zone Management Receive.
>>
>> With new command set identifier we also update the target command effects
>> logs to reflect the ZNS compliant commands.
>>
>> Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni at wdc.com>
>> ---
>>  drivers/nvme/target/Makefile      |   1 +
>>  drivers/nvme/target/admin-cmd.c   |  28 +++
>>  drivers/nvme/target/core.c        |   3 +
>>  drivers/nvme/target/io-cmd-bdev.c |  33 ++-
>>  drivers/nvme/target/nvmet.h       |  38 ++++
>>  drivers/nvme/target/zns.c         | 342 ++++++++++++++++++++++++++++++
>>  6 files changed, 437 insertions(+), 8 deletions(-)
>>  create mode 100644 drivers/nvme/target/zns.c
>>
>> diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
>> index ebf91fc4c72e..9837e580fa7e 100644
>> --- a/drivers/nvme/target/Makefile
>> +++ b/drivers/nvme/target/Makefile
>> @@ -12,6 +12,7 @@ obj-$(CONFIG_NVME_TARGET_TCP)		+= nvmet-tcp.o
>>  nvmet-y		+= core.o configfs.o admin-cmd.o fabrics-cmd.o \
>>  			discovery.o io-cmd-file.o io-cmd-bdev.o
>>  nvmet-$(CONFIG_NVME_TARGET_PASSTHRU)	+= passthru.o
>> +nvmet-$(CONFIG_BLK_DEV_ZONED)		+= zns.o
>>  nvme-loop-y	+= loop.o
>>  nvmet-rdma-y	+= rdma.o
>>  nvmet-fc-y	+= fc.o
>> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
>> index a50b7bcac67a..bdf09d8faa48 100644
>> --- a/drivers/nvme/target/admin-cmd.c
>> +++ b/drivers/nvme/target/admin-cmd.c
>> @@ -191,6 +191,15 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
>>  		log->iocs[nvme_cmd_dsm]			= cpu_to_le32(1 << 0);
>>  		log->iocs[nvme_cmd_write_zeroes]	= cpu_to_le32(1 << 0);
>>  		break;
>> +	case NVME_CSI_ZNS:
>> +		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
>> +			u32 *iocs = log->iocs;
>> +
>> +			iocs[nvme_cmd_zone_append]	= cpu_to_le32(1 << 0);
>> +			iocs[nvme_cmd_zone_mgmt_send]	= cpu_to_le32(1 << 0);
>> +			iocs[nvme_cmd_zone_mgmt_recv]	= cpu_to_le32(1 << 0);
>> +		}
> Is it OK to not return an error here if CONFIG_BLK_DEV_ZONED is not enabled ?
> I have not checked the entire code of this function nor how it is called, so I
> may be wrong.
Since we only set the controller cap when CONFIG_BLK_DEV_ZONED is
enabled, we should be uniform everywhere in the code. I'll recheck
and make the change if needed.
>> +		break;
>>  	default:
>>  		status = NVME_SC_INVALID_LOG_PAGE;
>>  		break;
>> @@ -644,6 +653,17 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req)
>>  	if (status)
>>  		goto out;
>>  
>> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
>> +		u16 nvme_cis_zns = NVME_CSI_ZNS;
>> +
>> +		if (req->ns->csi == NVME_CSI_ZNS)
>> +			status = nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
>> +							  NVME_NIDT_CSI_LEN,
>> +							  &nvme_cis_zns, &off);
>> +		if (status)
>> +			goto out;
>> +	}
> Same comment here.
I think the same explanation applies here too. I will recheck and make the
change if needed.
>
>> +
>>  	if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
>>  			off) != NVME_IDENTIFY_DATA_SIZE - off)
>>  		status = NVME_SC_INTERNAL | NVME_SC_DNR;
>> @@ -660,8 +680,16 @@ static void nvmet_execute_identify(struct nvmet_req *req)
>>  	switch (req->cmd->identify.cns) {
>>  	case NVME_ID_CNS_NS:
>>  		return nvmet_execute_identify_ns(req);
>> +	case NVME_ID_CNS_CS_NS:
>> +		if (req->cmd->identify.csi == NVME_CSI_ZNS)
>> +			return nvmet_execute_identify_cns_cs_ns(req);
>> +		break;
>>  	case NVME_ID_CNS_CTRL:
>>  		return nvmet_execute_identify_ctrl(req);
>> +	case NVME_ID_CNS_CS_CTRL:
>> +		if (req->cmd->identify.csi == NVME_CSI_ZNS)
>> +			return nvmet_execute_identify_cns_cs_ctrl(req);
>> +		break;
>>  	case NVME_ID_CNS_NS_ACTIVE_LIST:
>>  		return nvmet_execute_identify_nslist(req);
>>  	case NVME_ID_CNS_NS_DESC_LIST:
>> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
>> index 672e4009f8d6..17d5da062a5a 100644
>> --- a/drivers/nvme/target/core.c
>> +++ b/drivers/nvme/target/core.c
>> @@ -1107,6 +1107,7 @@ static inline u8 nvmet_cc_iocqes(u32 cc)
>>  static inline bool nvmet_cc_css_check(u8 cc_css)
>>  {
>>  	switch (cc_css <<= NVME_CC_CSS_SHIFT) {
>> +	case NVME_CC_CSS_CSI:
>>  	case NVME_CC_CSS_NVM:
>>  		return true;
>>  	default:
>> @@ -1173,6 +1174,8 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
>>  {
>>  	/* command sets supported: NVMe command set: */
>>  	ctrl->cap = (1ULL << 37);
>> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
>> +		ctrl->cap |= (1ULL << 43);
>>  	/* CC.EN timeout in 500msec units: */
>>  	ctrl->cap |= (15ULL << 24);
>>  	/* maximum queue entries supported: */
>> diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
>> index 23095bdfce06..6178ef643962 100644
>> --- a/drivers/nvme/target/io-cmd-bdev.c
>> +++ b/drivers/nvme/target/io-cmd-bdev.c
>> @@ -63,6 +63,14 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
>>  	}
>>  }
>>  
>> +void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
>> +{
>> +	if (ns->bdev) {
>> +		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
>> +		ns->bdev = NULL;
>> +	}
>> +}
>> +
>>  int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
>>  {
>>  	int ret;
>> @@ -86,15 +94,15 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
>>  	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
>>  		nvmet_bdev_ns_enable_integrity(ns);
>>  
>> -	return 0;
>> -}
>> -
>> -void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
>> -{
>> -	if (ns->bdev) {
>> -		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
>> -		ns->bdev = NULL;
>> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && bdev_is_zoned(ns->bdev)) {
>> +		if (!nvmet_bdev_zns_enable(ns)) {
>> +			nvmet_bdev_ns_disable(ns);
>> +			return -EINVAL;
>> +		}
>> +		ns->csi = NVME_CSI_ZNS;
>>  	}
>> +
>> +	return 0;
>>  }
>>  
>>  void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
>> @@ -448,6 +456,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
>>  	case nvme_cmd_write_zeroes:
>>  		req->execute = nvmet_bdev_execute_write_zeroes;
>>  		return 0;
>> +	case nvme_cmd_zone_append:
>> +		req->execute = nvmet_bdev_execute_zone_append;
>> +		return 0;
>> +	case nvme_cmd_zone_mgmt_recv:
>> +		req->execute = nvmet_bdev_execute_zone_mgmt_recv;
>> +		return 0;
>> +	case nvme_cmd_zone_mgmt_send:
>> +		req->execute = nvmet_bdev_execute_zone_mgmt_send;
>> +		return 0;
>>  	default:
>>  		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
>>  		       req->sq->qid);
>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>> index 476b3cd91c65..7361665585a2 100644
>> --- a/drivers/nvme/target/nvmet.h
>> +++ b/drivers/nvme/target/nvmet.h
>> @@ -252,6 +252,10 @@ struct nvmet_subsys {
>>  	unsigned int		admin_timeout;
>>  	unsigned int		io_timeout;
>>  #endif /* CONFIG_NVME_TARGET_PASSTHRU */
>> +
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +	u8			zasl;
>> +#endif /* CONFIG_BLK_DEV_ZONED */
>>  };
>>  
>>  static inline struct nvmet_subsys *to_subsys(struct config_item *item)
>> @@ -614,4 +618,38 @@ static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba)
>>  	return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT);
>>  }
>>  
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
>> +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req);
>> +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req);
>> +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req);
>> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req);
>> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req);
>> +#else  /* CONFIG_BLK_DEV_ZONED */
>> +static inline bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
>> +{
>> +	return false;
>> +}
>> +static inline void
>> +nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
>> +{
>> +}
>> +static inline void
>> +nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
>> +{
>> +}
>> +static inline void
>> +nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
>> +{
>> +}
>> +static inline void
>> +nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
>> +{
>> +}
>> +static inline void
>> +nvmet_bdev_execute_zone_append(struct nvmet_req *req)
>> +{
>> +}
>> +#endif /* CONFIG_BLK_DEV_ZONED */
>> +
>>  #endif /* _NVMET_H */
>> diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
>> new file mode 100644
>> index 000000000000..2a71f56e568d
>> --- /dev/null
>> +++ b/drivers/nvme/target/zns.c
>> @@ -0,0 +1,342 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * NVMe ZNS-ZBD command implementation.
>> + * Copyright (c) 2020-2021 HGST, a Western Digital Company.
>> + */
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +#include <linux/nvme.h>
>> +#include <linux/blkdev.h>
>> +#include "nvmet.h"
>> +
>> +/*
>> + * We set the Memory Page Size Minimum (MPSMIN) for target controller to 0
>> + * which gets added by 12 in the nvme_enable_ctrl() which results in 2^12 = 4k
>> + * as page_shift value. When calculating the ZASL use shift by 12.
>> + */
>> +#define NVMET_MPSMIN_SHIFT	12
>> +
>> +static u16 nvmet_bdev_zns_checks(struct nvmet_req *req)
>> +{
>> +	u16 status = NVME_SC_SUCCESS;
>> +
>> +	if (!bdev_is_zoned(req->ns->bdev)) {
>> +		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
>> +		goto out;
>> +	}
>> +
>> +	if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) {
>> +		status = NVME_SC_INVALID_FIELD;
>> +		goto out;
>> +	}
>> +
>> +	if (req->cmd->zmr.zrasf != NVME_ZRASF_ZONE_REPORT_ALL) {
>> +		status = NVME_SC_INVALID_FIELD;
>> +		goto out;
>> +	}
>> +
>> +	if (req->cmd->zmr.pr != NVME_REPORT_ZONE_PARTIAL)
>> +		status = NVME_SC_INVALID_FIELD;
>> +
>> +out:
> You really want to keep this (useless) label ? Without it, the status variable
> can be dropped and the code overall becomes so much easier to read... Not to
> mention that life will be easier to the compiler for optimizing this.
>
Will remove it in the next version.
>> +	return status;
>> +}
>> +
>> +/*
>> + *  ZNS related command implementation and helpers.
>> + */
>> +
>> +static inline u8 nvmet_zasl(unsigned int zone_append_sects)
>> +{
>> +	/*
>> +	 * Zone Append Size Limit is the value expressed in the units
>> +	 * of minimum memory page size (i.e. 12) and is reported as a power of 2.
>> +	 */
>> +	return ilog2((zone_append_sects << 9) >> NVMET_MPSMIN_SHIFT);
>> +}
>> +
>> +static inline bool nvmet_zns_update_zasl(struct nvmet_ns *ns)
>> +{
>> +	struct request_queue *q = ns->bdev->bd_disk->queue;
>> +	u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
>> +
>> +	if (ns->subsys->zasl)
>> +		return ns->subsys->zasl < zasl ? false : true;
>> +
>> +	ns->subsys->zasl = zasl;
>> +	return true;
>> +}
>> +
>> +
>> +static int nvmet_bdev_validate_zns_zones_cb(struct blk_zone *z,
>> +					    unsigned int idx, void *data)
>> +{
>> +	if (z->type == BLK_ZONE_TYPE_CONVENTIONAL)
>> +		return -EOPNOTSUPP;
>> +	return 0;
>> +}
>> +
>> +static bool nvmet_bdev_has_conv_zones(struct block_device *bdev)
>> +{
>> +	int ret;
>> +
>> +	if (bdev->bd_disk->queue->conv_zones_bitmap)
>> +		return true;
>> +
>> +	ret = blkdev_report_zones(bdev, 0, blkdev_nr_zones(bdev->bd_disk),
>> +				  nvmet_bdev_validate_zns_zones_cb, NULL);
>> +
>> +	return ret < 0 ? true : false;
> return ret <= 0;
>
> would be simpler.
>
> Note that "<=" includes the error case of the device not reporting any zone
> (device dead) as we should fail that case I think.
>
hmm will make that change.
>> +}
>> +
>> +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
>> +{
>> +	if (nvmet_bdev_has_conv_zones(ns->bdev))
>> +		return false;
>> +
>> +	/*
>> +	 * For ZBC and ZAC devices, writes into sequential zones must be aligned
>> +	 * to the device physical block size. So use this value as the logical
>> +	 * block size to avoid errors.
>> +	 */
>> +	ns->blksize_shift = blksize_bits(bdev_physical_block_size(ns->bdev));
>> +
>> +	if (!nvmet_zns_update_zasl(ns))
>> +		return false;
>> +
>> +	return !(get_capacity(ns->bdev->bd_disk) &
>> +			(bdev_zone_sectors(ns->bdev) - 1));
>> +}
>> +
>> +/*
>> + * ZNS related Admin and I/O command handlers.
>> + */
>> +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
>> +{
>> +	u8 zasl = req->sq->ctrl->subsys->zasl;
>> +	struct nvmet_ctrl *ctrl = req->sq->ctrl;
>> +	struct nvme_id_ctrl_zns *id;
>> +	u16 status;
>> +
>> +	id = kzalloc(sizeof(*id), GFP_KERNEL);
>> +	if (!id) {
>> +		status = NVME_SC_INTERNAL;
>> +		goto out;
>> +	}
>> +
>> +	if (ctrl->ops->get_mdts)
>> +		id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl);
>> +	else
>> +		id->zasl = zasl;
>> +
>> +	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
>> +
>> +	kfree(id);
>> +out:
>> +	nvmet_req_complete(req, status);
>> +}
>> +
>> +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
>> +{
>> +	struct nvme_id_ns_zns *id_zns;
>> +	u16 status = NVME_SC_SUCCESS;
>> +	u64 zsze;
>> +
>> +	if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
>> +		req->error_loc = offsetof(struct nvme_identify, nsid);
>> +		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
>> +		goto out;
>> +	}
>> +
>> +	id_zns = kzalloc(sizeof(*id_zns), GFP_KERNEL);
>> +	if (!id_zns) {
>> +		status = NVME_SC_INTERNAL;
>> +		goto out;
>> +	}
>> +
>> +	req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
>> +	if (!req->ns) {
>> +		status = NVME_SC_INTERNAL;
>> +		goto done;
>> +	}
>> +
>> +	if (!bdev_is_zoned(req->ns->bdev)) {
>> +		req->error_loc = offsetof(struct nvme_identify, nsid);
>> +		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
>> +		goto done;
>> +	}
>> +
>> +	nvmet_ns_revalidate(req->ns);
>> +	zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
>> +					req->ns->blksize_shift;
>> +	id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
>> +	id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev));
>> +	id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev));
>> +
>> +done:
>> +	status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
>> +	kfree(id_zns);
>> +out:
>> +	nvmet_req_complete(req, status);
>> +}
>> +
>> +struct nvmet_report_zone_data {
>> +	struct nvmet_ns *ns;
>> +	struct nvme_zone_report *rz;
>> +};
>> +
>> +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned int idx,
>> +				     void *data)
>> +{
>> +	struct nvmet_report_zone_data *report_zone_data = data;
>> +	struct nvme_zone_descriptor *entries = report_zone_data->rz->entries;
>> +	struct nvmet_ns *ns = report_zone_data->ns;
>> +
>> +	entries[idx].zcap = nvmet_sect_to_lba(ns, z->capacity);
>> +	entries[idx].zslba = nvmet_sect_to_lba(ns, z->start);
>> +	entries[idx].wp = nvmet_sect_to_lba(ns, z->wp);
>> +	entries[idx].za = z->reset ? 1 << 2 : 0;
>> +	entries[idx].zt = z->type;
>> +	entries[idx].zs = z->cond << 4;
>> +
>> +	return 0;
>> +}
>> +
>> +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
>> +{
>> +	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
>> +	u32 bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
>> +	struct nvmet_report_zone_data data = { .ns = req->ns };
>> +	unsigned int nr_zones;
>> +	int reported_zones;
>> +	u16 status;
>> +
>> +	nr_zones = (bufsize - sizeof(struct nvme_zone_report)) /
>> +			sizeof(struct nvme_zone_descriptor);
> I really would prefer this code to be moved down, before the call to
> blkdev_report_zones().
>
> You can also optimize this value a little with a min() of the value above and of
> DIV_ROUND_UP(dev_capacity - sect, zone size). But not a big deal I think.
I did that as per your last comment, but when I reviewed the code against
the host side it didn't match. I have a cleanup patch series to fix nits and
the host-side CSS checks for ZNS, and I've added this to that series.
>> +
>> +	status = nvmet_bdev_zns_checks(req);
>> +	if (status)
>> +		goto out;
>> +
>> +	data.rz = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY | __GFP_ZERO);
> Shouldn't this be GFP_NOIO ? Also, is the NORETRY critical ?
Yes on GFP_NOIO. By "is NORETRY critical", do you mean how we are allocating
the memory on the host side in nvme_zns_alloc_report_buffer()?
> blkdev_report_zones() will do mem allocation too and at least scsi does retry.
>
>> +	if (!data.rz) {
>> +		status = NVME_SC_INTERNAL;
>> +		goto out;
>> +	}
>> +
>> +	reported_zones = blkdev_report_zones(req->ns->bdev, sect, nr_zones,
>> +					     nvmet_bdev_report_zone_cb,
>> +					     &data);
>> +	if (reported_zones < 0) {
>> +		status = NVME_SC_INTERNAL;
>> +		goto out_free_report_zones;
>> +	}
>> +
>> +	data.rz->nr_zones = cpu_to_le64(reported_zones);
>> +
>> +	status = nvmet_copy_to_sgl(req, 0, data.rz, bufsize);
>> +
>> +out_free_report_zones:
>> +	kvfree(data.rz);
>> +out:
>> +	nvmet_req_complete(req, status);
>> +}
>> +
>> +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
>> +{
>> +	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
>> +	sector_t nr_sect = bdev_zone_sectors(req->ns->bdev);
>> +	u16 status = NVME_SC_SUCCESS;
>> +	enum req_opf op;
>> +	int ret;
>> +
>> +	if (req->cmd->zms.select_all)
>> +		nr_sect = get_capacity(req->ns->bdev->bd_disk);
>> +
>> +	switch (req->cmd->zms.zsa) {
>> +	case NVME_ZONE_OPEN:
>> +		op = REQ_OP_ZONE_OPEN;
>> +		break;
>> +	case NVME_ZONE_CLOSE:
>> +		op = REQ_OP_ZONE_CLOSE;
>> +		break;
>> +	case NVME_ZONE_FINISH:
>> +		op = REQ_OP_ZONE_FINISH;
>> +		break;
>> +	case NVME_ZONE_RESET:
>> +		op = REQ_OP_ZONE_RESET;
>> +		break;
>> +	default:
>> +		status = NVME_SC_INVALID_FIELD;
>> +		goto out;
>> +	}
>> +
>> +	ret = blkdev_zone_mgmt(req->ns->bdev, op, sect, nr_sect, GFP_KERNEL);
> GFP_NOIO ?
Yes.
>
>> +	if (ret)
>> +		status = NVME_SC_INTERNAL;
>> +out:
>> +	nvmet_req_complete(req, status);
>> +}
>> +
>> +void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
>> +{
>> +	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
>> +	struct request_queue *q = req->ns->bdev->bd_disk->queue;
>> +	unsigned int max_sects = queue_max_zone_append_sectors(q);
>> +	u16 status = NVME_SC_SUCCESS;
>> +	unsigned int total_len = 0;
>> +	struct scatterlist *sg;
>> +	int ret = 0, sg_cnt;
>> +	struct bio *bio;
>> +
>> +	if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
>> +		return;
>> +
>> +	if (!req->sg_cnt) {
>> +		nvmet_req_complete(req, 0);
>> +		return;
>> +	}
>> +
>> +	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
>> +		bio = &req->b.inline_bio;
>> +		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
>> +	} else {
>> +		bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
>> +	}
>> +
>> +	bio_set_dev(bio, req->ns->bdev);
>> +	bio->bi_iter.bi_sector = sect;
>> +	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
>> +	if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
>> +		bio->bi_opf |= REQ_FUA;
>> +
>> +	for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
>> +		struct page *p = sg_page(sg);
>> +		unsigned int l = sg->length;
>> +		unsigned int o = sg->offset;
>> +		bool same_page = false;
>> +
>> +		ret = bio_add_hw_page(q, bio, p, l, o, max_sects, &same_page);
>> +		if (ret != sg->length) {
>> +			status = NVME_SC_INTERNAL;
>> +			goto out_bio_put;
>> +		}
>> +		if (same_page)
>> +			put_page(p);
>> +
>> +		total_len += sg->length;
>> +	}
>> +
>> +	if (total_len != nvmet_rw_data_len(req)) {
>> +		status = NVME_SC_INTERNAL | NVME_SC_DNR;
>> +		goto out_bio_put;
>> +	}
>> +
>> +	ret = submit_bio_wait(bio);
>> +	req->cqe->result.u64 = nvmet_sect_to_lba(req->ns,
>> +						 bio->bi_iter.bi_sector);
>> +
>> +out_bio_put:
>> +	if (bio != &req->b.inline_bio)
>> +		bio_put(bio);
>> +	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL : status);
>> +}
>>
>




More information about the Linux-nvme mailing list