[PATCH V9 4/9] nvmet: add ZBD over ZNS backend support

Damien Le Moal Damien.LeMoal at wdc.com
Tue Jan 12 02:52:27 EST 2021


On 2021/01/12 16:48, Christoph Hellwig wrote:
>> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
>> index a50b7bcac67a..bdf09d8faa48 100644
>> --- a/drivers/nvme/target/admin-cmd.c
>> +++ b/drivers/nvme/target/admin-cmd.c
>> @@ -191,6 +191,15 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
>>  		log->iocs[nvme_cmd_dsm]			= cpu_to_le32(1 << 0);
>>  		log->iocs[nvme_cmd_write_zeroes]	= cpu_to_le32(1 << 0);
>>  		break;
>> +	case NVME_CSI_ZNS:
>> +		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
>> +			u32 *iocs = log->iocs;
>> +
>> +			iocs[nvme_cmd_zone_append]	= cpu_to_le32(1 << 0);
>> +			iocs[nvme_cmd_zone_mgmt_send]	= cpu_to_le32(1 << 0);
>> +			iocs[nvme_cmd_zone_mgmt_recv]	= cpu_to_le32(1 << 0);
>> +		}
>> +		break;
> 
> We need to return errors if the command set is not actually supported.
> I also think splitting this into one helper per command set would
> be nice.
> 
>> @@ -644,6 +653,17 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req)
>>  	if (status)
>>  		goto out;
>>  
>> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
>> +		u16 nvme_cis_zns = NVME_CSI_ZNS;
>> +
>> +		if (req->ns->csi == NVME_CSI_ZNS)
>> +			status = nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
>> +							  NVME_NIDT_CSI_LEN,
>> +							  &nvme_cis_zns, &off);
>> +		if (status)
>> +			goto out;
>> +	}
> 
> We need to add the CSI for every namespace, i.e. something like:
> 
> 	status = nvmet_copy_ns_identifier(req, NVME_NIDT_CSI, NVME_NIDT_CSI_LEN,
> 					  &req->ns->csi, &off);
> 	if (status)
> 		goto out;
> 
> and this hunk needs to go into the CSI patch.
> 
>>  	if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
>>  			off) != NVME_IDENTIFY_DATA_SIZE - off)
>>  		status = NVME_SC_INTERNAL | NVME_SC_DNR;
>> @@ -660,8 +680,16 @@ static void nvmet_execute_identify(struct nvmet_req *req)
>>  	switch (req->cmd->identify.cns) {
>>  	case NVME_ID_CNS_NS:
>>  		return nvmet_execute_identify_ns(req);
>> +	case NVME_ID_CNS_CS_NS:
>> +		if (req->cmd->identify.csi == NVME_CSI_ZNS)
>> +			return nvmet_execute_identify_cns_cs_ns(req);
>> +		break;
>>  	case NVME_ID_CNS_CTRL:
>>  		return nvmet_execute_identify_ctrl(req);
>> +	case NVME_ID_CNS_CS_CTRL:
>> +		if (req->cmd->identify.csi == NVME_CSI_ZNS)
>> +			return nvmet_execute_identify_cns_cs_ctrl(req);
>> +		break;
> 
> How does the CSI get mirrored into the cns field?
> 
>> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
>> index 672e4009f8d6..17d5da062a5a 100644
>> --- a/drivers/nvme/target/core.c
>> +++ b/drivers/nvme/target/core.c
>> @@ -1107,6 +1107,7 @@ static inline u8 nvmet_cc_iocqes(u32 cc)
>>  static inline bool nvmet_cc_css_check(u8 cc_css)
>>  {
>>  	switch (cc_css <<= NVME_CC_CSS_SHIFT) {
>> +	case NVME_CC_CSS_CSI:
>>  	case NVME_CC_CSS_NVM:
>>  		return true;
>>  	default:
>> @@ -1173,6 +1174,8 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
>>  {
>>  	/* command sets supported: NVMe command set: */
>>  	ctrl->cap = (1ULL << 37);
>> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
>> +		ctrl->cap |= (1ULL << 43);
>>  	/* CC.EN timeout in 500msec units: */
>>  	ctrl->cap |= (15ULL << 24);
>>  	/* maximum queue entries supported: */
> 
> This needs to go into a separate patch for multiple command set support.
> We can probably merge the CAP and CC bits with the CSI support, though.
> 
>> +	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && bdev_is_zoned(ns->bdev)) {
> 
> bdev_is_zoned should be probably stubbed out for !CONFIG_BLK_DEV_ZONED
> these days.
> 
>> +/*
>> + *  ZNS related command implementation and helpers.
>> + */
> 
> Well, that is the description of the whole file, isn't it?  I don't think
> this comment adds much value.
> 
>> +	/*
>> +	 * For ZBC and ZAC devices, writes into sequential zones must be aligned
>> +	 * to the device physical block size. So use this value as the logical
>> +	 * block size to avoid errors.
>> +	 */
> 
> I do not understand the logic here, given that NVMe does not have
> conventional zones.

512e SAS & SATA SMR drives (512B logical, 4K physical) are very common, and for
these, all writes in sequential zones must be 4K aligned. So I suggested that
Chaitanya simply use the physical block size as the LBA size for the target to
avoid weird IO errors that would not make sense in the ZNS/NVMe world (e.g.
512B-aligned write requests failing).


-- 
Damien Le Moal
Western Digital Research



More information about the Linux-nvme mailing list