[PATCHv3 3/5] nvme: implement I/O Command Sets Command Set support

Niklas Cassel Niklas.Cassel at wdc.com
Tue Jun 23 07:25:53 EDT 2020


On Tue, Jun 23, 2020 at 01:53:47AM -0700, Sagi Grimberg wrote:
> 
> 
> On 6/22/20 9:25 AM, Keith Busch wrote:
> > From: Niklas Cassel <niklas.cassel at wdc.com>
> > 
> > Implements support for the I/O Command Sets command set. The command set
> > introduces a method to enumerate multiple command sets per namespace. If
> > the command set is exposed, this method for enumeration will be used
> > instead of the traditional method that uses the CC.CSS register field
> > for command set identification.
> > 
> > For namespaces where the Command Set Identifier is not supported or
> > recognized, the specific namespace will not be created.
> > 
> > Reviewed-by: Javier González <javier.gonz at samsung.com>
> > Reviewed-by: Martin K. Petersen <martin.petersen at oracle.com>
> > Reviewed-by: Johannes Thumshirn <johannes.thumshirn at wdc.com>
> > Reviewed-by: Matias Bjørling <matias.bjorling at wdc.com>
> > Reviewed-by: Daniel Wagner <dwagner at suse.de>
> > Signed-off-by: Niklas Cassel <niklas.cassel at wdc.com>
> > ---
> >   drivers/nvme/host/core.c | 48 +++++++++++++++++++++++++++++++++-------
> >   drivers/nvme/host/nvme.h |  1 +
> >   include/linux/nvme.h     | 19 ++++++++++++++--
> >   3 files changed, 58 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 9491dbcfe81a..45a3cb5a35bd 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -1056,8 +1056,13 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
> >   	return error;
> >   }
> > +static bool nvme_multi_css(struct nvme_ctrl *ctrl)
> > +{
> > +	return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
> > +}
> > +
> >   static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
> > -		struct nvme_ns_id_desc *cur)
> > +		struct nvme_ns_id_desc *cur, bool *csi_seen)
> >   {
> >   	const char *warn_str = "ctrl returned bogus length:";
> >   	void *data = cur;
> > @@ -1087,6 +1092,15 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
> >   		}
> >   		uuid_copy(&ids->uuid, data + sizeof(*cur));
> >   		return NVME_NIDT_UUID_LEN;
> > +	case NVME_NIDT_CSI:
> > +		if (cur->nidl != NVME_NIDT_CSI_LEN) {
> > +			dev_warn(ctrl->device, "%s %d for NVME_NIDT_CSI\n",
> > +				 warn_str, cur->nidl);
> > +			return -1;
> > +		}
> > +		memcpy(&ids->csi, data + sizeof(*cur), NVME_NIDT_CSI_LEN);
> > +		*csi_seen = true;
> > +		return NVME_NIDT_CSI_LEN;
> >   	default:
> >   		/* Skip unknown types */
> >   		return cur->nidl;
> > @@ -1097,10 +1111,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
> >   		struct nvme_ns_ids *ids)
> >   {
> >   	struct nvme_command c = { };
> > -	int status;
> > +	bool csi_seen = false;
> > +	int status, pos, len;
> >   	void *data;
> > -	int pos;
> > -	int len;
> >   	c.identify.opcode = nvme_admin_identify;
> >   	c.identify.nsid = cpu_to_le32(nsid);
> > @@ -1130,13 +1143,19 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
> >   		if (cur->nidl == 0)
> >   			break;
> > -		len = nvme_process_ns_desc(ctrl, ids, cur);
> > +		len = nvme_process_ns_desc(ctrl, ids, cur, &csi_seen);
> >   		if (len < 0)
> >   			goto free_data;
> >   		len += sizeof(*cur);
> >   	}
> >   free_data:
> > +	if (!status && nvme_multi_css(ctrl) && !csi_seen) {
> 
> We will clear the status if we detect a path error, that is to
> avoid needlessly removing the ns for path failures, so you should
> check at the goto site.

The problem is that this check has to be done after checking all the ns descs,
so this check has to be done as the final thing, at least after processing all
the ns descs, no matter if nvme_process_ns_desc() returned an error, or if
NVME_NIDT_CSI simply wasn't part of the ns desc list, so the loop reached the
end without error.

Even if the nvme command failed and the status was cleared:

                if (status > 0 && !(status & NVME_SC_DNR))
                        status = 0;

we still need to return an error if (nvme_multi_css(ctrl) && !csi_seen).
(Not reporting a CSI when nvme_multi_css() is enabled is fatal.)

That is why the code looks like it does.

I guess we could do something like this, which does the same thing,
but perhaps is a bit clearer:

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e95f0c498a6b..bef687b9a277 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1160,8 +1160,10 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
                  * Don't treat an error as fatal, as we potentially already
                  * have a NGUID or EUI-64.
                  */
-               if (status > 0 && !(status & NVME_SC_DNR))
+               if (status > 0 && !(status & NVME_SC_DNR)) {
                        status = 0;
+                       goto csi_check;
+               }
                goto free_data;
        }

@@ -1173,17 +1175,17 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,

                len = nvme_process_ns_desc(ctrl, ids, cur, &csi_seen);
                if (len < 0)
-                       goto free_data;
+                       goto csi_check;

                len += sizeof(*cur);
        }
-free_data:
-       if (!status && nvme_multi_css(ctrl) && !csi_seen) {
+csi_check:
+       if (nvme_multi_css(ctrl) && !csi_seen) {
                dev_warn(ctrl->device, "Command set not reported for nsid:%d\n",
                         nsid);
                status = -EINVAL;
        }
-
+free_data:
        kfree(data);
        return status;
 }


> 
> > +		dev_warn(ctrl->device, "Command set not reported for nsid:%d\n",
> > +			 nsid);
> > +		status = -EINVAL;
> > +	}
> > +
> >   	kfree(data);
> >   	return status;
> >   }
> > @@ -1792,7 +1811,7 @@ static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
> >   		memcpy(ids->eui64, id->eui64, sizeof(id->eui64));
> >   	if (ctrl->vs >= NVME_VS(1, 2, 0))
> >   		memcpy(ids->nguid, id->nguid, sizeof(id->nguid));
> > -	if (ctrl->vs >= NVME_VS(1, 3, 0))
> > +	if (ctrl->vs >= NVME_VS(1, 3, 0) || nvme_multi_css(ctrl))
> >   		return nvme_identify_ns_descs(ctrl, nsid, ids);
> >   	return 0;
> >   }
> > @@ -1808,7 +1827,8 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
> >   {
> >   	return uuid_equal(&a->uuid, &b->uuid) &&
> >   		memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 &&
> > -		memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
> > +		memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0 &&
> > +		a->csi == b->csi;
> >   }
> >   static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
> > @@ -1930,6 +1950,15 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
> >   	if (ns->lba_shift == 0)
> >   		ns->lba_shift = 9;
> > +	switch (ns->head->ids.csi) {
> > +	case NVME_CSI_NVM:
> > +		break;
> > +	default:
> > +		dev_warn(ctrl->device, "unknown csi:%d ns:%d\n",
> > +			ns->head->ids.csi, ns->head->ns_id);
> > +		return -ENODEV;
> > +	}
> 
> Not sure we need a switch-case statement for a single case target...

I would consider it two cases. A supported CSI or a non-supported CSI
(which means any CSI value != NVME_CSI_NVM).

However, a follow up patch (patch 5/5 in this series) adds another case
to this switch-case statement (NVME_CSI_ZNS).

I guess this patch could have used an if-else statement, and patch 5/5
could then have replaced the if-statement with a switch-case.
However, since a patch in the same series actually adds another case,
I think that it is clearer this way.
(A switch-case with only two cases added, in a patch that is not the last
one in the series, suggests (at least to me) that it will most likely be
extended in a following patch.)


Kind regards,
Niklas


More information about the Linux-nvme mailing list