[PATCH] nvmet: fix nvme status code when namespace is disabled

Hannes Reinecke hare at suse.de
Thu May 23 02:49:25 PDT 2024


On 4/28/24 11:25, Sagi Grimberg wrote:
> If the user disabled a nvmet namesapce, it is removed from
> the subsystem namespaces list. When nvmet processes a command
> directed to an nsid that was disabled, it cannot differentiate
> between a nsid that is disabled vs. a non-existent namespace,
> and resorts to return NVME_SC_INVALID_NS with the dnr bit set.
> 
> This translates to a non-retryable status for the host, which
> translates to a user error. We should expect disabled namespaces
> to not cause an I/O error in a multipath environment.
> 
> Address this by searching a configfs item for the namespace nvmet
> failed to find, and if we found one, conclude that the namespace
> is disabled (perhaps temporarily). Return NVME_SC_INTERNAL_PATH_ERROR
> in this case and keep DNR bit cleared.
> 
> Reported-by: Jirong Feng <jirong.feng at easystack.cn>
> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
> ---
> Jirong Feng, can you please re-run the test as requested and verify
> that the I/O error goes away. If so, please reply with your "Tested-by"
> tag.
> 
>   drivers/nvme/target/configfs.c | 13 +++++++++++++
>   drivers/nvme/target/core.c     |  8 ++++++--
>   drivers/nvme/target/nvmet.h    |  1 +
>   3 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
> index 77a6e817b315..e744bd0a9cc2 100644
> --- a/drivers/nvme/target/configfs.c
> +++ b/drivers/nvme/target/configfs.c
> @@ -754,6 +754,19 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
>   	NULL,
>   };
>   
> +bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid)
> +{
> +	struct config_item *ns_item;
> +	char name[4] = {};
> +
> +	if (sprintf(name, "%u", nsid) <= 0)
> +		return false;
> +	mutex_lock(&subsys->namespaces_group.cg_subsys->su_mutex);
> +	ns_item = config_group_find_item(&subsys->namespaces_group, name);
> +	mutex_unlock(&subsys->namespaces_group.cg_subsys->su_mutex);
> +	return ns_item != NULL;
> +}
> +
>   static void nvmet_ns_release(struct config_item *item)
>   {
>   	struct nvmet_ns *ns = to_nvmet_ns(item);
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index 78cba026610a..ed0b8f3707b4 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -437,11 +437,15 @@ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
>   u16 nvmet_req_find_ns(struct nvmet_req *req)
>   {
>   	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
> +	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
>   
> -	req->ns = xa_load(&nvmet_req_subsys(req)->namespaces, nsid);
> +	req->ns = xa_load(&subsys->namespaces, nsid);
>   	if (unlikely(!req->ns)) {
>   		req->error_loc = offsetof(struct nvme_common_command, nsid);
> -		return NVME_SC_INVALID_NS | NVME_SC_DNR;
> +		if (nvmet_subsys_nsid_exists(subsys, nsid))
> +			return NVME_SC_INTERNAL_PATH_ERROR;
> +		else
> +			return NVME_SC_INVALID_NS | NVME_SC_DNR;
>   	}
>   
>   	percpu_ref_get(&req->ns->ref);
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index f460728e1df1..c1306de1f4dd 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -543,6 +543,7 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
>   		struct nvmet_host *host);
>   void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
>   		u8 event_info, u8 log_page);
> +bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid);
>   
>   #define NVMET_MIN_QUEUE_SIZE	16
>   #define NVMET_MAX_QUEUE_SIZE	1024

Shouldn't we rather use 'ANA inaccessible' status here?
Just to make it clear that the namespace is disabled, and not some other 
internal error?

Cheers,

Hannes




More information about the Linux-nvme mailing list