[PATCH 1/2] nvme-auth: retry command if DNR bit is not set

Sagi Grimberg sagi at grimberg.me
Mon Sep 5 05:05:59 PDT 2022



On 8/30/22 15:49, Hannes Reinecke wrote:
> If the cqe returns a status with the DNR bit not set we should
> retry the command; otherwise we might incur spurious failures.
> 
> Reported-by: Martin George <marting at netapp.com>
> Signed-off-by: Hannes Reinecke <hare at suse.de>
> ---
>   drivers/nvme/host/auth.c |  2 +-
>   drivers/nvme/host/core.c | 20 ++++++++++++++++++--
>   include/linux/blk-mq.h   |  2 ++
>   3 files changed, 21 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
> index c8a6db7c4498..4ca3f7d042f1 100644
> --- a/drivers/nvme/host/auth.c
> +++ b/drivers/nvme/host/auth.c
> @@ -69,7 +69,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
>   
>   	ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len,
>   				     qid == 0 ? NVME_QID_ANY : qid,
> -				     0, flags);
> +				     0, BLK_MQ_REQ_RETRY | flags);

Surely there is a more local way to do this than leaking a flag into
blk-mq...

>   	if (ret > 0)
>   		dev_warn(ctrl->device,
>   			"qid %d auth_send failed with status %d\n", qid, ret);
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 66ae23e9cb0e..a77f406512eb 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -355,7 +355,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
>   	if (likely(nvme_req(req)->status == 0))
>   		return COMPLETE;
>   
> -	if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
> +	if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED &&
> +	    !(nvme_req(req)->status & NVME_SC_DNR))
>   		return AUTHENTICATE;
>   
>   	if (blk_noretry_request(req) ||
> @@ -1037,15 +1038,30 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
>   	if (IS_ERR(req))
>   		return PTR_ERR(req);
>   	nvme_init_request(req, cmd);
> +	if (flags & BLK_MQ_REQ_RETRY)
> +		nvme_req(req)->retries = nvme_max_retries;

Maybe just rename this to __nvme_submit_sync_cmd_retries(), which accepts
a retries argument, and then __nvme_submit_sync_cmd() becomes:

int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                 union nvme_result *result, void *buffer, unsigned bufflen,
                 int qid, int at_head, blk_mq_req_flags_t flags)
{
	return __nvme_submit_sync_cmd_retries(q, cmd, result,
			buffer, bufflen, qid, at_head, flags, 0);
}

And nvme_auth_submit() can call it with nvme_max_retries...

>   
>   	if (buffer && bufflen) {
>   		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
>   		if (ret)
>   			goto out;
>   	}
> -
> +retry:
>   	req->rq_flags |= RQF_QUIET;
>   	ret = nvme_execute_rq(req, at_head);
> +	if (ret > 0) {
> +		struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
> +
> +		if (ctrl->kas)
> +			ctrl->comp_seen = true;
> +
> +		switch (nvme_decide_disposition(req)) {
> +		case COMPLETE:
> +			break;
> +		default:
> +			goto retry;
> +		}
> +	}

Why is this needed? Isn't nvme_complete_rq() called on this request?

>   	if (result && ret >= 0)
>   		*result = nvme_req(req)->result;
>    out:
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index effee1dc715a..acd833f37cd4 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -713,6 +713,8 @@ enum {
>   	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
>   	/* set RQF_PM */
>   	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
> +	/* Retry reserved commands */
> +	BLK_MQ_REQ_RETRY	= (__force blk_mq_req_flags_t)(1 << 3),

I don't think this is appropriate for what this is trying to do...



More information about the Linux-nvme mailing list