[PATCH] nvme: retry commands if DNR bit is not set

Wed Nov 2 01:00:30 PDT 2022


On 2022/11/2 15:36, Hannes Reinecke wrote:
> Add a 'retries' argument to __nvme_submit_sync_cmd() to instruct
> the function to retry the command if the DNR bit is not set in
> the command result, and modify the authentication code to allow
> for retries.
> 
> Signed-off-by: Hannes Reinecke <hare at suse.de>
> ---
>   drivers/nvme/host/auth.c    |  2 +-
>   drivers/nvme/host/core.c    | 29 ++++++++++++++++++++++++-----
>   drivers/nvme/host/fabrics.c | 10 +++++-----
>   drivers/nvme/host/nvme.h    |  6 +++++-
>   4 files changed, 35 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
> index c8a6db7c4498..3b63aa155beb 100644
> --- a/drivers/nvme/host/auth.c
> +++ b/drivers/nvme/host/auth.c
> @@ -69,7 +69,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
>   
>   	ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len,
>   				     qid == 0 ? NVME_QID_ANY : qid,
> -				     0, flags);
> +				     0, flags, nvme_max_retries);
>   	if (ret > 0)
>   		dev_warn(ctrl->device,
>   			"qid %d auth_send failed with status %d\n", qid, ret);
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index dc4220600585..d0de1a85596a 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -54,7 +54,7 @@ static unsigned char shutdown_timeout = 5;
>   module_param(shutdown_timeout, byte, 0644);
>   MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
>   
> -static u8 nvme_max_retries = 5;
> +u8 nvme_max_retries = 5;
>   module_param_named(max_retries, nvme_max_retries, byte, 0644);
>   MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
>   
> @@ -1016,7 +1016,7 @@ static int nvme_execute_rq(struct request *rq, bool at_head)
>    */
>   int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
>   		union nvme_result *result, void *buffer, unsigned bufflen,
> -		int qid, int at_head, blk_mq_req_flags_t flags)
> +		int qid, int at_head, blk_mq_req_flags_t flags, int retries)
>   {
>   	struct request *req;
>   	int ret;
> @@ -1030,6 +1030,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
>   	if (IS_ERR(req))
>   		return PTR_ERR(req);
>   	nvme_init_request(req, cmd);
> +	nvme_req(req)->retries = retries;
>   
>   	if (buffer && bufflen) {
>   		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
> @@ -1037,8 +1038,18 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
>   			goto out;
>   	}
>   
> +retry:
>   	req->rq_flags |= RQF_QUIET;
>   	ret = nvme_execute_rq(req, at_head);
> +	if (ret > 0) {
> +		struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
> +
> +		if (ctrl->kas)
> +			ctrl->comp_seen = true;
> +
> +		if (retries-- && nvme_decide_disposition(req) != COMPLETE)
> +			goto retry;
> +	}
>   	if (result && ret >= 0)
>   		*result = nvme_req(req)->result;
Reusing the existing retry mechanism might be better.
We could do it like this:
do not set REQ_FAILFAST_DRIVER in nvme_init_request() if a retry is needed:
	if (noretry)
		req->cmd_flags |= REQ_FAILFAST_DRIVER;