[PATCH 1/2] nvme-auth: retry command if DNR bit is not set
Sagi Grimberg
sagi at grimberg.me
Mon Sep 5 05:05:59 PDT 2022
On 8/30/22 15:49, Hannes Reinecke wrote:
> If the cqe returns a status with the DNR bit not set we should
> retry the command; otherwise we might incur spurious failures.
>
> Reported-by: Martin George <marting at netapp.com>
> Signed-off-by: Hannes Reinecke <hare at suse.de>
> ---
> drivers/nvme/host/auth.c | 2 +-
> drivers/nvme/host/core.c | 20 ++++++++++++++++++--
> include/linux/blk-mq.h | 2 ++
> 3 files changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
> index c8a6db7c4498..4ca3f7d042f1 100644
> --- a/drivers/nvme/host/auth.c
> +++ b/drivers/nvme/host/auth.c
> @@ -69,7 +69,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
>
> ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len,
> qid == 0 ? NVME_QID_ANY : qid,
> - 0, flags);
> + 0, BLK_MQ_REQ_RETRY | flags);
Surely there is a more local way to do this than leaking a flag into
blk-mq...
> if (ret > 0)
> dev_warn(ctrl->device,
> "qid %d auth_send failed with status %d\n", qid, ret);
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 66ae23e9cb0e..a77f406512eb 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -355,7 +355,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
> if (likely(nvme_req(req)->status == 0))
> return COMPLETE;
>
> - if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
> + if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED &&
> + !(nvme_req(req)->status & NVME_SC_DNR))
> return AUTHENTICATE;
>
> if (blk_noretry_request(req) ||
> @@ -1037,15 +1038,30 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
> if (IS_ERR(req))
> return PTR_ERR(req);
> nvme_init_request(req, cmd);
> + if (flags & BLK_MQ_REQ_RETRY)
> + nvme_req(req)->retries = nvme_max_retries;
Maybe just rename this to __nvme_submit_sync_cmd_retries(), which accepts a
retries argument, and then __nvme_submit_sync_cmd() becomes:
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command
*cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
int qid, int at_head, blk_mq_req_flags_t flags)
{
return __nvme_submit_sync_cmd_retries(q, cmd, result,
buffer, bufflen, qid, at_head, flags, 0)
}
And nvme_auth_submit() can call it with nvme_max_retries...
>
> if (buffer && bufflen) {
> ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
> if (ret)
> goto out;
> }
> -
> +retry:
> req->rq_flags |= RQF_QUIET;
> ret = nvme_execute_rq(req, at_head);
> + if (ret > 0) {
> + struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
> +
> + if (ctrl->kas)
> + ctrl->comp_seen = true;
> +
> + switch (nvme_decide_disposition(req)) {
> + case COMPLETE:
> + break;
> + default:
> + goto retry;
> + }
> + }
Why is this needed? Isn't nvme_complete_rq() called on this request?
> if (result && ret >= 0)
> *result = nvme_req(req)->result;
> out:
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index effee1dc715a..acd833f37cd4 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -713,6 +713,8 @@ enum {
> BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1),
> /* set RQF_PM */
> BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2),
> + /* Retry reserved commands */
> + BLK_MQ_REQ_RETRY = (__force blk_mq_req_flags_t)(1 << 3),
I don't think this is appropriate for what this is trying to do...
More information about the Linux-nvme
mailing list