[RESEND PATCH] nvme: explicitly use normal NVMe error handling when appropriate
Sagi Grimberg
sagi at grimberg.me
Fri Aug 14 02:53:14 EDT 2020
> +static inline enum nvme_disposition nvme_req_disposition(struct request *req)
> +{
> + if (likely(nvme_req(req)->status == 0))
> + return COMPLETE;
> +
> + if (blk_noretry_request(req) ||
> + (nvme_req(req)->status & NVME_SC_DNR) ||
> + nvme_req(req)->retries >= nvme_max_retries)
> + return COMPLETE;
> +
> + if (req->cmd_flags & REQ_NVME_MPATH) {
> + switch (nvme_req(req)->status & 0x7ff) {
> + case NVME_SC_ANA_TRANSITION:
> + case NVME_SC_ANA_INACCESSIBLE:
> + case NVME_SC_ANA_PERSISTENT_LOSS:
> + return REDIRECT_ANA;
> + case NVME_SC_HOST_PATH_ERROR:
> + case NVME_SC_HOST_ABORTED_CMD:
> + return REDIRECT_TMP;
> + }
> + }
> +
> + if (blk_queue_dying(req->q))
> + return COMPLETE;
> + return RETRY;
> +}
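
Side note for anyone reading along: the enum itself isn't in the quoted
context; from its uses above it presumably reads something like:

	enum nvme_disposition {
		COMPLETE,
		RETRY,
		REDIRECT_ANA,
		REDIRECT_TMP,
	};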
> +
> +static inline void nvme_complete_req(struct request *req)
> {
> blk_status_t status = nvme_error_status(nvme_req(req)->status);
>
> - trace_nvme_complete_rq(req);
> + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> + req_op(req) == REQ_OP_ZONE_APPEND)
> + req->__sector = nvme_lba_to_sect(req->q->queuedata,
> + le64_to_cpu(nvme_req(req)->result.u64));
> +
> + nvme_trace_bio_complete(req, status);
> + blk_mq_end_request(req, status);
> +}
>
> +void nvme_complete_rq(struct request *req)
> +{
> + trace_nvme_complete_rq(req);
> nvme_cleanup_cmd(req);
>
> if (nvme_req(req)->ctrl->kas)
> nvme_req(req)->ctrl->comp_seen = true;
>
> - if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
> - if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req))
> - return;
> -
> - if (!blk_queue_dying(req->q)) {
> - nvme_retry_req(req);
> - return;
> - }
> - } else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> - req_op(req) == REQ_OP_ZONE_APPEND) {
> - req->__sector = nvme_lba_to_sect(req->q->queuedata,
> - le64_to_cpu(nvme_req(req)->result.u64));
> + switch (nvme_req_disposition(req)) {
> + case COMPLETE:
> + nvme_complete_req(req);
nvme_complete_rq calling nvme_complete_req reads confusingly similar...
Maybe call it __nvme_complete_rq instead?
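
Something like this (sketch only; the double-underscore name is just a
suggestion):

	static inline void __nvme_complete_rq(struct request *req)
	{
		/* body unchanged from nvme_complete_req above */
	}

and at the call site:

	case COMPLETE:
		__nvme_complete_rq(req);
		return;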
> + return;
> + case RETRY:
> + nvme_retry_req(req);
> + return;
> + case REDIRECT_ANA:
> + nvme_failover_req(req, true);
> + return;
> + case REDIRECT_TMP:
> + nvme_failover_req(req, false);
> + return;
> }
> -
> - nvme_trace_bio_complete(req, status);
> - blk_mq_end_request(req, status);
> }
> EXPORT_SYMBOL_GPL(nvme_complete_rq);
>
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 3ded54d2c9c6ad..0c22b2c88687a2 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -65,51 +65,32 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
> }
> }
>
> -bool nvme_failover_req(struct request *req)
> +void nvme_failover_req(struct request *req, bool is_ana_status)
> {
> struct nvme_ns *ns = req->q->queuedata;
> - u16 status = nvme_req(req)->status;
> unsigned long flags;
>
> - switch (status & 0x7ff) {
> - case NVME_SC_ANA_TRANSITION:
> - case NVME_SC_ANA_INACCESSIBLE:
> - case NVME_SC_ANA_PERSISTENT_LOSS:
> - /*
> - * If we got back an ANA error we know the controller is alive,
> - * but not ready to serve this namespace. The spec suggests
> - * we should update our general state here, but due to the fact
> - * that the admin and I/O queues are not serialized that is
> - * fundamentally racy. So instead just clear the current path,
> - * mark the path as pending and kick off a re-read of the ANA
> - * log page ASAP.
> - */
> - nvme_mpath_clear_current_path(ns);
> - if (ns->ctrl->ana_log_buf) {
> - set_bit(NVME_NS_ANA_PENDING, &ns->flags);
> - queue_work(nvme_wq, &ns->ctrl->ana_work);
> - }
> - break;
> - case NVME_SC_HOST_PATH_ERROR:
> - case NVME_SC_HOST_ABORTED_CMD:
> - /*
> - * Temporary transport disruption in talking to the controller.
> - * Try to send on a new path.
> - */
> - nvme_mpath_clear_current_path(ns);
> - break;
> - default:
> - /* This was a non-ANA error so follow the normal error path. */
> - return false;
> + nvme_mpath_clear_current_path(ns);
> +
> + /*
> + * If we got back an ANA error we know the controller is alive, but not
> + * ready to serve this namespace. The spec suggests we should update
> + * our general state here, but due to the fact that the admin and I/O
> + * queues are not serialized that is fundamentally racy. So instead
> + * just clear the current path, mark the path as pending and kick
> + * off a re-read of the ANA log page ASAP.
> + */
> + if (is_ana_status && ns->ctrl->ana_log_buf) {
Maybe call nvme_req_disposition again locally here so we don't need to
carry is_ana_status over to multipath.c. Not a big deal though..
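
E.g., a rough (untested) sketch, assuming nvme_req_disposition is made
visible to multipath.c:

	void nvme_failover_req(struct request *req)
	{
		struct nvme_ns *ns = req->q->queuedata;
		unsigned long flags;

		nvme_mpath_clear_current_path(ns);

		/* only kick the ANA log re-read for ANA errors */
		if (nvme_req_disposition(req) == REDIRECT_ANA &&
		    ns->ctrl->ana_log_buf) {
			set_bit(NVME_NS_ANA_PENDING, &ns->flags);
			queue_work(nvme_wq, &ns->ctrl->ana_work);
		}

		/* ... rest of the requeue logic unchanged ... */
	}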
Overall this looks good to me.