[PATCH 2/3] nvme: refactor command completion
Chao Leng
lengchao at huawei.com
Sun Aug 16 23:30:19 EDT 2020
On 2020/8/14 23:15, Christoph Hellwig wrote:
> Lift all the code to decide the disposition of a completed command
> from nvme_complete_rq and nvme_failover_req into a new helper, which
> returns an enum of the potential actions. nvme_complete_rq then
> just switches on those and calls the proper helper for the action.
>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
> drivers/nvme/host/core.c | 76 ++++++++++++++++++++++-------------
> drivers/nvme/host/multipath.c | 47 ++++++----------------
> drivers/nvme/host/nvme.h | 31 ++++++++++++--
> 3 files changed, 90 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 88cff309d8e4f0..8d474adad721fb 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -241,17 +241,6 @@ static blk_status_t nvme_error_status(u16 status)
> }
> }
>
> -static inline bool nvme_req_needs_retry(struct request *req)
> -{
> - if (blk_noretry_request(req))
> - return false;
> - if (nvme_req(req)->status & NVME_SC_DNR)
> - return false;
> - if (nvme_req(req)->retries >= nvme_max_retries)
> - return false;
> - return true;
> -}
> -
> static void nvme_retry_req(struct request *req)
> {
> struct nvme_ns *ns = req->q->queuedata;
> @@ -268,34 +257,67 @@ static void nvme_retry_req(struct request *req)
> blk_mq_delay_kick_requeue_list(req->q, delay);
> }
>
> -void nvme_complete_rq(struct request *req)
> +enum nvme_disposition {
> + COMPLETE,
> + RETRY,
> + FAILOVER,
> +};
> +
> +static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
> {
> - blk_status_t status = nvme_error_status(nvme_req(req)->status);
> + u16 status = nvme_req(req)->status & 0x7ff;
>
> - trace_nvme_complete_rq(req);
> + if (likely(status == 0))
> + return COMPLETE;
>
> - nvme_cleanup_cmd(req);
> + if (blk_noretry_request(req) || (status & NVME_SC_DNR) ||
> + nvme_req(req)->retries >= nvme_max_retries)
> + return COMPLETE;
>
> - if (nvme_req(req)->ctrl->kas)
> - nvme_req(req)->ctrl->comp_seen = true;
> + if (req->cmd_flags & REQ_NVME_MPATH) {
> + if (nvme_is_path_error(status))
> + return FAILOVER;
> + }
>
> - if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
> - if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req))
> - return;
> + if (blk_queue_dying(req->q))
> + return COMPLETE;
>
> - if (!blk_queue_dying(req->q)) {
> - nvme_retry_req(req);
> - return;
> - }
> - } else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> - req_op(req) == REQ_OP_ZONE_APPEND) {
> + return RETRY;
> +}
> +
> +static inline void nvme_end_req(struct request *req)
> +{
> + blk_status_t status = nvme_error_status(nvme_req(req)->status);
> +
> + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> + req_op(req) == REQ_OP_ZONE_APPEND)
> req->__sector = nvme_lba_to_sect(req->q->queuedata,
> le64_to_cpu(nvme_req(req)->result.u64));
> - }
>
> nvme_trace_bio_complete(req, status);
> blk_mq_end_request(req, status);
> }
> +
> +void nvme_complete_rq(struct request *req)
> +{
> + trace_nvme_complete_rq(req);
> + nvme_cleanup_cmd(req);
> +
> + if (nvme_req(req)->ctrl->kas)
> + nvme_req(req)->ctrl->comp_seen = true;
> +
> + switch (nvme_decide_disposition(req)) {
> + case COMPLETE:
> + nvme_end_req(req);
> + return;
> + case RETRY:
> + nvme_retry_req(req);
> + return;
> + case FAILOVER:
> + nvme_failover_req(req);
> + return;
> + }
> +}
> EXPORT_SYMBOL_GPL(nvme_complete_rq);
>
> bool nvme_cancel_request(struct request *req, void *data, bool reserved)
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 3ded54d2c9c6ad..abc5bcf7209506 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -65,51 +65,30 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
> }
> }
>
> -bool nvme_failover_req(struct request *req)
> +void nvme_failover_req(struct request *req)
> {
> struct nvme_ns *ns = req->q->queuedata;
> - u16 status = nvme_req(req)->status;
> + u16 status = nvme_req(req)->status & 0x7ff;
> unsigned long flags;
>
> - switch (status & 0x7ff) {
> - case NVME_SC_ANA_TRANSITION:
> - case NVME_SC_ANA_INACCESSIBLE:
> - case NVME_SC_ANA_PERSISTENT_LOSS:
> - /*
> - * If we got back an ANA error we know the controller is alive,
> - * but not ready to serve this namespaces. The spec suggests
> - * we should update our general state here, but due to the fact
> - * that the admin and I/O queues are not serialized that is
> - * fundamentally racy. So instead just clear the current path,
> - * mark the the path as pending and kick of a re-read of the ANA
> - * log page ASAP.
> - */
> - nvme_mpath_clear_current_path(ns);
> - if (ns->ctrl->ana_log_buf) {
> - set_bit(NVME_NS_ANA_PENDING, &ns->flags);
> - queue_work(nvme_wq, &ns->ctrl->ana_work);
> - }
> - break;
> - case NVME_SC_HOST_PATH_ERROR:
> - case NVME_SC_HOST_ABORTED_CMD:
> - /*
> - * Temporary transport disruption in talking to the controller.
> - * Try to send on a new path.
> - */
> - nvme_mpath_clear_current_path(ns);
> - break;
> - default:
> - /* This was a non-ANA error so follow the normal error path. */
> - return false;
> + nvme_mpath_clear_current_path(ns);
> +
> + /*
> + * If we got back an ANA error, we know the controller is alive but not
> + * ready to serve this namespace. Kick off a re-read of the ANA
> + * information page, and just try any other available path for now.
> + */
> + if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
> + set_bit(NVME_NS_ANA_PENDING, &ns->flags);
> + queue_work(nvme_wq, &ns->ctrl->ana_work);
> }
>
> spin_lock_irqsave(&ns->head->requeue_lock, flags);
> blk_steal_bios(&ns->head->requeue_list, req);
> spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
> - blk_mq_end_request(req, 0);
>
> + blk_mq_end_request(req, 0);
> kblockd_schedule_work(&ns->head->requeue_work);
> - return true;
> }
>
> void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 6d69cc7306d034..dfcdeb318f3ab6 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -523,6 +523,32 @@ static inline u32 nvme_bytes_to_numd(size_t len)
> return (len >> 2) - 1;
> }
>
> +static inline bool nvme_is_ana_error(u16 status)
> +{
> + switch (status & 0x7ff) {
> + case NVME_SC_ANA_TRANSITION:
> + case NVME_SC_ANA_INACCESSIBLE:
> + case NVME_SC_ANA_PERSISTENT_LOSS:
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> +static inline bool nvme_is_path_error(u16 status)
> +{
> + switch (status & 0x7ff) {
> + case NVME_SC_HOST_PATH_ERROR:
> + case NVME_SC_HOST_ABORTED_CMD:
> + case NVME_SC_ANA_TRANSITION:
> + case NVME_SC_ANA_INACCESSIBLE:
> + case NVME_SC_ANA_PERSISTENT_LOSS:
I suggest checking for the 0x3xx status range directly; that would scale better.
0x300 and 0x360 are already defined by NVMe 1.4 but are not yet used in the
current code. Furthermore, the NVMe specification may define more status codes
for path-related errors in the future.
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> /*
> * Fill in the status and result information from the CQE, and then figure
> * out if we blk-mq will need to use IPI magic to complete the request, and
> @@ -635,7 +661,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
> void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
> void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
> struct nvme_ctrl *ctrl, int *flags);
> -bool nvme_failover_req(struct request *req);
> +void nvme_failover_req(struct request *req);
> void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
> int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
> void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
> @@ -694,9 +720,8 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
> sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
> }
>
> -static inline bool nvme_failover_req(struct request *req)
> +static inline void nvme_failover_req(struct request *req)
> {
> - return false;
> }
> static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
> {
>
More information about the Linux-nvme
mailing list