[PATCH 1/2] nvme-tcp: short-circuit connect retries

Sagi Grimberg sagi at grimberg.me
Thu Jul 14 07:35:41 PDT 2022


> When a reconnect attempt fails with a non-retryable status
> (eg when the subsystem has been unprovisioned) there hardly
> is any reason to retry the reconnect attempt.
> So pass the actual error status to nvme_tcp_reconnect_or_remove()
> and short-circuit retries if the DNR bit is set.
> 
> Signed-off-by: Hannes Reinecke <hare at suse.de>
> ---
>   drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
>   1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 70096a2e8762..4220c1ad6b29 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
>   	nvme_tcp_destroy_io_queues(ctrl, remove);
>   }
>   
> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
>   {
> +	bool recon = true;
> +
>   	/* If we are resetting/deleting then do nothing */
>   	if (ctrl->state != NVME_CTRL_CONNECTING) {
>   		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
>   		return;
>   	}
>   
> -	if (nvmf_should_reconnect(ctrl)) {
> +	if (status > 0 && (status & NVME_SC_DNR)) {
> +		dev_info(ctrl->device, "reconnect failure %d\n", status);
> +		recon = false;
> +	}
> +

Why should we call this if we need to remove for sure?
I suggest we just change the call-site.

> +	if (recon && nvmf_should_reconnect(ctrl)) {
>   		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
>   			ctrl->opts->reconnect_delay);
>   		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
> @@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>   	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
>   			struct nvme_tcp_ctrl, connect_work);
>   	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
> +	int ret;
>   
>   	++ctrl->nr_reconnects;
>   
> -	if (nvme_tcp_setup_ctrl(ctrl, false))
> +	ret = nvme_tcp_setup_ctrl(ctrl, false);
> +	if (ret)
>   		goto requeue;

	status = nvme_tcp_setup_ctrl(ctrl, false);
	if (!status) {
		dev_info(ctrl->device,
			"Successfully reconnected (%d attempt)\n",
                         ctrl->nr_reconnects);
		ctrl->nr_reconnects = 0;
	} else {
		dev_info(ctrl->device,
			"Failed reconnect attempt %d (%d)\n",
				ctrl->nr_reconnects, status);
		if (status > 0 && (status & NVME_SC_DNR)) {
			dev_info(ctrl->device,
				"Removing controller...\n");
			nvme_delete_ctrl(ctrl);
		} else {
			nvme_tcp_reconnect_or_remove(ctrl);
		}
	}
}

>   
>   	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>   requeue:
>   	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
>   			ctrl->nr_reconnects);
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, ret);
>   }
>   
>   static void nvme_tcp_error_recovery_work(struct work_struct *work)
> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
>   		return;
>   	}
>   
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);

This emphasizes that this is a redundant parameter.

>   }
>   
>   static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
> @@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
>   {
>   	struct nvme_ctrl *ctrl =
>   		container_of(work, struct nvme_ctrl, reset_work);
> +	int ret;
>   
>   	nvme_stop_ctrl(ctrl);
>   	nvme_tcp_teardown_ctrl(ctrl, false);
> @@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
>   		return;
>   	}
>   
> -	if (nvme_tcp_setup_ctrl(ctrl, false))
> +	ret = nvme_tcp_setup_ctrl(ctrl, false);
> +	if (ret)
>   		goto out_fail;

Similar error handling here.

	status = nvme_tcp_setup_ctrl(ctrl, false);
	if (status) {
		++ctrl->nr_reconnects;
		if (status > 0 && (status & NVME_SC_DNR)) {
			dev_info(ctrl->device,
				"Removing controller...\n");
			nvme_delete_ctrl(ctrl);
		} else {
			nvme_tcp_reconnect_or_remove(ctrl);
		}
	}

>   
>   	return;
>   
>   out_fail:
>   	++ctrl->nr_reconnects;
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, ret);
>   }
>   
>   static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)



More information about the Linux-nvme mailing list