[PATCH 1/2] nvme-tcp: short-circuit connect retries
Sagi Grimberg
sagi at grimberg.me
Thu Jul 14 07:35:41 PDT 2022
> When a reconnect attempt fails with a non-retryable status
> (eg when the subsystem has been unprovisioned) there hardly
> is any reason to retry the reconnect attempt.
> So pass the actual error status to nvme_tcp_reconnect_or_remove()
> and short-circuit retries if the DNR bit is set.
>
> Signed-off-by: Hannes Reinecke <hare at suse.de>
> ---
> drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 70096a2e8762..4220c1ad6b29 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
> nvme_tcp_destroy_io_queues(ctrl, remove);
> }
>
> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
> {
> + bool recon = true;
> +
> /* If we are resetting/deleting then do nothing */
> if (ctrl->state != NVME_CTRL_CONNECTING) {
> WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
> return;
> }
>
> - if (nvmf_should_reconnect(ctrl)) {
> + if (status > 0 && (status & NVME_SC_DNR)) {
> + dev_info(ctrl->device, "reconnect failure %d\n", status);
> + recon = false;
> + }
> +
Why should we call this if we need to remove for sure?
I suggest we just change the call-site.
> + if (recon && nvmf_should_reconnect(ctrl)) {
> dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
> ctrl->opts->reconnect_delay);
> queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
> @@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
> struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
> struct nvme_tcp_ctrl, connect_work);
> struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
> + int ret;
>
> ++ctrl->nr_reconnects;
>
> - if (nvme_tcp_setup_ctrl(ctrl, false))
> + ret = nvme_tcp_setup_ctrl(ctrl, false);
> + if (ret)
> goto requeue;
status = nvme_tcp_setup_ctrl(ctrl, false);
if (!status) {
dev_info(ctrl->device,
"Successfully reconnected (%d attempt)\n",
ctrl->nr_reconnects);
ctrl->nr_reconnects = 0;
} else {
dev_info(ctrl->device,
"Failed reconnect attempt %d (%d)\n",
ctrl->nr_reconnects, status);
if (status > 0 && (status & NVME_SC_DNR)) {
dev_info(ctrl->device,
"Removing controller...\n");
nvme_delete_ctrl(ctrl);
} else {
nvme_tcp_reconnect_or_remove(ctrl);
}
}
}
>
> dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
> requeue:
> dev_info(ctrl->device, "Failed reconnect attempt %d\n",
> ctrl->nr_reconnects);
> - nvme_tcp_reconnect_or_remove(ctrl);
> + nvme_tcp_reconnect_or_remove(ctrl, ret);
> }
>
> static void nvme_tcp_error_recovery_work(struct work_struct *work)
> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
> return;
> }
>
> - nvme_tcp_reconnect_or_remove(ctrl);
> + nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
This emphasizes that this is a redundant parameter.
> }
>
> static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
> @@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
> {
> struct nvme_ctrl *ctrl =
> container_of(work, struct nvme_ctrl, reset_work);
> + int ret;
>
> nvme_stop_ctrl(ctrl);
> nvme_tcp_teardown_ctrl(ctrl, false);
> @@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
> return;
> }
>
> - if (nvme_tcp_setup_ctrl(ctrl, false))
> + ret = nvme_tcp_setup_ctrl(ctrl, false);
> + if (ret)
> goto out_fail;
Similar error handling here.
status = nvme_tcp_setup_ctrl(ctrl, false);
if (status) {
++ctrl->nr_reconnects;
if (status > 0 && (status & NVME_SC_DNR)) {
dev_info(ctrl->device,
"Removing controller...\n");
nvme_delete_ctrl(ctrl);
} else {
nvme_tcp_reconnect_or_remove(ctrl);
}
}
>
> return;
>
> out_fail:
> ++ctrl->nr_reconnects;
> - nvme_tcp_reconnect_or_remove(ctrl);
> + nvme_tcp_reconnect_or_remove(ctrl, ret);
> }
>
> static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
More information about the Linux-nvme
mailing list