[PATCH 1/2] nvme-tcp: short-circuit connect retries
Hannes Reinecke
hare at suse.de
Thu Jul 14 08:17:18 PDT 2022
On 7/14/22 16:35, Sagi Grimberg wrote:
>
>> When a reconnect attempt fails with a non-retryable status
>> (eg when the subsystem has been unprovisioned) there hardly
>> is any reason to retry the reconnect attempt.
>> So pass the actual error status to nvme_tcp_reconnect_or_remove()
>> and short-circuit retries if the DNR bit is set.
>>
>> Signed-off-by: Hannes Reinecke <hare at suse.de>
>> ---
>> drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
>> 1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>> index 70096a2e8762..4220c1ad6b29 100644
>> --- a/drivers/nvme/host/tcp.c
>> +++ b/drivers/nvme/host/tcp.c
>> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct
>> nvme_ctrl *ctrl,
>> nvme_tcp_destroy_io_queues(ctrl, remove);
>> }
>> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
>> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int
>> status)
>> {
>> + bool recon = true;
>> +
>> /* If we are resetting/deleting then do nothing */
>> if (ctrl->state != NVME_CTRL_CONNECTING) {
>> WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
>> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct
>> nvme_ctrl *ctrl)
>> return;
>> }
>> - if (nvmf_should_reconnect(ctrl)) {
>> + if (status > 0 && (status & NVME_SC_DNR)) {
>> + dev_info(ctrl->device, "reconnect failure %d\n", status);
>> + recon = false;
>> + }
>> +
>
> Why should we call this if we need to remove for sure?
> I suggest we just change the call-site.
>
>> + if (recon && nvmf_should_reconnect(ctrl)) {
>> dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
>> ctrl->opts->reconnect_delay);
>> queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
>> @@ -2162,10 +2169,12 @@ static void
>> nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>> struct nvme_tcp_ctrl *tcp_ctrl =
>> container_of(to_delayed_work(work),
>> struct nvme_tcp_ctrl, connect_work);
>> struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
>> + int ret;
>> ++ctrl->nr_reconnects;
>> - if (nvme_tcp_setup_ctrl(ctrl, false))
>> + ret = nvme_tcp_setup_ctrl(ctrl, false);
>> + if (ret)
>> goto requeue;
>
> status = nvme_tcp_setup_ctrl(ctrl, false);
> if (!status) {
> dev_info(ctrl->device,
> "Successfully reconnected (%d attempt)\n",
> ctrl->nr_reconnects);
> ctrl->nr_reconnects = 0;
> } else {
> dev_info(ctrl->device,
> "Failed reconnect attempt %d (%d)\n",
> ctrl->nr_reconnects, status);
> if (status > 0 && (status & NVME_SC_DNR)) {
> dev_info(ctrl->device,
> "Removing controller...\n");
> nvme_delete_ctrl(ctrl);
> } else {
> nvme_tcp_reconnect_or_remove(ctrl);
> }
> }
> }
>
Good point.
>> dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
>> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct
>> work_struct *work)
>> requeue:
>> dev_info(ctrl->device, "Failed reconnect attempt %d\n",
>> ctrl->nr_reconnects);
>> - nvme_tcp_reconnect_or_remove(ctrl);
>> + nvme_tcp_reconnect_or_remove(ctrl, ret);
>> }
>> static void nvme_tcp_error_recovery_work(struct work_struct *work)
>> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct
>> work_struct *work)
>> return;
>> }
>> - nvme_tcp_reconnect_or_remove(ctrl);
>> + nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
>
> This emphasizes that this is a redundant parameter.
>
Ok, will be reworking it.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman
More information about the Linux-nvme
mailing list