[PATCH 02/10] nvme-fabrics: introduce nvmf_reconnect_or_remove API
James Smart
jsmart2021 at gmail.com
Tue Nov 2 16:38:34 PDT 2021
On 10/20/2021 3:38 AM, Max Gurtovoy wrote:
> This logic is duplicated today for RDMA and TCP controllers. Move it to
> the fabrics driver and export it as a new API.
>
> Also update the RDMA/TCP transport drivers to use this API and remove
> the duplicated code.
>
> Reviewed-by: Israel Rukshin <israelr at nvidia.com>
> Reviewed-by: Chaitanya Kulkarni <kch at nvidia.com>
> Reviewed-by: Hannes Reinecke <hare at suse.de>
> Signed-off-by: Max Gurtovoy <mgurtovoy at nvidia.com>
> ---
> drivers/nvme/host/fabrics.c | 21 +++++++++++++++++++++
> drivers/nvme/host/fabrics.h | 1 +
> drivers/nvme/host/rdma.c | 25 +++----------------------
> drivers/nvme/host/tcp.c | 26 +++-----------------------
> 4 files changed, 28 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 668c6bb7a567..4a1ef67c6fb3 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -472,6 +472,27 @@ bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
> }
> EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
>
> +void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +{
> + /* If we are resetting/deleting then do nothing */
> + if (ctrl->state != NVME_CTRL_CONNECTING) {
> + WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> + ctrl->state == NVME_CTRL_LIVE);
> + return;
> + }
> +
> + if (nvmf_should_reconnect(ctrl)) {
> + dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
> + ctrl->opts->reconnect_delay);
> + queue_delayed_work(nvme_wq, &ctrl->connect_work,
> + ctrl->opts->reconnect_delay * HZ);
> + } else {
> + dev_info(ctrl->device, "Removing controller...\n");
> + nvme_delete_ctrl(ctrl);
> + }
> +}
> +EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
> +
This won't be sufficient for FC, so FC can't use it. I'd have to think about
whether there's a way to restructure or wrap it, but it's not a great fit.
I do think what FC is doing relative to NVME_SC_DNR should be done in
rdma/tcp as well.
In other words, this should minimally be:
/*
 * nvmf_reconnect_or_remove - schedule another reconnect attempt or delete
 * the controller.
 * @ctrl:   controller whose connect attempt failed
 * @status: result that led to this call; per the accompanying discussion
 *          this is either a negative -Exxx value or a positive NVMe status
 *          code (0 is passed when there is no failing status to report)
 *
 * Does nothing unless the controller is in NVME_CTRL_CONNECTING. Otherwise
 * it either queues ctrl->connect_work on nvme_wq after
 * ctrl->opts->reconnect_delay seconds, or calls nvme_delete_ctrl().
 */
void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
{
/* If we are resetting/deleting then do nothing */
if (ctrl->state != NVME_CTRL_CONNECTING) {
/* NEW or LIVE is not expected here; warn once, then bail out. */
WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
ctrl->state == NVME_CTRL_LIVE);
return;
}
/*
 * A positive status with NVME_SC_DNR set is never retried. The
 * "status > 0" guard keeps negative errno values from being tested
 * against the DNR bit.
 */
if (!(status > 0 && status & NVME_SC_DNR) &&
nvmf_should_reconnect(ctrl)) {
dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
ctrl->opts->reconnect_delay);
/* reconnect_delay is multiplied by HZ to form the work delay. */
queue_delayed_work(nvme_wq, &ctrl->connect_work,
ctrl->opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->device, "Removing controller...\n");
nvme_delete_ctrl(ctrl);
}
}
EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
Then change the callers to pass, as status, the return value that caused
the reschedule. It'll either be a -Exxx value or an NVMe status code
returned by one of the core routines during controller init. This way, an
uncorrectable failure during controller init will just fail without
rescheduling.
...
> @@ -1181,7 +1162,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
> requeue:
> dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
> ctrl->ctrl.nr_reconnects);
> - nvme_rdma_reconnect_or_remove(ctrl);
> + nvmf_reconnect_or_remove(&ctrl->ctrl);
This would become:
@@ -2,10 +2,12 @@ static void nvme_rdma_reconnect_ctrl_wor
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
+ int ret;
++ctrl->ctrl.nr_reconnects;
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -18,5 +20,5 @@ static void nvme_rdma_reconnect_ctrl_wor
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
> }
>
> static void nvme_rdma_error_recovery_work(struct work_struct *work)
> @@ -1202,7 +1183,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
> return;
> }
>
> - nvme_rdma_reconnect_or_remove(ctrl);
> + nvmf_reconnect_or_remove(&ctrl->ctrl);
> }
@@ -16,5 +16,5 @@ static void nvme_rdma_error_recovery_wor
return;
}
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, 0);
}
>
> static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
> @@ -2265,7 +2246,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
>
> out_fail:
> ++ctrl->ctrl.nr_reconnects;
> - nvme_rdma_reconnect_or_remove(ctrl);
> + nvmf_reconnect_or_remove(&ctrl->ctrl);
> }
@@ -2,6 +2,7 @@ static void nvme_rdma_reset_ctrl_work(st
{
struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+ int ret;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -12,12 +13,13 @@ static void nvme_rdma_reset_ctrl_work(st
return;
}
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
And similar mods to tcp.
-- james
More information about the Linux-nvme
mailing list