[PATCH v1 2/2] nvme-fc: fix race with connectivity loss with nvme_fc_create_association

Daniel Wagner dwagner at suse.de
Tue Jun 11 12:06:47 PDT 2024


We are racing with nvme_fc_create_association here. We wouldn't have to
do anything in nvme_fc_ctrl_connectivity_loss if
nvme_fc_create_association reliably detected that the setup failed.
In reality, nvme_fc_create_association can return success even though
some of the operations fail during the setup (and this is even on
purpose):

 nvme nvme10: NVME-FC{10}: create association : ...
 nvme nvme10: NVME-FC{10}: controller connectivity lost. Awaiting Reconnect
 nvme nvme10: queue_size 128 > ctrl maxcmd 32, reducing to maxcmd
 nvme nvme10: Could not set queue count (880)
 nvme nvme10: Failed to configure AEN (cfg 900)
 nvme nvme10: NVME-FC{10}: controller connect complete

Thus we transition to the CONNECTED state and never start the reconnect
loop. Hence we should unconditionally schedule a reset.

If a reset has already been scheduled, the additional schedule is either
a no-op or results in back-to-back resets. But we can't really
distinguish between a host-side double-scheduled reset work and a
connectivity loss event arriving right after a reset. Thus we have to
live with this corner case. The system will reach the correct state
eventually.

Signed-off-by: Daniel Wagner <dwagner at suse.de>
---
 drivers/nvme/host/fc.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6ff28d59b169..10dc456928a2 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -169,6 +169,7 @@ struct nvme_fc_ctrl {
 
 	struct work_struct	ioerr_work;
 	struct delayed_work	connect_work;
+	struct work_struct	fc_reset_work;
 
 	struct kref		ref;
 	unsigned long		flags;
@@ -779,6 +780,14 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
 	return 0;
 }
 
+static void nvme_fc_defer_reset_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+		container_of(work, struct nvme_fc_ctrl, fc_reset_work);
+
+	nvme_reset_ctrl(&ctrl->ctrl);
+}
+
 static void
 nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
 {
@@ -806,12 +815,20 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
 
 	case NVME_CTRL_CONNECTING:
 		/*
-		 * The association has already been terminated and the
-		 * controller is attempting reconnects.  No need to do anything
-		 * futher.  Reconnects will be attempted until either the
+		 * We are racing with nvme_fc_create_association here. Ensure we
+		 * never lose the connectivity loss event and unconditionally
+		 * schedule a reset.
+		 *
+		 * The reconnects will be attempted until either the
 		 * ctlr_loss_tmo (max_retries * connect_delay) expires or the
 		 * remoteport's dev_loss_tmo expires.
+		 *
+		 * We can't sleep in this context (e.g. flush_delayed_work) but
+		 * we should execute the reset after the connect work item has
+		 * finished. Thus defer the reset trigger to the nvme_wq where
+		 * the connect is executed.
 		 */
+		queue_work(nvme_wq, &ctrl->fc_reset_work);
 		break;
 
 	case NVME_CTRL_RESETTING:
@@ -3505,6 +3522,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
 	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
+	INIT_WORK(&ctrl->fc_reset_work, nvme_fc_defer_reset_work);
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
-- 
2.45.2



