[RFC PATCH v1 7/7] nvme-tcp: Do not immediately cancel inflight requests during recovery

Mohamed Khalfella mkhalfella at purestorage.com
Mon Mar 24 10:49:00 PDT 2025


In case of controller recovery, reset, or deletion, it is possible that there
are inflight requests for which no response was received from the target
controller. TP4129 requires that such inflight requests not be canceled
immediately. Instead, these requests should be held until the target learns
about the disconnection and quiesces pending NVMe commands. Implement this
for nvme-tcp.

Signed-off-by: Mohamed Khalfella <mkhalfella at purestorage.com>
---
 drivers/nvme/host/tcp.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 327e37a25281..822e6329e332 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2252,12 +2252,14 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
 	return error;
 }
 
-static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl)
+static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
+					  bool hold_reqs)
 {
 	nvme_quiesce_admin_queue(ctrl);
 	blk_sync_queue(ctrl->admin_q);
 	nvme_tcp_stop_queue(ctrl, 0);
-	nvme_cancel_admin_tagset(ctrl);
+	if (!hold_reqs)
+		nvme_cancel_admin_tagset(ctrl);
 	nvme_tcp_free_admin_queue(ctrl);
 	if (ctrl->tls_pskid) {
 		dev_dbg(ctrl->device, "Wipe negotiated TLS_PSK %08x\n",
@@ -2274,12 +2276,14 @@ static void nvme_tcp_suspend_io_queues(struct nvme_ctrl *ctrl)
 	nvme_sync_io_queues(ctrl);
 }
 
-static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl)
+static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
+					bool hold_reqs)
 {
 	if (ctrl->queue_count <= 1)
 		return;
 	nvme_tcp_stop_io_queues(ctrl);
-	nvme_cancel_tagset(ctrl);
+	if (!hold_reqs)
+		nvme_cancel_tagset(ctrl);
 	nvme_tcp_free_io_queues(ctrl);
 }
 
@@ -2375,7 +2379,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
 	}
 destroy_admin:
 	nvme_stop_keep_alive(ctrl);
-	nvme_tcp_teardown_admin_queue(ctrl);
+	nvme_tcp_teardown_admin_queue(ctrl, false);
 	if (new) {
 		nvme_unquiesce_admin_queue(ctrl);
 		nvme_remove_admin_tag_set(ctrl);
@@ -2418,10 +2422,12 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	nvme_tcp_suspend_io_queues(ctrl);
-	nvme_tcp_teardown_io_queues(ctrl);
+	nvme_tcp_teardown_io_queues(ctrl, true);
+	nvme_tcp_teardown_admin_queue(ctrl, true);
+	if (nvme_queue_held_requests_work(ctrl))
+		nvme_wait_for_held_requests(ctrl);
 	/* unquiesce to fail fast pending requests */
 	nvme_unquiesce_io_queues(ctrl);
-	nvme_tcp_teardown_admin_queue(ctrl);
 	nvme_unquiesce_admin_queue(ctrl);
 	nvme_auth_stop(ctrl);
 
@@ -2439,11 +2445,15 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
 {
+	bool hold_reqs;
+
 	nvme_tcp_suspend_io_queues(ctrl);
-	nvme_tcp_teardown_io_queues(ctrl);
 	nvme_quiesce_admin_queue(ctrl);
-	nvme_disable_ctrl(ctrl, shutdown);
-	nvme_tcp_teardown_admin_queue(ctrl);
+	hold_reqs = nvme_disable_ctrl(ctrl, shutdown);
+	nvme_tcp_teardown_io_queues(ctrl, hold_reqs);
+	nvme_tcp_teardown_admin_queue(ctrl, hold_reqs);
+	if (hold_reqs && nvme_queue_held_requests_work(ctrl))
+		nvme_wait_for_held_requests(ctrl);
 }
 
 static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
-- 
2.48.1




More information about the Linux-nvme mailing list