[PATCH v6 5/5] nvme-fabrics: handle transient auth failures

Daniel Wagner dwagner at suse.de
Mon Apr 15 05:42:20 PDT 2024


Retry authentication a couple of times to avoid error out on transient
errors. E.g. if a new key is deployed on the host and the target. At the
same time the connection drops. The host might use the wrong key to
reconnect.

Suggested-by: Sagi Grimberg <sagi at grimberg.me>
Signed-off-by: Daniel Wagner <dwagner at suse.de>
---
 drivers/nvme/host/auth.c    |  4 ++--
 drivers/nvme/host/fabrics.c | 10 +++++++++-
 drivers/nvme/host/fc.c      |  2 ++
 drivers/nvme/host/nvme.h    |  1 +
 drivers/nvme/host/rdma.c    |  1 +
 drivers/nvme/host/tcp.c     |  1 +
 6 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index a264b3ae078b..368dcc6ee11b 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -797,7 +797,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
 					 NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1);
 	if (ret) {
 		chap->status = ret;
-		chap->error = -ECONNREFUSED;
+		chap->error = -EKEYREJECTED;
 		return;
 	}
 
@@ -818,7 +818,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
 	ret = nvme_auth_process_dhchap_success1(ctrl, chap);
 	if (ret) {
 		/* Controller authentication failed */
-		chap->error = -ECONNREFUSED;
+		chap->error = -EKEYREJECTED;
 		goto fail2;
 	}
 
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index d9a73b1b41c4..6dfa45dce232 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -573,8 +573,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
  */
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status)
 {
-	if (status < 0)
+	if (status < 0) {
+		/*
+		 * authentication errors can be transient, thus retry a couple
+		 * of times before giving up.
+		 */
+		if (status == -EKEYREJECTED &&
+		    ++ctrl->nr_auth_retries < 3)
+			return true;
 		return false;
+	}
 	else if (status > 0 && (status & NVME_SC_DNR))
 		return false;
 
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f0b081332749..a22d2af18553 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3176,6 +3176,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 
 	ctrl->ctrl.nr_reconnects = 0;
+	ctrl->ctrl.nr_auth_retries = 0;
 
 	if (changed)
 		nvme_start_ctrl(&ctrl->ctrl);
@@ -3491,6 +3492,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	ctrl->ctrl.opts = opts;
 	ctrl->ctrl.nr_reconnects = 0;
+	ctrl->ctrl.nr_auth_retries = 0;
 	INIT_LIST_HEAD(&ctrl->ctrl_list);
 	ctrl->lport = lport;
 	ctrl->rport = rport;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9b8904a476b8..a9af20f25405 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -382,6 +382,7 @@ struct nvme_ctrl {
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	int nr_auth_retries;
 	unsigned long flags;
 	struct nvmf_ctrl_options *opts;
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 821ab3e0fd3b..f5b3201a11b5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1117,6 +1117,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 			ctrl->ctrl.nr_reconnects);
 
 	ctrl->ctrl.nr_reconnects = 0;
+	ctrl->ctrl.nr_auth_retries = 0;
 
 	return;
 
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3e0c33323320..45270c626c87 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2266,6 +2266,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 			ctrl->nr_reconnects);
 
 	ctrl->nr_reconnects = 0;
+	ctrl->nr_auth_retries = 0;
 
 	return;
 
-- 
2.44.0




More information about the Linux-nvme mailing list