[PATCH] nvme: re-read ANA log on NS CHANGED AEN

Hannes Reinecke hare at suse.de
Fri Dec 4 10:03:10 EST 2020


As discussed on the mailing list we might be getting an NS CHANGED AEN
for a namespace on a newly created ANA group, and due to optimisations
within the spec no corresponding ANA CHANGED AEN will be sent.
Ideally we would re-read the ANA log when we're figuring out that
no ANA Group exists, but that code is hidden behind two stacked void
functions, so it'll be impossible to recover from an error on reading
the ANA log.
Instead this patch re-reads the ANA log on every NS CHANGED AEN prior
to scanning the namespaces; that will resolve the situation, too,
but doesn't risk running into an unrecoverable error.

Reported-by: Martin George <marting at netapp.com>
Signed-off-by: Hannes Reinecke <hare at suse.de>
---
 drivers/nvme/host/core.c      |  8 +++++++-
 drivers/nvme/host/multipath.c | 18 ++++++++++++++----
 drivers/nvme/host/nvme.h      |  1 +
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 95ef4943d8bd..084a05442ac2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -123,7 +123,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
 	nvme_update_bdev_size(ns->disk);
 }
 
-static void nvme_queue_scan(struct nvme_ctrl *ctrl)
+void nvme_queue_scan(struct nvme_ctrl *ctrl)
 {
 	/*
 	 * Only new queue scan work when admin and IO queues are both alive
@@ -4292,6 +4292,12 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 	switch (aer_notice_type) {
 	case NVME_AER_NOTICE_NS_CHANGED:
 		set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
+#ifdef CONFIG_NVME_MULTIPATH
+		if (ctrl->ana_log_buf) {
+			queue_work(nvme_wq, &ctrl->ana_work);
+			break;
+		}
+#endif
 		nvme_queue_scan(ctrl);
 		break;
 	case NVME_AER_NOTICE_FW_ACT_STARTING:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 74896be40c17..b920d57d0e39 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -535,10 +535,16 @@ static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
 		goto out_unlock;
 	}
 
-	error = nvme_parse_ana_log(ctrl, &nr_change_groups,
-			nvme_update_ana_state);
-	if (error)
-		goto out_unlock;
+	/*
+	 * Don't update ANA groups if triggered by an NS CHANGED
+	 * AEN; we'll be rescanning all namespaces anyway afterwards.
+	 */
+	if (!test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
+		error = nvme_parse_ana_log(ctrl, &nr_change_groups,
+					   nvme_update_ana_state);
+		if (error)
+			goto out_unlock;
+	}
 
 	/*
 	 * In theory we should have an ANATT timer per group as they might enter
@@ -557,6 +563,10 @@ static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
 		del_timer_sync(&ctrl->anatt_timer);
 out_unlock:
 	mutex_unlock(&ctrl->ana_lock);
+
+	if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events))
+		nvme_queue_scan(ctrl);
+
 	return error;
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cc111136a981..b0d01c2ce092 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -589,6 +589,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
 void nvme_start_ctrl(struct nvme_ctrl *ctrl);
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_identify(struct nvme_ctrl *ctrl);
+void nvme_queue_scan(struct nvme_ctrl *ctrl);
 
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
 
-- 
2.16.4




More information about the Linux-nvme mailing list