[PATCH] nvme: invalidate paths during rescan

Daniel Wagner dwagner at suse.de
Thu Jul 29 12:28:14 PDT 2021


From: Hannes Reinecke <hare at suse.de>

When triggering a rescan due to a namespace resize we will be
receiving AENs on every controller, triggering a rescan of all
attached namespaces. If multipath is active only the current path and
the ns_head disk will be updated, the other paths will still refer to
the old size until AENs for the remaining controllers are received.

If I/O comes in before that it might be routed to one of the old
paths, triggering an I/O failure with 'access beyond end of device'.
With this patch the old paths are skipped from multipath path
selection until the controller serving these paths has been rescanned.

Signed-off-by: Hannes Reinecke <hare at suse.de>
Tested-by: Daniel Wagner <dwagner at suse.de>
Signed-off-by: Daniel Wagner <dwagner at suse.de>
---

Hi,

I was able to reproduce the exact scenario Hannes describes.

My setup is
  - iopolicy: round-robin
  - add a 10 s sleep between the rescanning of each controller;
    this increased the likelihood of getting I/O on both paths.
  - increase the backing block device on the target (lvm)
  - as there was no AEN, trigger rescan by 'nvme ns-rescan'
  - monitor the two connections (tcp) via iptables on the
    target

Here is some debug output from my session. I think you get the idea
where I placed the printks :)

 nvme nvme17: rescanning namespaces.
 nvme16n1: detected capacity change from 4603904 to 4808704
 capacity = 4808704
 get_capacity(ns->disk) = 4808704
 get_capacity(ns->disk) = 4603904
 ns 00000000d0596eba invalidated
 nvme nvme16: rescanning namespaces.
 capacity = 4808704
 get_capacity(ns->disk) = 4808704
 get_capacity(ns->disk) = 4808704
 capacity = 4808704
 get_capacity(ns->disk) = 4808704
 get_capacity(ns->disk) = 4808704

Without the patch I reliably got 'access beyond end of device' errors
for I/O issued by blkid. I could also see the I/O traffic move to one
connection and, after the second rescan, the I/Os were evenly
distributed on the two connections again.

 drivers/nvme/host/core.c      |  2 ++
 drivers/nvme/host/multipath.c | 16 ++++++++++++++++
 drivers/nvme/host/nvme.h      | 17 +++++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dfd9dec0c1f6..20a079083129 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1877,6 +1877,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
 			goto out_unfreeze;
 	}
 
+	clear_bit(NVME_NS_INVALIDATED, &ns->flags);
 	blk_mq_unfreeze_queue(ns->disk->queue);
 
 	if (blk_queue_is_zoned(ns->queue)) {
@@ -1888,6 +1889,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
 	if (nvme_ns_head_multipath(ns->head)) {
 		blk_mq_freeze_queue(ns->head->disk->queue);
 		nvme_update_disk_info(ns->head->disk, ns, id);
+		nvme_mpath_invalidate_paths(ns);
 		blk_stack_limits(&ns->head->disk->queue->limits,
 				 &ns->queue->limits, 0);
 		blk_queue_update_readahead(ns->head->disk->queue);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3f32c5e86bfc..579020ae058d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -147,6 +147,21 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 	mutex_unlock(&ctrl->scan_lock);
 }
 
+void nvme_mpath_invalidate_paths(struct nvme_ns *ns)
+{
+	struct nvme_ns_head *head = ns->head;
+	sector_t capacity = get_capacity(head->disk);
+	int node;
+
+	for_each_node(node)
+		rcu_assign_pointer(head->current_path[node], NULL);
+
+	list_for_each_entry_rcu(ns, &head->list, siblings) {
+		if (capacity != get_capacity(ns->disk))
+			set_bit(NVME_NS_INVALIDATED, &ns->flags);
+	}
+}
+
 static bool nvme_path_is_disabled(struct nvme_ns *ns)
 {
 	/*
@@ -158,6 +173,7 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
 	    ns->ctrl->state != NVME_CTRL_DELETING)
 		return true;
 	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
+	    test_bit(NVME_NS_INVALIDATED, &ns->flags) ||
 	    test_bit(NVME_NS_REMOVING, &ns->flags))
 		return true;
 	return false;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5cd1fa3b8464..ea8933b55bba 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -467,6 +467,7 @@ struct nvme_ns {
 #define NVME_NS_DEAD     	1
 #define NVME_NS_ANA_PENDING	2
 #define NVME_NS_FORCE_RO	3
+#define NVME_NS_INVALIDATED	4
 
 	struct cdev		cdev;
 	struct device		cdev_device;
@@ -715,8 +716,18 @@ void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_invalidate_paths(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
 void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
+struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+	struct nvme_ns_head *head = ns->head;
+
+	if (head->disk && list_empty(&head->list))
+		kblockd_schedule_work(&head->requeue_work);
+}
 
 static inline void nvme_trace_bio_complete(struct request *req)
 {
@@ -762,12 +773,18 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 	return false;
 }
+static inline void nvme_mpath_invalidate_paths(struct nvme_ns *ns)
+{
+}
 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
 static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
 }
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+}
 static inline void nvme_trace_bio_complete(struct request *req)
 {
 }
-- 
2.29.2




More information about the Linux-nvme mailing list