[PATCH 3/3] nvme-multipath: skip failed paths during partition scan

Hannes Reinecke hare at kernel.org
Mon Oct 7 03:01:34 PDT 2024


From: Hannes Reinecke <hare at suse.de>

When an I/O error is encountered during scanning (i.e. while the
scan_lock is held), we should avoid using this path until scanning
is finished to avoid deadlocks with device_add_disk().
So set a new flag NVME_NS_SCAN_FAILED if a failover happened during
scanning, and skip this path in nvme_available_path().
Then we can check whether that bit is set after device_add_disk() has
returned, and remove the disk again if no available paths are found.
That allows the device to be recreated via the 'rescan' sysfs attribute
once I/O errors no longer occur.

Signed-off-by: Hannes Reinecke <hare at kernel.org>
---
 drivers/nvme/host/multipath.c | 26 ++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h      |  1 +
 2 files changed, 27 insertions(+)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f03ef983a75f..4113d38606a4 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -102,6 +102,13 @@ void nvme_failover_req(struct request *req)
 		queue_work(nvme_wq, &ns->ctrl->ana_work);
 	}
 
+	/*
+	 * Do not use this path during scanning
+	 * to avoid deadlocks in device_add_disk()
+	 */
+	if (mutex_is_locked(&ns->ctrl->scan_lock))
+		set_bit(NVME_NS_SCAN_FAILED, &ns->flags);
+
 	spin_lock_irqsave(&ns->head->requeue_lock, flags);
 	for (bio = req->bio; bio; bio = bio->bi_next) {
 		bio_set_dev(bio, ns->head->disk->part0);
@@ -434,6 +441,10 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
 			continue;
+		if (test_bit(NVME_NS_SCAN_FAILED, &ns->flags) &&
+		    mutex_is_locked(&ns->ctrl->scan_lock))
+			continue;
+
 		switch (nvme_ctrl_state(ns->ctrl)) {
 		case NVME_CTRL_LIVE:
 		case NVME_CTRL_RESETTING:
@@ -659,6 +670,20 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 			clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
 			return;
 		}
+		/*
+		 * If there is no available path and NVME_NS_SCAN_FAILED is
+		 * set an error occurred during partition scan triggered
+		 * by device_add_disk(), and the disk is most certainly
+		 * not live.
+		 */
+		if (!nvme_available_path(head) &&
+		    test_and_clear_bit(NVME_NS_SCAN_FAILED, &ns->flags)) {
+			dev_dbg(ns->ctrl->device, "delete gendisk for nsid %d\n",
+				head->ns_id);
+			clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
+			del_gendisk(head->disk);
+			return;
+		}
 		nvme_add_ns_head_cdev(head);
 	}
 
@@ -732,6 +757,7 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
 	ns->ana_grpid = le32_to_cpu(desc->grpid);
 	ns->ana_state = desc->state;
 	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
+	clear_bit(NVME_NS_SCAN_FAILED, &ns->flags);
 	/*
 	 * nvme_mpath_set_live() will trigger I/O to the multipath path device
 	 * and in turn to this path device.  However we cannot accept this I/O
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 50515ad0f9d6..a4f99873ecb7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -527,6 +527,7 @@ struct nvme_ns {
 #define NVME_NS_ANA_PENDING	2
 #define NVME_NS_FORCE_RO	3
 #define NVME_NS_READY		4
+#define NVME_NS_SCAN_FAILED	5
 
 	struct cdev		cdev;
 	struct device		cdev_device;
-- 
2.35.3




More information about the Linux-nvme mailing list