[PATCH 2/3] nvme-multipath: cannot disconnect controller on stuck partition scan
Keith Busch
kbusch at kernel.org
Wed Oct 9 10:32:32 PDT 2024
On Wed, Oct 09, 2024 at 08:23:45AM +0200, Hannes Reinecke wrote:
> With my testcase _all_ paths return NS_NOT_READY during partition scan, so
> I/O is constantly bounced between paths, and partition scan never returns.
> Doesn't matter where you call it, it's stuck.
Assuming you mean a different status, like NVME_SC_INTERNAL_PATH_ERROR, I
can recreate a stuck scan. Let me take one more shot at fixing this by
suppressing the initial partition scan and deferring it to another
context. The version below is successful in my tests.
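
The idea: allocate the ns_head disk with GD_SUPPRESS_PART_SCAN set, and
have a kblockd work item clear the bit and run bdev_disk_changed() once
the first path goes live. A scan that gets stuck bouncing I/O between
failing paths then only blocks that work item, which removal flushes
after the requeue work has failed all pending bios, rather than blocking
the controller disconnect. If I'm reading include/linux/blkdev.h right,
setting the bit suppresses both the initial scan from device_add_disk()
and user-triggered rescans (BLKRRPART), since both go through
disk_scan_partitions(), which returns -EINVAL when this helper says no:

	static inline bool disk_has_partscan(struct gendisk *disk)
	{
		return !(disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) &&
			!test_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
	}

So nothing should scan the multipath node before nvme_scan_work() clears
the bit under open_mutex.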
---
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 48e7a8906d012..e1fd53ff2c0ca 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -580,6 +580,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
 	return ret;
 }
 
+static void nvme_scan_work(struct work_struct *work)
+{
+	struct nvme_ns_head *head =
+		container_of(work, struct nvme_ns_head, scan_work);
+
+	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
+					     &head->disk->state)))
+		return;
+
+	mutex_lock(&head->disk->open_mutex);
+	bdev_disk_changed(head->disk, false);
+	mutex_unlock(&head->disk->open_mutex);
+}
+
 static void nvme_requeue_work(struct work_struct *work)
 {
 	struct nvme_ns_head *head =
@@ -606,6 +620,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	bio_list_init(&head->requeue_list);
 	spin_lock_init(&head->requeue_lock);
 	INIT_WORK(&head->requeue_work, nvme_requeue_work);
+	INIT_WORK(&head->scan_work, nvme_scan_work);
 
 	/*
 	 * Add a multipath node if the subsystems supports multiple controllers.
@@ -629,6 +644,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 		return PTR_ERR(head->disk);
 	head->disk->fops = &nvme_ns_head_ops;
 	head->disk->private_data = head;
+	set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
 	sprintf(head->disk->disk_name, "nvme%dn%d",
 			ctrl->subsys->instance, head->instance);
 	return 0;
@@ -655,6 +671,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 			return;
 		}
 		nvme_add_ns_head_cdev(head);
+		kblockd_schedule_work(&head->scan_work);
 	}
 
 	mutex_lock(&head->lock);
@@ -974,14 +991,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 		return;
 	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		nvme_cdev_del(&head->cdev, &head->cdev_device);
+		/*
+		 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+		 * to allow multipath to fail all I/O.
+		 */
+		synchronize_srcu(&head->srcu);
+		kblockd_schedule_work(&head->requeue_work);
 		del_gendisk(head->disk);
 	}
-	/*
-	 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
-	 * to allow multipath to fail all I/O.
-	 */
-	synchronize_srcu(&head->srcu);
-	kblockd_schedule_work(&head->requeue_work);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -991,6 +1008,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
 	flush_work(&head->requeue_work);
+	flush_work(&head->scan_work);
 	put_disk(head->disk);
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 313a4f978a2cf..ff3d092dbdb6d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -494,6 +494,7 @@ struct nvme_ns_head {
 	struct bio_list		requeue_list;
 	spinlock_t		requeue_lock;
 	struct work_struct	requeue_work;
+	struct work_struct	scan_work;
 	struct mutex		lock;
 	unsigned long		flags;
 #define NVME_NSHEAD_DISK_LIVE	0
--