[PATCH 1/2] nvme-mpath: delete disk after last connection

Hannes Reinecke hare at suse.de
Wed Mar 31 15:53:50 BST 2021


From: Keith Busch <kbusch at kernel.org>

The multipath code currently deletes the disk only after all references
to it are dropped, rather than when the last path to that disk is lost.
This has been reported to cause problems for some users, such as MD RAID.

Delete the disk when the last path is gone. This is the same behavior we
currently have with non-multipathed nvme devices.

The following simple example demonstrates what is currently observed
using an nvme loopback target (loop setup file not shown):

 # nvmetcli restore loop.json
 [   31.156452] nvmet: adding nsid 1 to subsystem testnqn1
 [   31.159140] nvmet: adding nsid 1 to subsystem testnqn2

 # nvme connect -t loop -n testnqn1 -q hostnqn
 [   36.866302] nvmet: creating controller 1 for subsystem testnqn1 for NQN hostnqn.
 [   36.872926] nvme nvme3: new ctrl: "testnqn1"

 # nvme connect -t loop -n testnqn1 -q hostnqn
 [   38.227186] nvmet: creating controller 2 for subsystem testnqn1 for NQN hostnqn.
 [   38.234450] nvme nvme4: new ctrl: "testnqn1"

 # nvme connect -t loop -n testnqn2 -q hostnqn
 [   43.902761] nvmet: creating controller 3 for subsystem testnqn2 for NQN hostnqn.
 [   43.907401] nvme nvme5: new ctrl: "testnqn2"

 # nvme connect -t loop -n testnqn2 -q hostnqn
 [   44.627689] nvmet: creating controller 4 for subsystem testnqn2 for NQN hostnqn.
 [   44.641773] nvme nvme6: new ctrl: "testnqn2"

 # mdadm --create /dev/md0 --level=mirror --raid-devices=2 /dev/nvme3n1 /dev/nvme5n1
 [   53.497038] md/raid1:md0: active with 2 out of 2 mirrors
 [   53.501717] md0: detected capacity change from 0 to 66060288

 # cat /proc/mdstat
 Personalities : [raid1]
 md0 : active raid1 nvme5n1[1] nvme3n1[0]
       64512 blocks super 1.2 [2/2] [UU]

Now delete all paths to one of the namespaces:

 # echo 1 > /sys/class/nvme/nvme3/delete_controller
 # echo 1 > /sys/class/nvme/nvme4/delete_controller

There is now no path left to that namespace, but mdstat says:

 # cat /proc/mdstat
 Personalities : [raid1]
 md0 : active (auto-read-only) raid1 nvme5n1[1]
       64512 blocks super 1.2 [2/1] [_U]

And this is reported to cause a problem.

With the proposed patch, the following messages appear:

 [  227.516807] md/raid1:md0: Disk failure on nvme3n1, disabling device.
 [  227.516807] md/raid1:md0: Operation continuing on 1 devices.

And mdstat shows only the viable members:

 # cat /proc/mdstat
 Personalities : [raid1]
 md0 : active (auto-read-only) raid1 nvme5n1[1]
       64512 blocks super 1.2 [2/1] [_U]

Reported-by: Hannes Reinecke <hare at suse.de>
Signed-off-by: Keith Busch <kbusch at kernel.org>
Signed-off-by: Hannes Reinecke <hare at suse.de>
---
 drivers/nvme/host/core.c      | 5 ++++-
 drivers/nvme/host/multipath.c | 1 -
 drivers/nvme/host/nvme.h      | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 40215a0246e4..ee898c8da786 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -542,7 +542,10 @@ static void nvme_free_ns_head(struct kref *ref)
 	struct nvme_ns_head *head =
 		container_of(ref, struct nvme_ns_head, ref);
 
-	nvme_mpath_remove_disk(head);
+#ifdef CONFIG_NVME_MULTIPATH
+	if (head->disk)
+		put_disk(head->disk);
+#endif
 	ida_simple_remove(&head->subsys->ns_ida, head->instance);
 	cleanup_srcu_struct(&head->srcu);
 	nvme_put_subsystem(head->subsys);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a1d476e1ac02..adea060d1470 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -702,7 +702,6 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 		 */
 		head->disk->queue = NULL;
 	}
-	put_disk(head->disk);
 }
 
 int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b0863c59fac4..6b8992774998 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -680,7 +680,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 	struct nvme_ns_head *head = ns->head;
 
 	if (head->disk && list_empty(&head->list))
-		kblockd_schedule_work(&head->requeue_work);
+		nvme_mpath_remove_disk(head);
 }
 
 static inline void nvme_trace_bio_complete(struct request *req)
-- 
2.29.2




More information about the Linux-nvme mailing list