[PATCHv7] nvme: fix refcounting imbalance when all paths are down

Hannes Reinecke hare at suse.de
Tue Jun 22 00:01:40 PDT 2021


When the last path to a ns_head drops, the current code
removes the ns_head from the subsystem list, but will only
delete the disk itself once the last reference to the ns_head
drops. This causes a refcounting imbalance, e.g. when
applications hold a reference to the disk, as they will then
never get notified that the disk is in fact dead.
With this patch we call del_gendisk() and remove the ns_head
from the list of ns_heads at the same time, ensuring that
the disk can be properly removed and applications get the
appropriate notifications.

Changes to v6:
- Move the list_del() into nvme_mpath_check_last_path()
- Drop the tests for GENHD_FL_UP
Changes to v5:
- Synchronize between nvme_init_ns_head() and nvme_mpath_check_last_path()
- Check for removed gendisk in nvme_ns_head_submit_bio()
Changes to v4:
- Call del_gendisk() in nvme_mpath_check_last_path() to avoid deadlock
Changes to v3:
- Simplify if() clause to detect duplicate namespaces
Changes to v2:
- Drop memcpy() statement
Changes to v1:
- Always check NSIDs after reattach

Signed-off-by: Hannes Reinecke <hare at suse.de>
---
 drivers/nvme/host/core.c      |  6 +-----
 drivers/nvme/host/multipath.c | 28 +++++++++++++++++++++++-----
 drivers/nvme/host/nvme.h      | 11 ++---------
 3 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c7ef0b6684b5..106322dafe45 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3799,8 +3799,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
-	if (list_empty(&ns->head->list))
-		list_del_init(&ns->head->entry);
 	mutex_unlock(&ctrl->subsys->lock);
 	nvme_put_ns_head(ns->head);
  out_free_queue:
@@ -3821,8 +3819,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
 	mutex_lock(&ns->ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
-	if (list_empty(&ns->head->list))
-		list_del_init(&ns->head->entry);
 	mutex_unlock(&ns->ctrl->subsys->lock);
 
 	synchronize_rcu(); /* guarantee not available in head->list */
@@ -3842,7 +3838,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	list_del_init(&ns->list);
 	up_write(&ns->ctrl->namespaces_rwsem);
 
-	nvme_mpath_check_last_path(ns);
+	nvme_mpath_check_last_path(ns->head);
 	nvme_put_ns(ns);
 }
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 23573fe3fc7d..bf9dfaefa77d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -771,20 +771,38 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 #endif
 }
 
-void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+void nvme_mpath_check_last_path(struct nvme_ns_head *head)
 {
+	bool last_path = false;
 	if (!head->disk)
 		return;
-	if (head->disk->flags & GENHD_FL_UP) {
-		nvme_cdev_del(&head->cdev, &head->cdev_device);
-		del_gendisk(head->disk);
+
+	/* Synchronize with nvme_init_ns_head() */
+	mutex_lock(&head->subsys->lock);
+	if (list_empty(&head->list)) {
+		list_del_init(&head->entry);
+		last_path = true;
 	}
+	mutex_unlock(&head->subsys->lock);
+	if (last_path) {
+		kblockd_schedule_work(&head->requeue_work);
+		if (head->disk->flags & GENHD_FL_UP) {
+			nvme_cdev_del(&head->cdev, &head->cdev_device);
+			del_gendisk(head->disk);
+		}
+	}
+}
+
+void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+{
+	if (!head->disk)
+		return;
 	blk_set_queue_dying(head->disk->queue);
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
 	flush_work(&head->requeue_work);
 	blk_cleanup_queue(head->disk->queue);
-	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+	if (!test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		/*
 		 * if device_add_disk wasn't called, prevent
 		 * disk release to put a bogus reference on the
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 75420ceacc10..6b1caabe861b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -716,14 +716,7 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
-
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
-{
-	struct nvme_ns_head *head = ns->head;
-
-	if (head->disk && list_empty(&head->list))
-		kblockd_schedule_work(&head->requeue_work);
-}
+void nvme_mpath_check_last_path(struct nvme_ns_head *head);
 
 static inline void nvme_trace_bio_complete(struct request *req)
 {
@@ -772,7 +765,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+static inline void nvme_mpath_check_last_path(struct nvme_ns_head *head)
 {
 }
 static inline void nvme_trace_bio_complete(struct request *req)
-- 
2.26.2




More information about the Linux-nvme mailing list