[PATCH] nvme-multipath: fix lockdep warning on shutdown

hare at kernel.org hare at kernel.org
Thu Jan 23 23:14:39 PST 2025


From: Hannes Reinecke <hare at kernel.org>

During shutdown of multipath devices, lockdep complained about a
potential circular locking dependency:

WARNING: possible circular locking dependency detected
(udev-worker)/2792 is trying to acquire lock:
ffff8881012a4348 ((wq_completion)kblockd){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90

but task is already holding lock:
ffff88811e4b7cc8 (&disk->open_mutex){+.+.}-{4:4}, at: bdev_release+0x61/0x1a0
which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:
-> #2 (&disk->open_mutex){+.+.}-{4:4}:
        __mutex_lock+0xa5/0xe00
        nvme_partition_scan_work+0x31/0x60
        process_scheduled_works+0x37c/0x6f0
-> #1 ((work_completion)(&head->partition_scan_work)){+.+.}-{0:0}:
        process_scheduled_works+0x348/0x6f0
        worker_thread+0x127/0x2a0
-> #0 ((wq_completion)kblockd){+.+.}-{0:0}:
        __lock_acquire+0x11f9/0x1790
        lock_acquire+0x245/0x2d0
        touch_wq_lockdep_map+0x3b/0x90
        __flush_work+0x240/0x4b0
        nvme_mpath_remove_disk+0x2b/0x50
        nvme_free_ns_head+0x19/0x90

The problem is that nvme_mpath_remove_disk() is called with
disk->open_mutex held, so flushing partition_scan_work (which itself
takes disk->open_mutex) can deadlock: the flush waits for the work
item, and the work item waits for the mutex the flusher already
holds. Fix this by checking NVME_NSHEAD_DISK_LIVE in the work handler
and returning early, before disk->open_mutex is taken.
The patch also moves nvmet_check_ana_state() into nvmet_req_find_ns()
so that I/O to a disabled namespace reports its ANA state instead of
a generic internal path error, and adds NULL-pointer guards to
blkcg_set_ioprio() and nvmet_bdev_execute_rw().
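
In effect the multipath change makes the scan work a no-op once the
disk is no longer live. A minimal sketch of the resulting handler,
pieced together from the hunk below (the container_of() line is
reconstructed and may not match the source exactly):

static void nvme_partition_scan_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, partition_scan_work);

	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
					     &head->disk->state)))
		return;

	/* Disk is being torn down: bail out before taking open_mutex. */
	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
		return;

	mutex_lock(&head->disk->open_mutex);
	bdev_disk_changed(head->disk, false);
	mutex_unlock(&head->disk->open_mutex);
}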

Fixes: 1f021341eef4 ("nvme-multipath: defer partition scanning")
Signed-off-by: Hannes Reinecke <hare at kernel.org>
---
 block/blk-ioprio.c                |  6 ++++-
 drivers/nvme/host/multipath.c     |  2 ++
 drivers/nvme/target/core.c        | 42 +++++++++++++++----------------
 drivers/nvme/target/io-cmd-bdev.c |  9 +++++++
 4 files changed, 37 insertions(+), 22 deletions(-)
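
For reference, the flushing side of the reported cycle, reduced to
the one call that matters; this is a simplified sketch reconstructed
from the trace above, not the actual function body:

void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
	/*
	 * Reached via nvme_free_ns_head() with disk->open_mutex held
	 * (the bdev_release frame in the trace).  flush_work() waits
	 * for partition_scan_work, which in turn wants
	 * disk->open_mutex: the cycle lockdep reports.  With
	 * NVME_NSHEAD_DISK_LIVE cleared, the work returns before
	 * taking the mutex and the flush can complete.
	 */
	flush_work(&head->partition_scan_work);
}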

diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 8fff7ccc0ac7..9f1b2069a3c9 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -141,9 +141,13 @@ static struct blkcg_policy ioprio_policy = {
 
 void blkcg_set_ioprio(struct bio *bio)
 {
-	struct ioprio_blkcg *blkcg = blkcg_to_ioprio_blkcg(bio->bi_blkg->blkcg);
+	struct ioprio_blkcg *blkcg;
 	u16 prio;
 
+	if (WARN_ON(!bio->bi_blkg || !bio->bi_blkg->blkcg))
+		return;
+
+	blkcg = blkcg_to_ioprio_blkcg(bio->bi_blkg->blkcg);
 	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
 		return;
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a85d190942bd..af763ac4d657 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -593,6 +593,8 @@ static void nvme_partition_scan_work(struct work_struct *work)
 	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
 					     &head->disk->state)))
 		return;
+	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+		return;
 
 	mutex_lock(&head->disk->open_mutex);
 	bdev_disk_changed(head->disk, false);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 78ba6162361a..5f7b5d1f78c0 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -423,20 +423,37 @@ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 	cancel_delayed_work_sync(&ctrl->ka_work);
 }
 
+static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
+		struct nvmet_ns *ns)
+{
+	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
+
+	if (unlikely(state == NVME_ANA_INACCESSIBLE))
+		return NVME_SC_ANA_INACCESSIBLE;
+	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
+		return NVME_SC_ANA_PERSISTENT_LOSS;
+	if (unlikely(state == NVME_ANA_CHANGE))
+		return NVME_SC_ANA_TRANSITION;
+	return 0;
+}
+
 u16 nvmet_req_find_ns(struct nvmet_req *req)
 {
 	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
 	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
+	u16 status = 0;
 
 	req->ns = xa_load(&subsys->namespaces, nsid);
 	if (unlikely(!req->ns || !req->ns->enabled)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
 		if (!req->ns) /* ns doesn't exist! */
 			return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
-
-		/* ns exists but it's disabled */
+		status = nvmet_check_ana_state(req->port, req->ns);
+		if (!status)
+			/* ns exists but it's disabled */
+			status = NVME_SC_INTERNAL_PATH_ERROR;
 		req->ns = NULL;
-		return NVME_SC_INTERNAL_PATH_ERROR;
+		return status;
 	}
 
 	percpu_ref_get(&req->ns->ref);
@@ -965,20 +982,6 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
-static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
-		struct nvmet_ns *ns)
-{
-	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
-
-	if (unlikely(state == NVME_ANA_INACCESSIBLE))
-		return NVME_SC_ANA_INACCESSIBLE;
-	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
-		return NVME_SC_ANA_PERSISTENT_LOSS;
-	if (unlikely(state == NVME_ANA_CHANGE))
-		return NVME_SC_ANA_TRANSITION;
-	return 0;
-}
-
 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
 {
 	if (unlikely(req->ns->readonly)) {
@@ -1040,14 +1043,11 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 		return nvmet_parse_passthru_io_cmd(req);
 
 	ret = nvmet_req_find_ns(req);
-	if (unlikely(ret))
-		return ret;
-
-	ret = nvmet_check_ana_state(req->port, req->ns);
 	if (unlikely(ret)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
 		return ret;
 	}
+
 	ret = nvmet_io_cmd_check_access(req);
 	if (unlikely(ret)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 2b09b2c69857..4533e9997c7e 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -285,8 +285,16 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		bio_init(bio, req->ns->bdev, req->inline_bvec,
 			 ARRAY_SIZE(req->inline_bvec), opf);
 	} else {
+		if (!req->ns->enabled) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL_PATH_ERROR);
+			return;
+		}
 		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
 				GFP_KERNEL);
+		if (!bio) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL);
+			return;
+		}
 	}
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_private = req;
@@ -313,6 +321,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 
 			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
 					opf, GFP_KERNEL);
+			WARN_ON(!bio);
 			bio->bi_iter.bi_sector = sector;
 
 			bio_chain(bio, prev);
-- 
2.35.3



