[PATCH] nvme-multipath: fix lockdep warning on shutdown
hare at kernel.org
Thu Jan 23 23:14:39 PST 2025
From: Hannes Reinecke <hare at kernel.org>
During shutdown of multipath devices, lockdep complained about a
potential circular locking dependency:
WARNING: possible circular locking dependency detected
(udev-worker)/2792 is trying to acquire lock:
ffff8881012a4348 ((wq_completion)kblockd){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90
but task is already holding lock:
ffff88811e4b7cc8 (&disk->open_mutex){+.+.}-{4:4}, at: bdev_release+0x61/0x1a0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&disk->open_mutex){+.+.}-{4:4}:
        __mutex_lock+0xa5/0xe00
        nvme_partition_scan_work+0x31/0x60
        process_scheduled_works+0x37c/0x6f0

-> #1 ((work_completion)(&head->partition_scan_work)){+.+.}-{0:0}:
        process_scheduled_works+0x348/0x6f0
        worker_thread+0x127/0x2a0

-> #0 ((wq_completion)kblockd){+.+.}-{0:0}:
        __lock_acquire+0x11f9/0x1790
        lock_acquire+0x245/0x2d0
        touch_wq_lockdep_map+0x3b/0x90
        __flush_work+0x240/0x4b0
        nvme_mpath_remove_disk+0x2b/0x50
        nvme_free_ns_head+0x19/0x90
So the problem is that nvme_mpath_remove_disk() is called with
disk->open_mutex held, and it then calls flush_work() on
partition_scan_work, which itself takes disk->open_mutex: waiting
under the lock for work that needs the same lock closes exactly the
circle lockdep reports.

Fix this by having nvme_partition_scan_work() check
NVME_NSHEAD_DISK_LIVE and return early, before it tries to take
disk->open_mutex.
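
As an aside, the shape of this deadlock is easy to reproduce outside
the kernel. The sketch below is illustrative only and not part of the
patch: it models the cycle with POSIX threads, where pthread_join()
stands in for flush_work() and a plain mutex stands in for
disk->open_mutex; all other names are made up for the example. The
program hangs deterministically, which is the dependency lockdep is
warning about:

#include <pthread.h>
#include <stdio.h>

/* stand-in for disk->open_mutex */
static pthread_mutex_t open_mutex = PTHREAD_MUTEX_INITIALIZER;

/* analogue of nvme_partition_scan_work(): the work needs open_mutex */
static void *partition_scan_work(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&open_mutex);	/* blocks: main holds it */
	puts("scanning partitions");
	pthread_mutex_unlock(&open_mutex);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	/* analogue of bdev_release() taking disk->open_mutex ... */
	pthread_mutex_lock(&open_mutex);
	pthread_create(&worker, NULL, partition_scan_work, NULL);

	/*
	 * ... and of flush_work() waiting for the work to finish.
	 * The join never completes because the worker needs the
	 * mutex we are holding: the same cycle as in the report.
	 */
	pthread_join(worker, NULL);
	pthread_mutex_unlock(&open_mutex);
	return 0;
}

The cycle can be broken at either end; the patch does it at the work
end, so that the flush completes immediately when the disk is no
longer live.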
Fixes: 1f021341eef4 ("nvme-multipath: defer partition scanning")
Signed-off-by: Hannes Reinecke <hare at kernel.org>
---
 block/blk-ioprio.c                |  6 ++++-
 drivers/nvme/host/multipath.c     |  2 ++
 drivers/nvme/target/core.c        | 42 +++++++++++++++----------------
 drivers/nvme/target/io-cmd-bdev.c |  9 +++++++
 4 files changed, 37 insertions(+), 22 deletions(-)
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 8fff7ccc0ac7..9f1b2069a3c9 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -141,9 +141,13 @@ static struct blkcg_policy ioprio_policy = {
 void blkcg_set_ioprio(struct bio *bio)
 {
-	struct ioprio_blkcg *blkcg = blkcg_to_ioprio_blkcg(bio->bi_blkg->blkcg);
+	struct ioprio_blkcg *blkcg;
 	u16 prio;
 
+	if (WARN_ON(!bio->bi_blkg || !bio->bi_blkg->blkcg))
+		return;
+
+	blkcg = blkcg_to_ioprio_blkcg(bio->bi_blkg->blkcg);
 	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
 		return;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a85d190942bd..af763ac4d657 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -593,6 +593,8 @@ static void nvme_partition_scan_work(struct work_struct *work)
 	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
 					     &head->disk->state)))
 		return;
+	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+		return;
 
 	mutex_lock(&head->disk->open_mutex);
 	bdev_disk_changed(head->disk, false);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 78ba6162361a..5f7b5d1f78c0 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -423,20 +423,37 @@ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 	cancel_delayed_work_sync(&ctrl->ka_work);
 }
 
+static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
+		struct nvmet_ns *ns)
+{
+	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
+
+	if (unlikely(state == NVME_ANA_INACCESSIBLE))
+		return NVME_SC_ANA_INACCESSIBLE;
+	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
+		return NVME_SC_ANA_PERSISTENT_LOSS;
+	if (unlikely(state == NVME_ANA_CHANGE))
+		return NVME_SC_ANA_TRANSITION;
+	return 0;
+}
+
 u16 nvmet_req_find_ns(struct nvmet_req *req)
 {
 	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
 	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
+	u16 status = 0;
 
 	req->ns = xa_load(&subsys->namespaces, nsid);
 	if (unlikely(!req->ns || !req->ns->enabled)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
 		if (!req->ns) /* ns doesn't exist! */
 			return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
-
-		/* ns exists but it's disabled */
+		status = nvmet_check_ana_state(req->port, req->ns);
+		if (!status)
+			/* ns exists but it's disabled */
+			status = NVME_SC_INTERNAL_PATH_ERROR;
 		req->ns = NULL;
-		return NVME_SC_INTERNAL_PATH_ERROR;
+		return status;
 	}
 
 	percpu_ref_get(&req->ns->ref);
@@ -965,20 +982,6 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
-static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
-		struct nvmet_ns *ns)
-{
-	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
-
-	if (unlikely(state == NVME_ANA_INACCESSIBLE))
-		return NVME_SC_ANA_INACCESSIBLE;
-	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
-		return NVME_SC_ANA_PERSISTENT_LOSS;
-	if (unlikely(state == NVME_ANA_CHANGE))
-		return NVME_SC_ANA_TRANSITION;
-	return 0;
-}
-
 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
 {
 	if (unlikely(req->ns->readonly)) {
@@ -1040,14 +1043,11 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 		return nvmet_parse_passthru_io_cmd(req);
 
 	ret = nvmet_req_find_ns(req);
-	if (unlikely(ret))
-		return ret;
-
-	ret = nvmet_check_ana_state(req->port, req->ns);
 	if (unlikely(ret)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
 		return ret;
 	}
+
 	ret = nvmet_io_cmd_check_access(req);
 	if (unlikely(ret)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 2b09b2c69857..4533e9997c7e 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -285,8 +285,16 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		bio_init(bio, req->ns->bdev, req->inline_bvec,
			 ARRAY_SIZE(req->inline_bvec), opf);
 	} else {
+		if (!req->ns->enabled) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL_PATH_ERROR);
+			return;
+		}
 		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
				GFP_KERNEL);
+		if (!bio) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL);
+			return;
+		}
 	}
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_private = req;
@@ -313,6 +321,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 
 			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
					opf, GFP_KERNEL);
+			WARN_ON(!bio);
 			bio->bi_iter.bi_sector = sector;
 
 			bio_chain(bio, prev);
--
2.35.3