[PATCH] nvmet-rdma: Release connections synchronously
Bart Van Assche
bvanassche at acm.org
Thu May 11 08:03:20 PDT 2023
Lockdep complains about flushing queue->release_work in the NVMe RDMA target
driver because all instances of queue->release_work have the same lock class
key. Assigning a different lock class key to each release_work instance is
difficult because dynamically registered lock class keys must be unregistered,
and the last use of a work_struct's lock class key happens after the work
callback has returned. Hence rework the code for releasing connections as follows:
follows:
* Add an argument to __nvmet_rdma_queue_disconnect() and also to
nvmet_rdma_queue_disconnect() that indicates whether or not these functions
are called from inside a CM handler callback.
* Do not call rdma_destroy_id() if called from inside a CM handler callback.
* Let these functions return a negative value if the caller should free the
CM ID.
* Let the CM handler return a negative value if its caller should free the
CM ID (see the sketch after this list).
* Remove queue->release_work since releasing connections now happens
synchronously.
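
Not part of the patch: below is a minimal user-space C sketch of the ownership
convention described above. The names fake_cm_id, fake_handler(),
fake_dispatch_event() and fake_destroy_id() are invented for this illustration
and merely stand in for rdma_cm_id, nvmet_rdma_cm_handler(), the rdma_cm event
dispatch code and rdma_destroy_id(); the real changes are in the diff further
down.

/*
 * Sketch only: a handler invoked while its caller holds the CM ID's
 * handler_mutex must not destroy the ID itself. Instead it returns a
 * negative value and the caller destroys the ID after dropping the mutex.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct fake_cm_id {
	int idx;
};

/* Stand-in for rdma_destroy_id(): must not run under handler_mutex. */
static void fake_destroy_id(struct fake_cm_id *id)
{
	printf("destroying CM ID %d outside the handler\n", id->idx);
	free(id);
}

/*
 * Stand-in for nvmet_rdma_cm_handler(): runs with handler_mutex held, so it
 * only *reports* that the ID should be destroyed.
 */
static int fake_handler(struct fake_cm_id *id, bool disconnect)
{
	if (disconnect) {
		printf("handler: CM ID %d disconnected, caller must free it\n",
		       id->idx);
		return -1;	/* models returning -ENODEV from the handler */
	}
	return 0;
}

/*
 * Stand-in for the rdma_cm core: calls the handler under the (modelled)
 * handler_mutex and destroys the ID afterwards if the handler asked for it.
 */
static void fake_dispatch_event(struct fake_cm_id *id, bool disconnect)
{
	int ret;

	/* mutex_lock(&id_priv->handler_mutex) would happen here */
	ret = fake_handler(id, disconnect);
	/* mutex_unlock(&id_priv->handler_mutex) would happen here */

	if (ret < 0)
		fake_destroy_id(id);
}

int main(void)
{
	struct fake_cm_id *id = calloc(1, sizeof(*id));

	if (!id)
		return 1;
	id->idx = 1;
	fake_dispatch_event(id, false);	/* nothing to release */
	fake_dispatch_event(id, true);	/* handler asks caller to free the ID */
	return 0;
}

The point of the convention is that rdma_destroy_id() acquires
id_priv->handler_mutex, which the rdma_cm core already holds while running the
CM handler, so the handler must delegate the destruction to its caller instead
of performing it itself.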
This patch suppresses the following lockdep complaint:
======================================================
WARNING: possible circular locking dependency detected
6.3.0 #62 Not tainted
------------------------------------------------------
kworker/1:3/455 is trying to acquire lock:
ffff88813bfd9420 (&id_priv->handler_mutex){+.+.}-{3:3}, at: rdma_destroy_id+0x17/0x20 [rdma_cm]
but task is already holding lock:
ffff888131327db0 ((work_completion)(&queue->release_work)){+.+.}-{0:0}, at: process_one_work+0x793/0x1350
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #3 ((work_completion)(&queue->release_work)){+.+.}-{0:0}:
process_one_work+0x7dc/0x1350
worker_thread+0xfc/0x1260
kthread+0x29e/0x340
ret_from_fork+0x2c/0x50
-> #2 ((wq_completion)nvmet-wq){+.+.}-{0:0}:
__flush_workqueue+0x130/0x12f0
nvmet_rdma_cm_handler+0x961/0x39c0 [nvmet_rdma]
cma_cm_event_handler+0xb2/0x2f0 [rdma_cm]
cma_ib_req_handler+0x1096/0x4a50 [rdma_cm]
cm_process_work+0x46/0x3b0 [ib_cm]
cm_req_handler+0x20e2/0x61d0 [ib_cm]
cm_work_handler+0xc15/0x6ce0 [ib_cm]
process_one_work+0x843/0x1350
worker_thread+0xfc/0x1260
kthread+0x29e/0x340
ret_from_fork+0x2c/0x50
-> #1 (&id_priv->handler_mutex/1){+.+.}-{3:3}:
__mutex_lock+0x186/0x18f0
cma_ib_req_handler+0xc3c/0x4a50 [rdma_cm]
cm_process_work+0x46/0x3b0 [ib_cm]
cm_req_handler+0x20e2/0x61d0 [ib_cm]
cm_work_handler+0xc15/0x6ce0 [ib_cm]
process_one_work+0x843/0x1350
worker_thread+0xfc/0x1260
kthread+0x29e/0x340
ret_from_fork+0x2c/0x50
-> #0 (&id_priv->handler_mutex){+.+.}-{3:3}:
__lock_acquire+0x2fc0/0x60f0
lock_acquire+0x1a7/0x4e0
__mutex_lock+0x186/0x18f0
rdma_destroy_id+0x17/0x20 [rdma_cm]
nvmet_rdma_free_queue+0x7a/0x380 [nvmet_rdma]
nvmet_rdma_release_queue_work+0x3e/0x90 [nvmet_rdma]
process_one_work+0x843/0x1350
worker_thread+0xfc/0x1260
kthread+0x29e/0x340
ret_from_fork+0x2c/0x50
other info that might help us debug this:
Chain exists of:
&id_priv->handler_mutex --> (wq_completion)nvmet-wq --> (work_completion)(&queue->release_work)
Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock((work_completion)(&queue->release_work));
                               lock((wq_completion)nvmet-wq);
                               lock((work_completion)(&queue->release_work));
  lock(&id_priv->handler_mutex);
*** DEADLOCK ***
2 locks held by kworker/1:3/455:
#0: ffff888127b28538 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x766/0x1350
#1: ffff888131327db0 ((work_completion)(&queue->release_work)){+.+.}-{0:0}, at: process_one_work+0x793/0x1350
stack backtrace:
CPU: 1 PID: 455 Comm: kworker/1:3 Not tainted 6.3.0 #62
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014
Workqueue: nvmet-wq nvmet_rdma_release_queue_work [nvmet_rdma]
Call Trace:
<TASK>
dump_stack_lvl+0x57/0x90
check_noncircular+0x27b/0x310
__lock_acquire+0x2fc0/0x60f0
lock_acquire+0x1a7/0x4e0
__mutex_lock+0x186/0x18f0
rdma_destroy_id+0x17/0x20 [rdma_cm]
nvmet_rdma_free_queue+0x7a/0x380 [nvmet_rdma]
nvmet_rdma_release_queue_work+0x3e/0x90 [nvmet_rdma]
process_one_work+0x843/0x1350
worker_thread+0xfc/0x1260
kthread+0x29e/0x340
ret_from_fork+0x2c/0x50
</TASK>
See also https://lore.kernel.org/all/rsmmxrchy6voi5qhl4irss5sprna3f5owkqtvybxglcv2pnylm@xmrnpfu3tfpe/
Cc: Sagi Grimberg <sagi at grimberg.me>
Cc: Hannes Reinecke <hare at suse.de>
Cc: Shin'ichiro Kawasaki <shinichiro.kawasaki at wdc.com>
Signed-off-by: Bart Van Assche <bvanassche at acm.org>
---
drivers/nvme/target/rdma.c | 70 ++++++++++++++++++++------------------
1 file changed, 37 insertions(+), 33 deletions(-)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 4597bca43a6d..cfd9d4368248 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -101,7 +101,6 @@ struct nvmet_rdma_queue {
spinlock_t rsps_lock;
struct nvmet_rdma_cmd *cmds;
- struct work_struct release_work;
struct list_head rsp_wait_list;
struct list_head rsp_wr_wait_list;
spinlock_t rsp_wr_wait_lock;
@@ -167,7 +166,8 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
-static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
+static int nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue,
+ bool from_cm_handler);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_rsp *r);
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
@@ -693,7 +693,7 @@ static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)
* of admin connect error, just disconnect and
* cleanup the queue
*/
- nvmet_rdma_queue_disconnect(queue);
+ nvmet_rdma_queue_disconnect(queue, false);
}
}
@@ -1360,17 +1360,6 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
kfree(queue);
}
-static void nvmet_rdma_release_queue_work(struct work_struct *w)
-{
- struct nvmet_rdma_queue *queue =
- container_of(w, struct nvmet_rdma_queue, release_work);
- struct nvmet_rdma_device *dev = queue->dev;
-
- nvmet_rdma_free_queue(queue);
-
- kref_put(&dev->ref, nvmet_rdma_free_dev);
-}
-
static int
nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
struct nvmet_rdma_queue *queue)
@@ -1441,11 +1430,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
if (ret)
goto out_destroy_sq;
- /*
- * Schedules the actual release because calling rdma_destroy_id from
- * inside a CM callback would trigger a deadlock. (great API design..)
- */
- INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
queue->dev = ndev;
queue->cm_id = cm_id;
queue->port = port->nport;
@@ -1582,11 +1566,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
goto put_device;
}
- if (queue->host_qid == 0) {
- /* Let inflight controller teardown complete */
- flush_workqueue(nvmet_wq);
- }
-
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
if (ret) {
/*
@@ -1638,10 +1617,17 @@ static void nvmet_rdma_queue_established(struct nvmet_rdma_queue *queue)
spin_unlock_irqrestore(&queue->state_lock, flags);
}
-static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
+/*
+ * If called from inside a CM handler callback, the CM ID must not be
+ * destroyed. Instead, a negative value is returned if the caller should
+ * destroy the CM ID.
+ */
+static int __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue,
+ bool from_cm_handler)
{
bool disconnect = false;
unsigned long flags;
+ int ret = 0;
pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state);
@@ -1668,12 +1654,22 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
spin_unlock_irqrestore(&queue->state_lock, flags);
if (disconnect) {
+ struct nvmet_rdma_device *dev = queue->dev;
+
rdma_disconnect(queue->cm_id);
- queue_work(nvmet_wq, &queue->release_work);
+ if (from_cm_handler)
+ ret = -ENODEV;
+ else
+ rdma_destroy_id(queue->cm_id);
+ nvmet_rdma_free_queue(queue);
+ kref_put(&dev->ref, nvmet_rdma_free_dev);
}
+
+ return ret;
}
-static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
+static int nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue,
+ bool from_cm_handler)
{
bool disconnect = false;
@@ -1685,12 +1681,16 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
mutex_unlock(&nvmet_rdma_queue_mutex);
if (disconnect)
- __nvmet_rdma_queue_disconnect(queue);
+ return __nvmet_rdma_queue_disconnect(queue, from_cm_handler);
+
+ return 0;
}
static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue)
{
+ struct nvmet_rdma_device *dev = queue->dev;
+
WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING);
mutex_lock(&nvmet_rdma_queue_mutex);
@@ -1699,7 +1699,9 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
mutex_unlock(&nvmet_rdma_queue_mutex);
pr_err("failed to connect queue %d\n", queue->idx);
- queue_work(nvmet_wq, &queue->release_work);
+
+ nvmet_rdma_free_queue(queue);
+ kref_put(&dev->ref, nvmet_rdma_free_dev);
}
/**
@@ -1749,6 +1751,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
return 1;
}
+/* The caller destroys @cm_id if this function does not return zero. */
static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1779,7 +1782,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
fallthrough;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- nvmet_rdma_queue_disconnect(queue);
+ ret = nvmet_rdma_queue_disconnect(queue, true);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
ret = nvmet_rdma_device_removal(cm_id, queue);
@@ -1791,6 +1794,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_CONNECT_ERROR:
nvmet_rdma_queue_connect_fail(cm_id, queue);
+ ret = -ENODEV;
break;
default:
pr_err("received unrecognized RDMA CM event %d\n",
@@ -1812,7 +1816,7 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl)
list_del_init(&queue->queue_list);
mutex_unlock(&nvmet_rdma_queue_mutex);
- __nvmet_rdma_queue_disconnect(queue);
+ __nvmet_rdma_queue_disconnect(queue, false);
goto restart;
}
}
@@ -1831,7 +1835,7 @@ static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port)
continue;
list_del_init(&queue->queue_list);
- __nvmet_rdma_queue_disconnect(queue);
+ __nvmet_rdma_queue_disconnect(queue, false);
}
mutex_unlock(&nvmet_rdma_queue_mutex);
}
@@ -2049,7 +2053,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data
pr_info("Removing queue %d\n", queue->idx);
list_del_init(&queue->queue_list);
- __nvmet_rdma_queue_disconnect(queue);
+ __nvmet_rdma_queue_disconnect(queue, false);
}
mutex_unlock(&nvmet_rdma_queue_mutex);