[PATCH] nvme-rdma: avoid stale device wrapper in remove_one
Cen Zhang
zzzccc427 at gmail.com
Sun Jun 21 06:59:39 PDT 2026
nvme_rdma_remove_one() walks nvme_rdma_ctrl_list under
nvme_rdma_ctrl_mutex, but it identifies matching controllers by reading
ctrl->device->dev. The mutex only protects controller list membership.
ctrl->device is a cached copy of queue 0's nvme_rdma_device, and that
wrapper is refcounted by queue lifetime.
The buggy scenario involves two paths, with each column showing the order
within that path:
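  RDMA remove callback:              Controller error recovery:
  1. enter nvme_rdma_remove_one()    1. run nvme_rdma_error_recovery_work()
  2. walk nvme_rdma_ctrl_list        2. tear down the admin queue
  3. read ctrl->device->dev          3. drop the final queue device ref
                                     4. free the nvme_rdma_device wrapper

If step 4 of error recovery completes before step 3 of the remove
callback, the callback dereferences the freed nvme_rdma_device wrapper.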
Fix this by caching the ib_device identity in the controller when the
admin queue is configured. The remove callback can then compare the
cached pointer value against the ib_device being removed without
dereferencing the queue-owned nvme_rdma_device wrapper. Keep the delete
workqueue flush conditional on actually matching a controller.
Validation reproduced this kernel report:
BUG: KASAN: slab-use-after-free in nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
Call Trace:
<TASK>
dump_stack_lvl+0x66/0xa0
print_report+0xce/0x630
? nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
? srso_alias_return_thunk+0x5/0xfbef5
? __virt_addr_valid+0x20d/0x410
? nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
kasan_report+0xe0/0x110
? nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
remove_client_context+0xa9/0xf0 [ib_core]
disable_device+0x12d/0x240 [ib_core]
? __pfx_disable_device+0x10/0x10 [ib_core]
? srso_alias_return_thunk+0x5/0xfbef5
? __mutex_unlock_slowpath+0x147/0x900
__ib_unregister_device+0x26f/0x460 [ib_core]
ib_unregister_device_and_put+0x55/0x70 [ib_core]
nldev_dellink+0x29e/0x3c0 [ib_core]
? unwind_next_frame+0x6e3/0x2190
? __pfx_nldev_dellink+0x10/0x10 [ib_core]
? lock_acquire+0x2b8/0x2f0
? srso_alias_return_thunk+0x5/0xfbef5
? cap_capable+0x196/0x330
? __pfx_down_read+0x10/0x10
rdma_nl_rcv_msg+0x2db/0x5f0 [ib_core]
? __pfx_rdma_nl_rcv_msg+0x10/0x10 [ib_core]
rdma_nl_rcv_skb.constprop.0.isra.0+0x222/0x380 [ib_core]
? __pfx_rdma_nl_rcv_skb.constprop.0.isra.0+0x10/0x10 [ib_core]
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? netlink_deliver_tap+0x150/0xac0
netlink_unicast+0x47c/0x790
? __pfx_netlink_unicast+0x10/0x10
netlink_sendmsg+0x767/0xc30
? __pfx_netlink_sendmsg+0x10/0x10
? lock_release+0x1e0/0x280
__sys_sendto+0x339/0x390
? __pfx___sys_sendto+0x10/0x10
? srso_alias_return_thunk+0x5/0xfbef5
__x64_sys_sendto+0xe0/0x1c0
? do_syscall_64+0x81/0x6a0
? srso_alias_return_thunk+0x5/0xfbef5
? trace_hardirqs_on+0x18/0x160
do_syscall_64+0x115/0x6a0
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Allocated by task 436:
kasan_save_stack+0x33/0x60
kasan_save_track+0x14/0x30
__kasan_kmalloc+0xaa/0xb0
nvme_rdma_cm_handler+0xcbc/0x2914 [nvme_rdma]
cma_cm_event_handler+0xb2/0x390 [rdma_cm]
addr_handler+0x199/0x2b0 [rdma_cm]
process_one_req+0x113/0x650 [ib_core]
process_one_work+0x8d0/0x1870
worker_thread+0x575/0xf80
kthread+0x2e7/0x3c0
ret_from_fork+0x576/0x810
ret_from_fork_asm+0x1a/0x30
Freed by task 436:
kasan_save_stack+0x33/0x60
kasan_save_track+0x14/0x30
kasan_save_free_info+0x3b/0x60
__kasan_slab_free+0x5f/0x80
kfree+0x307/0x580
nvme_rdma_free_dev+0x16d/0x260 [nvme_rdma]
nvme_rdma_free_queue+0x6d/0x90 [nvme_rdma]
nvme_rdma_error_recovery_work+0x7f/0x110 [nvme_rdma]
process_one_work+0x8d0/0x1870
worker_thread+0x575/0xf80
kthread+0x2e7/0x3c0
ret_from_fork+0x576/0x810
ret_from_fork_asm+0x1a/0x30
Fixes: e87a911fed07 ("nvme-rdma: use ib_client API to detect device removal")
Assisted-by: Codex:gpt-5.5
Signed-off-by: Cen Zhang <zzzccc427 at gmail.com>
---
drivers/nvme/host/rdma.c | 21 ++++++---------------
1 file changed, 6 insertions(+), 15 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6909e3542794..7c4a1e5f4991 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -116,6 +116,7 @@ struct nvme_rdma_ctrl {
struct blk_mq_tag_set admin_tag_set;
struct nvme_rdma_device *device;
+ struct ib_device *ib_device;
u32 max_fr_pages;
@@ -788,6 +789,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
return error;
ctrl->device = ctrl->queues[0].device;
+ WRITE_ONCE(ctrl->ib_device, ctrl->device->dev);
ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
/* T10-PI support */
@@ -2372,31 +2374,20 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
struct nvme_rdma_ctrl *ctrl;
- struct nvme_rdma_device *ndev;
bool found = false;
- mutex_lock(&device_list_mutex);
- list_for_each_entry(ndev, &device_list, entry) {
- if (ndev->dev == ib_device) {
- found = true;
- break;
- }
- }
- mutex_unlock(&device_list_mutex);
-
- if (!found)
- return;
-
/* Delete all controllers using this device */
mutex_lock(&nvme_rdma_ctrl_mutex);
list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
- if (ctrl->device->dev != ib_device)
+ if (READ_ONCE(ctrl->ib_device) != ib_device)
continue;
+ found = true;
nvme_delete_ctrl(&ctrl->ctrl);
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
- flush_workqueue(nvme_delete_wq);
+ if (found)
+ flush_workqueue(nvme_delete_wq);
}
static struct ib_client nvme_rdma_ib_client = {
--
2.43.0
More information about the Linux-nvme
mailing list