[PATCH] nvme-rdma: avoid stale device wrapper in remove_one

Cen Zhang zzzccc427 at gmail.com
Sun Jun 21 06:59:39 PDT 2026


nvme_rdma_remove_one() walks nvme_rdma_ctrl_list under
nvme_rdma_ctrl_mutex, but it identifies matching controllers by reading
ctrl->device->dev.  The mutex only protects controller list membership;
it does not keep the object behind ctrl->device alive.  ctrl->device is
a cached copy of queue 0's nvme_rdma_device pointer, and the lifetime of
that wrapper is governed by the references held by queues, not by the
controller.

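The buggy scenario involves two paths running concurrently.  Each column
shows the order within that path; the use-after-free occurs when step 3
of the remove callback runs after step 4 of error recovery: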

RDMA remove callback:                Controller error recovery:
  1. enter nvme_rdma_remove_one()    1. run nvme_rdma_error_recovery_work()
  2. walk nvme_rdma_ctrl_list        2. tear down the admin queue
  3. read ctrl->device->dev          3. drop the final queue device ref
                                      4. free the nvme_rdma_device wrapper

Fix this by caching the ib_device identity in the controller when the
admin queue is configured.  The remove callback can then compare the
cached pointer value against the ib_device being removed without
dereferencing the queue-owned nvme_rdma_device wrapper.  Keep the delete
workqueue flush conditional on actually matching a controller.

The race was reproduced during validation and produced this KASAN report:
BUG: KASAN: slab-use-after-free in nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]

Call Trace:
 <TASK>
 dump_stack_lvl+0x66/0xa0
 print_report+0xce/0x630
 ? nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? __virt_addr_valid+0x20d/0x410
 ? nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
 kasan_report+0xe0/0x110
 ? nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
 nvme_rdma_remove_one+0x281/0x2c0 [nvme_rdma]
 remove_client_context+0xa9/0xf0 [ib_core]
 disable_device+0x12d/0x240 [ib_core]
 ? __pfx_disable_device+0x10/0x10 [ib_core]
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? __mutex_unlock_slowpath+0x147/0x900
 __ib_unregister_device+0x26f/0x460 [ib_core]
 ib_unregister_device_and_put+0x55/0x70 [ib_core]
 nldev_dellink+0x29e/0x3c0 [ib_core]
 ? unwind_next_frame+0x6e3/0x2190
 ? __pfx_nldev_dellink+0x10/0x10 [ib_core]
 ? lock_acquire+0x2b8/0x2f0
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? cap_capable+0x196/0x330
 ? __pfx_down_read+0x10/0x10
 rdma_nl_rcv_msg+0x2db/0x5f0 [ib_core]
 ? __pfx_rdma_nl_rcv_msg+0x10/0x10 [ib_core]
 rdma_nl_rcv_skb.constprop.0.isra.0+0x222/0x380 [ib_core]
 ? __pfx_rdma_nl_rcv_skb.constprop.0.isra.0+0x10/0x10 [ib_core]
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? netlink_deliver_tap+0x150/0xac0
 netlink_unicast+0x47c/0x790
 ? __pfx_netlink_unicast+0x10/0x10
 netlink_sendmsg+0x767/0xc30
 ? __pfx_netlink_sendmsg+0x10/0x10
 ? lock_release+0x1e0/0x280
 __sys_sendto+0x339/0x390
 ? __pfx___sys_sendto+0x10/0x10
 ? srso_alias_return_thunk+0x5/0xfbef5
 __x64_sys_sendto+0xe0/0x1c0
 ? do_syscall_64+0x81/0x6a0
 ? srso_alias_return_thunk+0x5/0xfbef5
 ? trace_hardirqs_on+0x18/0x160
 do_syscall_64+0x115/0x6a0
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Allocated by task 436:
 kasan_save_stack+0x33/0x60
 kasan_save_track+0x14/0x30
 __kasan_kmalloc+0xaa/0xb0
 nvme_rdma_cm_handler+0xcbc/0x2914 [nvme_rdma]
 cma_cm_event_handler+0xb2/0x390 [rdma_cm]
 addr_handler+0x199/0x2b0 [rdma_cm]
 process_one_req+0x113/0x650 [ib_core]
 process_one_work+0x8d0/0x1870
 worker_thread+0x575/0xf80
 kthread+0x2e7/0x3c0
 ret_from_fork+0x576/0x810
 ret_from_fork_asm+0x1a/0x30

Freed by task 436:
 kasan_save_stack+0x33/0x60
 kasan_save_track+0x14/0x30
 kasan_save_free_info+0x3b/0x60
 __kasan_slab_free+0x5f/0x80
 kfree+0x307/0x580
 nvme_rdma_free_dev+0x16d/0x260 [nvme_rdma]
 nvme_rdma_free_queue+0x6d/0x90 [nvme_rdma]
 nvme_rdma_error_recovery_work+0x7f/0x110 [nvme_rdma]
 process_one_work+0x8d0/0x1870
 worker_thread+0x575/0xf80
 kthread+0x2e7/0x3c0
 ret_from_fork+0x576/0x810
 ret_from_fork_asm+0x1a/0x30

Fixes: e87a911fed07 ("nvme-rdma: use ib_client API to detect device removal")
Assisted-by: Codex:gpt-5.5
Signed-off-by: Cen Zhang <zzzccc427 at gmail.com>
---
 drivers/nvme/host/rdma.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6909e3542794..7c4a1e5f4991 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -116,6 +116,7 @@ struct nvme_rdma_ctrl {
 
 	struct blk_mq_tag_set	admin_tag_set;
 	struct nvme_rdma_device	*device;
+	struct ib_device	*ib_device;
 
 	u32			max_fr_pages;
 
@@ -788,6 +789,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		return error;
 
 	ctrl->device = ctrl->queues[0].device;
+	WRITE_ONCE(ctrl->ib_device, ctrl->device->dev);
 	ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
 
 	/* T10-PI support */
@@ -2372,31 +2374,20 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
 static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct nvme_rdma_ctrl *ctrl;
-	struct nvme_rdma_device *ndev;
 	bool found = false;
 
-	mutex_lock(&device_list_mutex);
-	list_for_each_entry(ndev, &device_list, entry) {
-		if (ndev->dev == ib_device) {
-			found = true;
-			break;
-		}
-	}
-	mutex_unlock(&device_list_mutex);
-
-	if (!found)
-		return;
-
 	/* Delete all controllers using this device */
 	mutex_lock(&nvme_rdma_ctrl_mutex);
 	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
-		if (ctrl->device->dev != ib_device)
+		if (READ_ONCE(ctrl->ib_device) != ib_device)
 			continue;
+		found = true;
 		nvme_delete_ctrl(&ctrl->ctrl);
 	}
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	flush_workqueue(nvme_delete_wq);
+	if (found)
+		flush_workqueue(nvme_delete_wq);
 }
 
 static struct ib_client nvme_rdma_ib_client = {
-- 
2.43.0




More information about the Linux-nvme mailing list