nvme/rdma initiator stuck on reboot
Steve Wise
swise at opengridcomputing.com
Fri Aug 19 07:24:31 PDT 2016
> One other thing: in both nvme_rdma_device_unplug() and
> nvme_rdma_del_ctrl(), the code kicks the delete_work thread to delete the
> controller and then calls flush_work(). This is a possible
> touch-after-free, no? The proper way, I think, should be to take a ref on
> ctrl, kick the delete_work thread, call flush_work(), and then
> nvme_put_ctrl(ctrl). Do you agree?
I.e., do we need this:
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 9c69393..6198eaa 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1341,9 +1341,15 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
ret = 1;
}
- /* Queue controller deletion */
+ /*
+ * Queue controller deletion. Keep a reference until all
+ * work is flushed since delete_work will free the ctrl mem
+ */
+ kref_get(&ctrl->ctrl.kref);
queue_work(nvme_rdma_wq, &ctrl->delete_work);
flush_work(&ctrl->delete_work);
+ nvme_put_ctrl(&ctrl->ctrl);
+
return ret;
}
@@ -1690,15 +1696,22 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- int ret;
+ int ret = 0;
+
+ /*
+ * Keep a reference until all work is flushed since
+ * __nvme_rdma_del_ctrl can free the ctrl mem
+ */
+ kref_get(&ctrl->ctrl.kref);
ret = __nvme_rdma_del_ctrl(ctrl);
if (ret)
- return ret;
+ goto out;
flush_work(&ctrl->delete_work);
-
- return 0;
+out:
+ nvme_put_ctrl(&ctrl->ctrl);
+ return ret;
}
static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
More information about the Linux-nvme
mailing list