nvme/rdma initiator stuck on reboot

Steve Wise swise at opengridcomputing.com
Fri Aug 19 07:24:31 PDT 2016


> One other thing:  in both nvme_rdma_device_unplug() and
> nvme_rdma_del_ctrl(), the code kicks the delete_work thread to delete the
> controller and then calls flush_work().  This is a possible
> touch-after-free, no?  The proper way, I think, should be to take a ref on
> ctrl, kick the delete_work thread, call flush_work(), and then
> nvme_put_ctrl(ctrl).  Do you agree? 

I.e., do we need this:

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 9c69393..6198eaa 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1341,9 +1341,15 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
                ret = 1;
        }

-       /* Queue controller deletion */
+       /*
+        * Queue controller deletion. Keep a reference until all
+        * work is flushed since delete_work will free the ctrl mem
+        */
+       kref_get(&ctrl->ctrl.kref);
        queue_work(nvme_rdma_wq, &ctrl->delete_work);
        flush_work(&ctrl->delete_work);
+       nvme_put_ctrl(&ctrl->ctrl);
+
        return ret;
 }

@@ -1690,15 +1696,22 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
 static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
 {
        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-       int ret;
+       int ret = 0;
+
+       /*
+        * Keep a reference until all work is flushed since
+        * __nvme_rdma_del_ctrl can free the ctrl mem
+        */
+       kref_get(&ctrl->ctrl.kref);

        ret = __nvme_rdma_del_ctrl(ctrl);
        if (ret)
-               return ret;
+               goto out;

        flush_work(&ctrl->delete_work);
-
-       return 0;
+out:
+       nvme_put_ctrl(&ctrl->ctrl);
+       return ret;
 }

 static void nvme_rdma_remove_ctrl_work(struct work_struct *work)





More information about the Linux-nvme mailing list