[PATCH WIP/RFC 6/6] nvme-rdma: keep a cm_id around during reconnect to get events

Steve Wise swise at opengridcomputing.com
Fri Aug 26 06:52:59 PDT 2016


This patch adds the concept of an "unplug" cm_id for each nvme_rdma_ctrl
controller.  When the controller is first created and the admin QP
is connected to the target, the unplug_cm_id is created and address
resolution is done on it to bind it to the same device that the admin QP
is bound to.  This unplug_cm_id remains across any/all KATO recovery and
thus will always be available for DEVICE_REMOVAL events.  This simplifies
the unplug handler because the cm_id isn't associated with any of the
I/O queues or the admin queue, and it ensures a cm_id is always available
per controller to get the DEVICE_REMOVAL event.

Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
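A condensed sketch of the unplug_cm_id lifecycle this patch implements,
paraphrased from the diff below (error paths, logging, and cleanup are
trimmed, so treat it as illustrative rather than compilable on its own):

	/* 1. At admin queue init (idx == 0), create the per-controller
	 *    unplug cm_id and bind it to the same device the admin QP
	 *    uses; the handler completes unplug_cm_done on
	 *    RDMA_CM_EVENT_ADDR_RESOLVED.
	 */
	ctrl->unplug_cm_id = rdma_create_id(&init_net,
			nvme_rdma_unplug_cm_handler, ctrl,
			RDMA_PS_TCP, IB_QPT_RC);
	rdma_resolve_addr(ctrl->unplug_cm_id, NULL, &ctrl->addr,
			NVME_RDMA_CONNECT_TIMEOUT_MS);
	wait_for_completion_interruptible_timeout(&ctrl->unplug_cm_done,
			msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1);

	/* 2. On RDMA_CM_EVENT_DEVICE_REMOVAL, nvme_rdma_device_unplug()
	 *    deletes the controller, NULLs ctrl->unplug_cm_id so the
	 *    delete path skips it, and returns 1 so the CM core
	 *    destroys the cm_id implicitly.
	 */

	/* 3. On a normal controller removal the pointer is still set,
	 *    so __nvme_rdma_remove_ctrl() destroys it explicitly:
	 */
	if (ctrl->unplug_cm_id)
		rdma_destroy_id(ctrl->unplug_cm_id);

The net effect is one long-lived cm_id per controller that survives
KATO recovery, so DEVICE_REMOVAL is always delivered somewhere we can
act on it.
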
 drivers/nvme/host/rdma.c | 134 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 107 insertions(+), 27 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b99d7fd..f05fa0c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -140,6 +140,11 @@ struct nvme_rdma_ctrl {
 	};
 
 	struct nvme_ctrl	ctrl;
+
+	/* the cm_id and vars for device unplug events */
+	struct rdma_cm_id	*unplug_cm_id;
+	int			unplug_cm_error;
+	struct completion	unplug_cm_done;
 };
 
 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -168,6 +173,7 @@ MODULE_PARM_DESC(register_always,
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event);
 static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static int nvme_rdma_device_unplug(struct nvme_rdma_ctrl *ctrl);
 
 /* XXX: really should move to a generic header sooner or later.. */
 static inline void put_unaligned_le24(u32 val, u8 *p)
@@ -553,6 +559,79 @@ out:
 	return ret;
 }
 
+static int nvme_rdma_unplug_cm_handler(struct rdma_cm_id *cm_id,
+		struct rdma_cm_event *ev)
+{
+	struct nvme_rdma_ctrl *ctrl = cm_id->context;
+	int ret = 0;
+
+	dev_warn(ctrl->ctrl.device, "%s (%d): status %d id %p\n",
+		rdma_event_msg(ev->event), ev->event,
+		ev->status, cm_id);
+
+	switch (ev->event) {
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+		ctrl->unplug_cm_error = 0;
+		complete(&ctrl->unplug_cm_done);
+		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		/* return 1 means implicit CM ID destroy */
+		ret = nvme_rdma_device_unplug(ctrl);
+		break;
+	default:
+		dev_err(ctrl->ctrl.device,
+			"Unexpected RDMA CM event (%d) status %d\n",
+			ev->event, ev->status);
+		ctrl->unplug_cm_error = ev->status;
+		complete(&ctrl->unplug_cm_done);
+		break;
+	}
+	return ret;
+}
+
+static int nvme_rdma_init_unplug_cm_id(struct nvme_rdma_ctrl *ctrl)
+{
+	int ret;
+
+	dev_info(ctrl->ctrl.device, "%s enter\n", __func__);
+	init_completion(&ctrl->unplug_cm_done);
+
+	ctrl->unplug_cm_id = rdma_create_id(&init_net,
+			nvme_rdma_unplug_cm_handler, ctrl,
+			RDMA_PS_TCP, IB_QPT_RC);
+	if (IS_ERR(ctrl->unplug_cm_id)) {
+		dev_info(ctrl->ctrl.device, "failed to create CM ID: %ld\n",
+			PTR_ERR(ctrl->unplug_cm_id));
+		return PTR_ERR(ctrl->unplug_cm_id);
+	}
+
+	ctrl->unplug_cm_error = -ETIMEDOUT;
+	ret = rdma_resolve_addr(ctrl->unplug_cm_id, NULL, &ctrl->addr,
+			NVME_RDMA_CONNECT_TIMEOUT_MS);
+	if (ret) {
+		dev_info(ctrl->ctrl.device,
+			"rdma_resolve_addr failed (%d).\n", ret);
+		goto out_destroy_unplug_cm_id;
+	}
+	wait_for_completion_interruptible_timeout(&ctrl->unplug_cm_done,
+			msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1);
+	ret = ctrl->unplug_cm_error;
+	if (ret) {
+		dev_info(ctrl->ctrl.device,
+			"nvme_rdma_init_unplug_unplug_cm_id failed (%d).\n",
+			ret);
+		goto out_destroy_unplug_cm_id;
+	}
+
+	dev_info(ctrl->ctrl.device, "%s exit\n", __func__);
+	return 0;
+
+out_destroy_unplug_cm_id:
+	rdma_destroy_id(ctrl->unplug_cm_id);
+	dev_info(ctrl->ctrl.device, "%s exit err %d\n", __func__, ret);
+	return ret;
+}
+
 static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 		int idx, size_t queue_size)
 {
@@ -594,6 +673,15 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 		goto out_destroy_cm_id;
 	}
 
+	if (idx == 0 && !ctrl->unplug_cm_id) {
+		ret = nvme_rdma_init_unplug_cm_id(ctrl);
+		if (ret) {
+			dev_info(ctrl->ctrl.device,
+				"init_unplug_cm_id failed (%d).\n", ret);
+			goto out_destroy_cm_id;
+		}
+	}
+
 	set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
 
 	return 0;
@@ -1323,30 +1411,24 @@ out_destroy_queue_ib:
 
 /**
  * nvme_rdma_device_unplug() - Handle RDMA device unplug
- * @queue:      Queue that owns the cm_id that caught the event
+ * @ctrl:      Controller that owns the unplug_cm_id that caught the event
  *
  * DEVICE_REMOVAL event notifies us that the RDMA device is about
  * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
+ * This event will be generated for each allocated cm_id, but is only
+ * handled on each controller's unplug_cm_id.
  *
- * In our case, the RDMA resources are managed per controller and not
- * only per queue. So the way we handle this is we trigger an implicit
- * controller deletion upon the first DEVICE_REMOVAL event we see, and
- * hold the event inflight until the controller deletion is completed.
+ * Trigger an implicit controller deletion and hold the event inflight
+ * until the controller deletion is completed.
  *
- * One exception that we need to handle is the destruction of the cm_id
+ * One exception that we need to handle is the destruction of the unplug_cm_id
  * that caught the event. Since we hold the callout until the controller
  * deletion is completed, we'll deadlock if the controller deletion will
- * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources,
- * then queue the controller deletion which won't destroy this queue and
- * we destroy the cm_id implicitely by returning a non-zero rc to the callout.
+ * call rdma_destroy_id on the unplug_cm_id.  We destroy the unplug_cm_id
+ * implicitly by returning a non-zero rc to the callout.
  */
-static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
+static int nvme_rdma_device_unplug(struct nvme_rdma_ctrl *ctrl)
 {
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-	int ret = 0;
-
 	/* Own the controller deletion */
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
 		return 0;
@@ -1357,15 +1439,11 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
 	/* Get rid of reconnect work if its running */
 	cancel_delayed_work_sync(&ctrl->reconnect_work);
 
-	/* Disable the queue so ctrl delete won't free it */
-	if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
-		/* Free this queue ourselves */
-		nvme_rdma_stop_queue(queue);
-		nvme_rdma_destroy_queue_ib(queue);
-
-		/* Return non-zero so the cm_id will destroy implicitly */
-		ret = 1;
-	}
+	/*
+	 * NULL out the unplug_cm_id pointer so the controller deletion
+	 * does not free it.
+	 */
+	ctrl->unplug_cm_id = NULL;
 
 	/*
 	 * Queue controller deletion. Keep a reference until all
@@ -1376,7 +1454,7 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
 	flush_work(&ctrl->delete_work);
 	nvme_put_ctrl(&ctrl->ctrl);
 
-	return ret;
+	return 1;
 }
 
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@@ -1420,8 +1498,8 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		nvme_rdma_error_recovery(queue->ctrl);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		/* return 1 means impliciy CM ID destroy */
-		return nvme_rdma_device_unplug(queue);
+		/* handled on ctrl->unplug_cm_id */
+		break;
 	default:
 		dev_err(queue->ctrl->ctrl.device,
 			"Unexpected RDMA CM event (%d)\n", ev->event);
@@ -1697,6 +1775,8 @@ static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 		nvme_rdma_dev_put(ctrl->device);
 	}
 
+	if (ctrl->unplug_cm_id)
+		rdma_destroy_id(ctrl->unplug_cm_id);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
-- 
2.7.0