[PATCH v3 6/9] nvme-rdma: use implicit CQ allocation

Sagi Grimberg sagi at grimberg.me
Wed Nov 8 01:57:39 PST 2017


From: Christoph Hellwig <hch at lst.de>

Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 drivers/nvme/host/rdma.c | 62 +++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 35 deletions(-)
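
A note on context retrieval: once the CQ is allocated implicitly by the RDMA core, cq->cq_context no longer carries the driver's queue pointer, so the completion paths below switch to wc->qp->qp_context (set via init_attr.qp_context at QP creation time). A minimal sketch of the resulting pattern, assuming the implicit-CQ interface (IB_QP_CREATE_ASSIGN_CQS, poll_ctx, affinity_hint) added earlier in this series; the handler name is illustrative only, not part of this patch:

	static void nvme_rdma_example_done(struct ib_cq *cq, struct ib_wc *wc)
	{
		/*
		 * qp_context was set to the nvme_rdma_queue in
		 * nvme_rdma_create_qp(); cq->cq_context now belongs to the core.
		 */
		struct nvme_rdma_queue *queue = wc->qp->qp_context;

		if (unlikely(wc->status != IB_WC_SUCCESS)) {
			nvme_rdma_wr_error(cq, wc, "EXAMPLE");
			return;
		}
		/* ... normal completion handling for this queue ... */
	}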

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 32e21ab1ae52..3acf4d1ccfed 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -90,7 +90,6 @@ struct nvme_rdma_queue {
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
 	struct nvme_rdma_device	*device;
-	struct ib_cq		*ib_cq;
 	struct ib_qp		*qp;
 
 	unsigned long		flags;
@@ -241,24 +240,38 @@ static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
 	return queue->cm_error;
 }
 
-static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
+static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue)
 {
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_qp_init_attr init_attr;
-	int ret;
+	int ret, idx;
+	const int send_wr_factor = 3;		/* MR, SEND, INV */
 
 	memset(&init_attr, 0, sizeof(init_attr));
+	init_attr.create_flags = IB_QP_CREATE_ASSIGN_CQS;
 	init_attr.event_handler = nvme_rdma_qp_event;
+	init_attr.qp_context = queue;
+	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+	init_attr.qp_type = IB_QPT_RC;
+	init_attr.poll_ctx = IB_POLL_SOFTIRQ;
+
 	/* +1 for drain */
-	init_attr.cap.max_send_wr = factor * queue->queue_size + 1;
+	init_attr.cap.max_send_wr = send_wr_factor * queue->queue_size + 1;
+	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
+
 	/* +1 for drain */
 	init_attr.cap.max_recv_wr = queue->queue_size + 1;
 	init_attr.cap.max_recv_sge = 1;
-	init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
-	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
-	init_attr.qp_type = IB_QPT_RC;
-	init_attr.send_cq = queue->ib_cq;
-	init_attr.recv_cq = queue->ib_cq;
+
+	/*
+	 * The admin queue is barely used once the controller is live, so don't
+	 * bother to spread it out.
+	 */
+	idx = nvme_rdma_queue_idx(queue);
+	if (idx > 0) {
+		init_attr.affinity_hint = idx;
+		init_attr.create_flags |= IB_QP_CREATE_AFFINITY_HINT;
+	}
 
 	ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
 
@@ -440,7 +453,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	struct ib_device *ibdev = dev->dev;
 
 	rdma_destroy_qp(queue->cm_id);
-	ib_free_cq(queue->ib_cq);
 
 	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
 			sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -451,9 +463,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct ib_device *ibdev;
-	const int send_wr_factor = 3;			/* MR, SEND, INV */
-	const int cq_factor = send_wr_factor + 1;	/* + RECV */
-	int comp_vector, idx = nvme_rdma_queue_idx(queue);
 	int ret;
 
 	queue->device = nvme_rdma_find_get_device(queue->cm_id);
@@ -464,24 +473,9 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	}
 	ibdev = queue->device->dev;
 
-	/*
-	 * Spread I/O queues completion vectors according their queue index.
-	 * Admin queues can always go on completion vector 0.
-	 */
-	comp_vector = idx == 0 ? idx : idx - 1;
-
-	/* +1 for ib_stop_cq */
-	queue->ib_cq = ib_alloc_cq(ibdev, queue,
-				cq_factor * queue->queue_size + 1,
-				comp_vector, IB_POLL_SOFTIRQ);
-	if (IS_ERR(queue->ib_cq)) {
-		ret = PTR_ERR(queue->ib_cq);
-		goto out_put_dev;
-	}
-
-	ret = nvme_rdma_create_qp(queue, send_wr_factor);
+	ret = nvme_rdma_create_qp(queue);
 	if (ret)
-		goto out_destroy_ib_cq;
+		goto out_put_dev;
 
 	queue->rsp_ring = nvme_rdma_alloc_ring(ibdev, queue->queue_size,
 			sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -494,8 +488,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 
 out_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
-out_destroy_ib_cq:
-	ib_free_cq(queue->ib_cq);
 out_put_dev:
 	nvme_rdma_dev_put(queue->device);
 	return ret;
@@ -999,7 +991,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
 		const char *op)
 {
-	struct nvme_rdma_queue *queue = cq->cq_context;
+	struct nvme_rdma_queue *queue = wc->qp->qp_context;
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 
 	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -1361,7 +1353,7 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
 {
 	struct nvme_rdma_qe *qe =
 		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
-	struct nvme_rdma_queue *queue = cq->cq_context;
+	struct nvme_rdma_queue *queue = wc->qp->qp_context;
 	struct ib_device *ibdev = queue->device->dev;
 	struct nvme_completion *cqe = qe->data;
 	const size_t len = sizeof(struct nvme_completion);
@@ -1678,7 +1670,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
 	struct nvme_rdma_queue *queue = hctx->driver_data;
-	struct ib_cq *cq = queue->ib_cq;
+	struct ib_cq *cq = queue->cm_id->qp->recv_cq;
 	struct ib_wc wc;
 	int found = 0;
 
-- 
2.14.1