[PATCH] nvme-rdma: Use mr pool

Sagi Grimberg sagi at grimberg.me
Tue Nov 7 23:34:34 PST 2017


> @@ -490,8 +456,27 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
>   		goto out_destroy_qp;
>   	}
>   
> +	if (idx == 0)
> +		queue->ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
> +				ibdev->attrs.max_fast_reg_page_list_len);

Please keep that where it was.
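
I.e., leave the calculation in nvme_rdma_configure_admin_queue, roughly
as the hunk below removes it:

	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
			ctrl->device->dev->attrs.max_fast_reg_page_list_len);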

> +
> +	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
> +			      queue->queue_size,
> +			      IB_MR_TYPE_MEM_REG,
> +			      queue->ctrl->max_fr_pages);
> +

Why the extra newline?

> +	if (ret) {
> +		dev_err(queue->ctrl->ctrl.device,
> +			"failed to initialize MR pool sized %d for QID %d\n",
> +			queue->queue_size, idx);
> +		goto out_destroy_ring;
> +	}
> +
>   	return 0;
>   
> +out_destroy_ring:
> +	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
> +			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
>   out_destroy_qp:
>   	ib_destroy_qp(queue->qp);
>   out_destroy_ib_cq:
> @@ -764,9 +749,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   
>   	ctrl->device = ctrl->queues[0].device;
>   
> -	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
> -		ctrl->device->dev->attrs.max_fast_reg_page_list_len);
> -
>   	if (new) {
>   		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
>   		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
> @@ -779,11 +761,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   			error = PTR_ERR(ctrl->ctrl.admin_q);
>   			goto out_free_tagset;
>   		}
> -	} else {
> -		error = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
> -					     nvme_rdma_reinit_request);
> -		if (error)
> -			goto out_free_queue;
>   	}
>   
>   	error = nvme_rdma_start_queue(ctrl, 0);
> @@ -863,11 +840,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
>   			goto out_free_tag_set;
>   		}
>   	} else {
> -		ret = blk_mq_reinit_tagset(&ctrl->tag_set,
> -					   nvme_rdma_reinit_request);
> -		if (ret)
> -			goto out_free_io_queues;
> -
>   		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
>   			ctrl->ctrl.queue_count - 1);
>   	}
> @@ -1065,14 +1037,19 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
>   	if (!blk_rq_bytes(rq))
>   		return;
>   
> -	if (req->mr->need_inval) {
> -		res = nvme_rdma_inv_rkey(queue, req);
> -		if (unlikely(res < 0)) {
> -			dev_err(ctrl->ctrl.device,
> -				"Queueing INV WR for rkey %#x failed (%d)\n",
> -				req->mr->rkey, res);
> -			nvme_rdma_error_recovery(queue->ctrl);
> +	if (req->mr) {
> +		if (req->mr->need_inval) {
> +			res = nvme_rdma_inv_rkey(queue, req);
> +			if (unlikely(res < 0)) {
> +				dev_err(ctrl->ctrl.device,
> +					"Queueing INV WR for rkey %#x failed (%d)\n",
> +					req->mr->rkey, res);
> +				nvme_rdma_error_recovery(queue->ctrl);
> +			}
>   		}
> +
> +		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
> +		req->mr = NULL;

Restoring the MR to the pool before the local invalidation has
completed? That's wrong. You should rebase on top of my completion
signaling fixes; I have v2 in the pipe.
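
Roughly what I have in mind (untested sketch of just the MR-recycling
part, request completion elided; the reg_cqe/queue field names assume
the current nvme_rdma_request layout): hand the MR back only from the
LOCAL_INV completion handler, so the rkey is guaranteed dead before
the MR can be reused:

	static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
	{
		struct nvme_rdma_request *req =
			container_of(wc->wr_cqe, struct nvme_rdma_request,
				     reg_cqe);

		if (unlikely(wc->status != IB_WC_SUCCESS)) {
			nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
			return;
		}

		/* invalidation completed, now it is safe to recycle the MR */
		ib_mr_pool_put(req->queue->qp, &req->queue->qp->rdma_mrs,
			       req->mr);
		req->mr = NULL;
	}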

>   	}
>   
>   	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
> @@ -1131,12 +1108,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
>   	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
>   	int nr;
>   
> +	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
> +	if (!req->mr)
> +		return -EAGAIN;

Please wrap this check in unlikely().
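
I.e. (minor, but this is the hot path):

	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
	if (unlikely(!req->mr))
		return -EAGAIN;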

> +
>   	/*
>   	 * Align the MR to a 4K page size to match the ctrl page size and
>   	 * the block virtual boundary.
>   	 */
>   	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
>   	if (unlikely(nr < count)) {
> +		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
> +		req->mr = NULL;
>   		if (nr < 0)
>   			return nr;
>   		return -EINVAL;
> @@ -1176,7 +1159,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
>   
>   	req->num_sge = 1;
>   	req->inline_data = false;
> -	req->mr->need_inval = false;
>   
>   	c->common.flags |= NVME_CMD_SGL_METABUF;
>   
> @@ -1362,7 +1344,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
>   	if (rq->tag == tag)
>   		ret = 1;
>   
> -	if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
> +	if (req->mr && (wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
>   	    wc->ex.invalidate_rkey == req->mr->rkey)
>   		req->mr->need_inval = false;

If you got a remote invalidate without an MR, it's a protocol error.
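
Better to catch it explicitly and kick error recovery, something like
(sketch, reusing the recovery helpers already used elsewhere in the
driver):

	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
		if (unlikely(!req->mr ||
			     wc->ex.invalidate_rkey != req->mr->rkey)) {
			dev_err(queue->ctrl->ctrl.device,
				"Bogus remote invalidation for rkey %#x\n",
				wc->ex.invalidate_rkey);
			nvme_rdma_error_recovery(queue->ctrl);
		} else {
			req->mr->need_inval = false;
		}
	}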

>   
> @@ -1674,7 +1656,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>   	if (req_op(rq) == REQ_OP_FLUSH)
>   		flush = true;
>   	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
> -			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
> +			req->mr ? &req->reg_wr.wr : NULL, flush);

Please keep the need_inval condition as well.
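
I.e.:

	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
			req->mr && req->mr->need_inval ?
				&req->reg_wr.wr : NULL, flush);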

>   	if (unlikely(err)) {
>   		nvme_rdma_unmap_data(queue, rq);
>   		goto err;
> 