[PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
Sagi Grimberg
sagi at grimberg.me
Mon Jan 16 13:12:55 PST 2017
Hey Parav,
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 6c1c368..fe7e257 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
> struct nvmet_rdma_cmd *cmd)
> {
> struct ib_recv_wr *bad_wr;
> + int i;
> +
> + for (i = 0; i < 2; i++) {
> + if (cmd->sge[i].length)
> + ib_dma_sync_single_for_device(ndev->device,
> + cmd->sge[0].addr, cmd->sge[0].length,
> + DMA_FROM_DEVICE);
> + }
a. You test sge[i] but sync sge[0].
b. I don't think we need the for statement; let's keep it open-coded
for [0] and [1].
>
> if (ndev->srq)
> return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
> struct nvmet_rdma_rsp *rsp =
> container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
>
> + ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> + rsp->send_sge.addr, rsp->send_sge.length,
> + DMA_TO_DEVICE);
Why do you need to sync_for_cpu here? You have no interest in the
data at this point.
> +
> nvmet_rdma_release_rsp(rsp);
>
> if (unlikely(wc->status != IB_WC_SUCCESS &&
> @@ -538,6 +550,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
> first_wr = &rsp->send_wr;
>
> nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
> +
> + ib_dma_sync_single_for_device(rsp->queue->dev->device,
> + rsp->send_sge.addr, rsp->send_sge.length,
> + DMA_TO_DEVICE);
> +
> if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
> pr_err("sending cmd response failed\n");
> nvmet_rdma_release_rsp(rsp);
> @@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
> static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
> struct nvmet_rdma_rsp *cmd)
> {
> + int i;
> u16 status;
>
> cmd->queue = queue;
> cmd->n_rdma = 0;
> cmd->req.port = queue->port;
>
> + for (i = 0; i < 2; i++) {
> + if (cmd->cmd->sge[i].length)
> + ib_dma_sync_single_for_cpu(queue->dev->device,
> + cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
> + DMA_FROM_DEVICE);
> + }
Again, we don't need the for statement.
Also, I think we can optimize a bit by syncing the in-capsule page
only if:
1. it was posted for recv (sge has length)
2. it's a write command
3. it has in-capsule data.
So, here let's sync the sqe (sge[0]) and sync the in-capsule page
in nvmet_rdma_map_sgl_inline().
More information about the Linux-nvme
mailing list