[PATCH] nvmet-rdma: Support 16K worth of inline data for write commands

Parav Pandit parav at mellanox.com
Tue Feb 7 14:49:57 PST 2017


I am correcting the typo 'lowe' to 'lower' and removing the extra PATCH prefix in v1.

Parav

> -----Original Message-----
> From: Parav Pandit [mailto:parav at mellanox.com]
> Sent: Tuesday, February 7, 2017 4:38 PM
> To: hch at lst.de; sagi at grimberg.me; james.smart at broadcom.com; linux-nvme at lists.infradead.org
> Cc: Parav Pandit <parav at mellanox.com>
> Subject: [PATCH] nvmet-rdma: Support 16K worth of inline data for write commands
> 
> This patch adds support for 16 Kbytes of inline data for write commands.
> 
> The performance improvements below were measured with a null target.
> Workload: random write, 70-30 mixed IOs
> null target: 250GB, 64 core CPU, single controller.
> Queue depth: 256 commands
> 
>            cpu idle %        iops (K)            latency (usec)
>            (higher better)   (higher better)     (lower better)
> 
> Inline     16K       4K      16K       4K        16K       4K
> size
> io_size    random write      random write        random write
> 512        78        79      2349      2343      5.45      5.45
> 1K         78        78      2438      2417      5.78      5.29
> 2K         78        78      2437      2387      5.78      5.35
> 4K         78        79      2332      2274      5.75      5.62
> 8K         78        87      1308      711       11        21.65
> 16K        79        90      680       538       22        28.64
> 32K        80        95      337       333       47        47.41
> 
>            mix RW-30/70      mix RW-30/70        mix RW-30/70
> 512        78        78      2389      2349      5.43      5.45
> 1K         78        78      2250      2354      5.61      5.42
> 2K         79        78      2261      2294      5.62      5.60
> 4K         77        78      2180      2131      5.8       6.28
> 8K         78        79      1746      797       8.5       18.42
> 16K        78        86      943       628       15.90     23.76
> 32K        92        92      440       440       32        33.39
> 
> This is tested with a modified Linux initiator that can support 16K worth of
> inline data.
> Applications with a typical 8K or 16K block size will benefit the most from
> this performance improvement.
> 
> Additionally, when IOPs are throttled to 700K, CPU utilization and latency
> numbers are the same for both inline sizes, confirming that the larger inline
> size does not consume any extra CPU to serve the same number of IOPs.
> 
>            cpu idle %        iops (K)            latency (usec)
>            (higher better)   (higher better)     (lower better)
> 
> Inline     16K       4K      16K       4K        16K       4K
> size
> io_size    random write      random write        random write
> 4K         93        93      700       700       5.75      5.62
> 8K         86        87      700       700       11        21.65
> 16K        83        88      680       538       22        28.64
> 32K        94        94      337       333       47        47.41
> 
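> As a back-of-the-envelope check (illustrative only, not part of the patch),
> the larger inline size is mainly a receive-buffer memory trade-off. With the
> 256-command queue depth used above, a minimal userspace sketch of the
> per-queue arithmetic:
> 
> #include <stdio.h>
> 
> int main(void)
> {
> 	const unsigned int queue_depth = 256;	/* queue depth from the setup above */
> 	const unsigned int inline_sizes[] = { 4096, 16384 };	/* 4K vs 16K */
> 
> 	/* Per-queue inline buffer footprint; ignores the SQE buffers and
> 	 * other per-command allocations.
> 	 */
> 	for (int i = 0; i < 2; i++)
> 		printf("inline %5u B x %u cmds = %4u KB per queue\n",
> 		       inline_sizes[i], queue_depth,
> 		       inline_sizes[i] / 1024 * queue_depth);
> 	return 0;
> }
> 
> That is 1 MB per queue at 4K versus 4 MB at 16K, the cost of the 8K/16K
> latency improvement shown above.
> 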
> Reviewed-by: Max Gurtovoy <maxg at mellanox.com>
> Signed-off-by: Parav Pandit <parav at mellanox.com>
> ---
>  drivers/nvme/target/rdma.c | 28 ++++++++++++++++++----------
>  1 file changed, 18 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 1a57ab3..8bfadea 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -33,9 +33,9 @@
>  #include "nvmet.h"
> 
>  /*
> - * We allow up to a page of inline data to go with the SQE
> + * We allow up to 16K of inline data, rounded up to PAGE_SIZE, to go with the SQE
>   */
> -#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
> +#define NVMET_RDMA_INLINE_DATA_SIZE	16384
> 
>  struct nvmet_rdma_cmd {
>  	struct ib_sge		sge[2];
> @@ -256,15 +256,16 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
> 
>  	if (!admin) {
>  		c->inline_page = alloc_pages(GFP_KERNEL,
> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> +				get_order(nvmet_rdma_ops.sqe_inline_size));
>  		if (!c->inline_page)
>  			goto out_unmap_cmd;
>  		c->sge[1].addr = ib_dma_map_page(ndev->device,
> -				c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
> +				c->inline_page, 0,
> +				nvmet_rdma_ops.sqe_inline_size,
>  				DMA_FROM_DEVICE);
>  		if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
>  			goto out_free_inline_page;
> -		c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
> +		c->sge[1].length = nvmet_rdma_ops.sqe_inline_size;
>  		c->sge[1].lkey = ndev->pd->local_dma_lkey;
>  	}
> 
> @@ -279,7 +280,7 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
>  out_free_inline_page:
>  	if (!admin) {
>  		__free_pages(c->inline_page,
> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> +				get_order(nvmet_rdma_ops.sqe_inline_size));
>  	}
>  out_unmap_cmd:
>  	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
> @@ -296,9 +297,10 @@ static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
>  {
>  	if (!admin) {
>  		ib_dma_unmap_page(ndev->device, c->sge[1].addr,
> -				NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
> +				nvmet_rdma_ops.sqe_inline_size,
> +				DMA_FROM_DEVICE);
>  		__free_pages(c->inline_page,
> -				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
> +				get_order(nvmet_rdma_ops.sqe_inline_size));
>  	}
>  	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
>  				sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
> @@ -592,7 +594,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
>  	if (!nvme_is_write(rsp->req.cmd))
>  		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
> 
> -	if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
> +	if (off + len > nvmet_rdma_ops.sqe_inline_size) {
>  		pr_err("invalid inline data offset!\n");
>  		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
>  	}
> @@ -1475,7 +1477,6 @@ static void nvmet_rdma_remove_port(struct nvmet_port *port)
>  static struct nvmet_fabrics_ops nvmet_rdma_ops = {
>  	.owner			= THIS_MODULE,
>  	.type			= NVMF_TRTYPE_RDMA,
> -	.sqe_inline_size	= NVMET_RDMA_INLINE_DATA_SIZE,
>  	.msdbd			= 1,
>  	.has_keyed_sgls		= 1,
>  	.add_port		= nvmet_rdma_add_port,
> @@ -1486,6 +1487,13 @@ static void nvmet_rdma_remove_port(struct nvmet_port *port)
> 
>  static int __init nvmet_rdma_init(void)
>  {
> +	/* Currently limit the inline size to 16K on systems with a page
> +	 * size of 4K or less. Systems with a larger page size continue
> +	 * to use PAGE_SIZE worth of inline data.
> +	 */
> +	nvmet_rdma_ops.sqe_inline_size =
> +		round_up(NVMET_RDMA_INLINE_DATA_SIZE, PAGE_SIZE);
> +
>  	return nvmet_register_transport(&nvmet_rdma_ops);
>  }
> 
> --
> 1.8.3.1
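> 
> For readers unfamiliar with the helpers used in nvmet_rdma_init() above, the
> size selection reduces to simple arithmetic. Below is a standalone userspace
> sketch with stand-ins for the kernel's round_up() and get_order(); the page
> sizes are hypothetical test inputs:
> 
> #include <stdio.h>
> 
> #define NVMET_RDMA_INLINE_DATA_SIZE	16384
> 
> /* round_up() for power-of-two alignments, as the kernel macro does */
> static unsigned long round_up_pow2(unsigned long x, unsigned long align)
> {
> 	return (x + align - 1) & ~(align - 1);
> }
> 
> /* get_order(): smallest page-allocation order covering size */
> static int order_for(unsigned long size, unsigned long page_size)
> {
> 	unsigned long pages = (size + page_size - 1) / page_size;
> 	int order = 0;
> 
> 	while ((1UL << order) < pages)
> 		order++;
> 	return order;
> }
> 
> int main(void)
> {
> 	const unsigned long page_sizes[] = { 4096, 16384, 65536 };
> 
> 	for (int i = 0; i < 3; i++) {
> 		unsigned long inline_size = round_up_pow2(
> 				NVMET_RDMA_INLINE_DATA_SIZE, page_sizes[i]);
> 
> 		printf("PAGE_SIZE %6lu -> sqe_inline_size %6lu (order %d)\n",
> 		       page_sizes[i], inline_size,
> 		       order_for(inline_size, page_sizes[i]));
> 	}
> 	return 0;
> }
> 
> On 4K-page systems this selects 16K of inline data (an order-2, four-page
> allocation); on systems with 16K or larger pages it reduces to a single page,
> i.e. PAGE_SIZE worth of inline data, matching the comment in the patch.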



