[PATCH 1/2] dmaengine: xilinx_dma: Restore support for memcpy SG transfers

Vinod Koul vkoul at kernel.org
Tue Jul 13 21:58:37 PDT 2021


On 07-07-21, 00:43, Adrian Larumbe wrote:
> This is the old DMA_SG interface that was removed in commit
> c678fa66341c ("dmaengine: remove DMA_SG as it is dead code in kernel"). It
> has been renamed to DMA_MEMCPY_SG to better match the MEMSET and MEMSET_SG
> naming convention.
> 
> It should only be used for mem2mem copies, either main system memory or
> CPU-addressable device memory (like video memory on a PCI graphics card).
> 
> Bringing back this interface was prompted by the need to use the Xilinx
> CDMA device for mem2mem SG transfers. The current CDMA binding for
> device_prep_dma_memcpy_sg was partially borrowed from xlnx kernel tree, and
> expanded with extended address space support when linking descriptor
> segments and checking for incorrect zero transfer size.
> 
> Signed-off-by: Adrian Larumbe <adrian.martinezlarumbe at imgtec.com>
> ---
>  .../driver-api/dmaengine/provider.rst         |  11 ++
>  drivers/dma/dmaengine.c                       |   7 +
>  drivers/dma/xilinx/xilinx_dma.c               | 122 ++++++++++++++++++

Can you split this into separate patches: documentation, core change, and
then driver?

>  include/linux/dmaengine.h                     |  20 +++
>  4 files changed, 160 insertions(+)
> 
> diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
> index ddb0a81a796c..9f0efe9e9952 100644
> --- a/Documentation/driver-api/dmaengine/provider.rst
> +++ b/Documentation/driver-api/dmaengine/provider.rst
> @@ -162,6 +162,17 @@ Currently, the types available are:
>  
>    - The device is able to do memory to memory copies
>  
> +- - DMA_MEMCPY_SG
> +
> +  - The device supports memory to memory scatter-gather transfers.
> +
> +  - Even though a plain memcpy can look like a particular case of a
> +    scatter-gather transfer, with a single chunk to transfer, it's a
> +    distinct transaction type in the mem2mem transfer case. This is
> +    because some very simple devices might be able to do contiguous
> +    single-chunk memory copies, but have no support for more
> +    complex SG transfers.

How does one deal with cases where
 - src_sg_len and dst_sg_len are different?
 - src_sg and dst_sg are different lists (maybe a different number of
   entries with different lengths)?

I think we need to document these cases or limitations.


> +
>  - DMA_XOR
>  
>    - The device is able to perform XOR operations on memory areas
> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> index af3ee288bc11..c4e3334b04cf 100644
> --- a/drivers/dma/dmaengine.c
> +++ b/drivers/dma/dmaengine.c
> @@ -1160,6 +1160,13 @@ int dma_async_device_register(struct dma_device *device)
>  		return -EIO;
>  	}
>  
> +	if (dma_has_cap(DMA_MEMCPY_SG, device->cap_mask) && !device->device_prep_dma_memcpy_sg) {
> +		dev_err(device->dev,
> +			"Device claims capability %s, but op is not defined\n",
> +			"DMA_MEMCPY_SG");
> +		return -EIO;
> +	}
> +
>  	if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) {
>  		dev_err(device->dev,
>  			"Device claims capability %s, but op is not defined\n",
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index 75c0b8e904e5..0e2bf75d42d3 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -2108,6 +2108,126 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
>  	return NULL;
>  }
>  
> +/**
> + * xilinx_cdma_prep_memcpy_sg - prepare descriptors for a memcpy_sg transaction
> + * @dchan: DMA channel
> + * @dst_sg: Destination scatter list
> + * @dst_sg_len: Number of entries in destination scatter list
> + * @src_sg: Source scatter list
> + * @src_sg_len: Number of entries in source scatter list
> + * @flags: transfer ack flags
> + *
> + * Return: Async transaction descriptor on success and NULL on failure
> + */
> +static struct dma_async_tx_descriptor *xilinx_cdma_prep_memcpy_sg(
> +			struct dma_chan *dchan, struct scatterlist *dst_sg,
> +			unsigned int dst_sg_len, struct scatterlist *src_sg,
> +			unsigned int src_sg_len, unsigned long flags)
> +{
> +	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
> +	struct xilinx_dma_tx_descriptor *desc;
> +	struct xilinx_cdma_tx_segment *segment, *prev = NULL;
> +	struct xilinx_cdma_desc_hw *hw;
> +	size_t len, dst_avail, src_avail;
> +	dma_addr_t dma_dst, dma_src;
> +
> +	if (unlikely(dst_sg_len == 0 || src_sg_len == 0))
> +		return NULL;
> +
> +	if (unlikely(!dst_sg  || !src_sg))
> +		return NULL;

No check for dst_sg_len == src_sg_len, or does it not matter here?

> +
> +	desc = xilinx_dma_alloc_tx_descriptor(chan);
> +	if (!desc)
> +		return NULL;
> +
> +	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
> +	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
> +
> +	dst_avail = sg_dma_len(dst_sg);
> +	src_avail = sg_dma_len(src_sg);
> +	/*
> +	 * loop until there is either no more source or no more destination
> +	 * scatterlist entry
> +	 */
> +	while (true) {
> +		len = min_t(size_t, src_avail, dst_avail);
> +		len = min_t(size_t, len, chan->xdev->max_buffer_len);
> +		if (len == 0)
> +			goto fetch;
> +
> +		/* Allocate the link descriptor from DMA pool */
> +		segment = xilinx_cdma_alloc_tx_segment(chan);
> +		if (!segment)
> +			goto error;
> +
> +		dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
> +			dst_avail;
> +		dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) -
> +			src_avail;
> +		hw = &segment->hw;
> +		hw->control = len;
> +		hw->src_addr = dma_src;
> +		hw->dest_addr = dma_dst;
> +		if (chan->ext_addr) {
> +			hw->src_addr_msb = upper_32_bits(dma_src);
> +			hw->dest_addr_msb = upper_32_bits(dma_dst);
> +		}
> +
> +		if (prev) {
> +			prev->hw.next_desc = segment->phys;
> +			if (chan->ext_addr)
> +				prev->hw.next_desc_msb =
> +					upper_32_bits(segment->phys);
> +		}
> +
> +		prev = segment;
> +		dst_avail -= len;
> +		src_avail -= len;
> +		list_add_tail(&segment->node, &desc->segments);
> +
> +fetch:
> +		/* Fetch the next dst scatterlist entry */
> +		if (dst_avail == 0) {
> +			if (dst_sg_len == 0)
> +				break;
> +			dst_sg = sg_next(dst_sg);
> +			if (dst_sg == NULL)
> +				break;
> +			dst_sg_len--;
> +			dst_avail = sg_dma_len(dst_sg);
> +		}
> +		/* Fetch the next src scatterlist entry */
> +		if (src_avail == 0) {
> +			if (src_sg_len == 0)
> +				break;
> +			src_sg = sg_next(src_sg);
> +			if (src_sg == NULL)
> +				break;
> +			src_sg_len--;
> +			src_avail = sg_dma_len(src_sg);
> +		}
> +	}
> +
> +	if (list_empty(&desc->segments)) {
> +		dev_err(chan->xdev->dev,
> +			"%s: Zero-size SG transfer requested\n", __func__);
> +		goto error;
> +	}
> +
> +	/* Link the last hardware descriptor with the first. */
> +	segment = list_first_entry(&desc->segments,
> +				struct xilinx_cdma_tx_segment, node);
> +	desc->async_tx.phys = segment->phys;
> +	prev->hw.next_desc = segment->phys;
> +
> +	return &desc->async_tx;
> +
> +error:
> +	xilinx_dma_free_tx_descriptor(chan, desc);
> +	return NULL;
> +}
> +
>  /**
>   * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
>   * @dchan: DMA channel
> @@ -3094,7 +3214,9 @@ static int xilinx_dma_probe(struct platform_device *pdev)
>  					  DMA_RESIDUE_GRANULARITY_SEGMENT;
>  	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
>  		dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
> +		dma_cap_set(DMA_MEMCPY_SG, xdev->common.cap_mask);
>  		xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
> +		xdev->common.device_prep_dma_memcpy_sg = xilinx_cdma_prep_memcpy_sg;
>  		/* Residue calculation is supported by only AXI DMA and CDMA */
>  		xdev->common.residue_granularity =
>  					  DMA_RESIDUE_GRANULARITY_SEGMENT;
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 004736b6a9c8..7c342f77d8eb 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -50,6 +50,7 @@ enum dma_status {
>   */
>  enum dma_transaction_type {
>  	DMA_MEMCPY,
> +	DMA_MEMCPY_SG,
>  	DMA_XOR,
>  	DMA_PQ,
>  	DMA_XOR_VAL,
> @@ -891,6 +892,11 @@ struct dma_device {
>  	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
>  		struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
>  		size_t len, unsigned long flags);
> +	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy_sg)(
> +		struct dma_chan *chan,
> +		struct scatterlist *dst_sg, unsigned int dst_nents,
> +		struct scatterlist *src_sg, unsigned int src_nents,
> +		unsigned long flags);
>  	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
>  		struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
>  		unsigned int src_cnt, size_t len, unsigned long flags);
> @@ -1053,6 +1059,20 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy(
>  						    len, flags);
>  }
>  
> +static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy_sg(
> +		struct dma_chan *chan,
> +		struct scatterlist *dst_sg, unsigned int dst_nents,
> +		struct scatterlist *src_sg, unsigned int src_nents,
> +		unsigned long flags)
> +{
> +	if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy_sg)
> +		return NULL;
> +
> +	return chan->device->device_prep_dma_memcpy_sg(chan, dst_sg, dst_nents,
> +						       src_sg, src_nents,
> +						       flags);
> +}
> +
>  static inline bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan,
>  		enum dma_desc_metadata_mode mode)
>  {
> -- 
> 2.17.1

-- 
~Vinod



More information about the linux-arm-kernel mailing list