[PATCH 2/2] dmaengine: xdmac: Add scatter gathered memset support

Vinod Koul vinod.koul at intel.com
Thu Jul 16 05:01:54 PDT 2015


On Mon, Jul 06, 2015 at 12:19:24PM +0200, Maxime Ripard wrote:
> The XDMAC also supports memset operations over discontiguous areas. Add the
> necessary logic to support this.
> 
> Signed-off-by: Maxime Ripard <maxime.ripard at free-electrons.com>
> ---
>  drivers/dma/at_xdmac.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 165 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
> index cf1213de7865..1a2d9a39ff25 100644
> --- a/drivers/dma/at_xdmac.c
> +++ b/drivers/dma/at_xdmac.c
> @@ -1133,7 +1133,7 @@ static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan,
>  	 * SAMA5D4x), so we can use the same interface for source and dest,
>  	 * that solves the fact we don't know the direction.
>  	 */
> -	u32			chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM
> +	u32			chan_cc = AT_XDMAC_CC_DAM_UBS_AM
>  					| AT_XDMAC_CC_SAM_INCREMENTED_AM
>  					| AT_XDMAC_CC_DIF(0)
>  					| AT_XDMAC_CC_SIF(0)
> @@ -1201,6 +1201,168 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
>  	return &desc->tx_dma_desc;
>  }
>  
> +static struct dma_async_tx_descriptor *
> +at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
> +			    unsigned int sg_len, int value,
> +			    unsigned long flags)
> +{
> +	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
> +	struct at_xdmac_desc	*desc, *pdesc = NULL,
> +				*ppdesc = NULL, *first = NULL;
> +	struct scatterlist	*sg, *psg = NULL, *ppsg = NULL;
> +	size_t			stride = 0, pstride = 0, len = 0;
> +	int			i;
> +
> +	if (!sgl)
> +		return NULL;
> +
> +	dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n",
> +		__func__, sg_len, value, flags);
> +
> +	/* Prepare descriptors. */
> +	for_each_sg(sgl, sg, sg_len, i) {
> +		dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n",
> +			__func__, sg_dma_address(sg), sg_dma_len(sg),
> +			value, flags);
> +		desc = at_xdmac_memset_create_desc(chan, atchan,
> +						   sg_dma_address(sg),
> +						   sg_dma_len(sg),
> +						   value);
> +		if (!desc && first)
> +			list_splice_init(&first->descs_list,
> +					 &atchan->free_descs_list);
> +
> +		if (!first)
> +			first = desc;
> +
> +		/* Update our strides */
> +		pstride = stride;
> +		if (psg)
> +			stride = sg_dma_address(sg) -
> +				(sg_dma_address(psg) + sg_dma_len(psg));
> +
> +		/*
> +		 * The scatterlist API gives us only the address and
> +		 * length of each element.
> +		 *
> +		 * Unfortunately, we don't have the stride, which we
> +		 * will need to compute.
> +		 *
> +		 * That makes us end up in a situation like this one:
> +		 *    len    stride    len    stride    len
> +		 * +-------+        +-------+        +-------+
> +		 * |  N-2  |        |  N-1  |        |   N   |
> +		 * +-------+        +-------+        +-------+
> +		 *
> +		 * We need all three of these elements (N-2, N-1 and N)
> +		 * to decide whether we need to queue N-1 or reuse N-2.
> +		 *
> +		 * We will only consider N if it is the last element.
> +		 */
Why do you need the stride?

This is a scatterlist, so computing a stride sounds odd here. Ideally you
should take the scatterlist and program the LLIs for the controller.

-- 
~Vinod
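
To make the question concrete: the "stride" the patch computes is simply the
gap between the end of one sg entry and the start of the next, which it then
programs as the microblock destination stride (mbr_dus) so that several
entries can share a single descriptor. A minimal sketch of that computation,
for illustration only (the helper name is made up, this is not driver code):

#include <linux/scatterlist.h>

/*
 * Illustration only: the stride between two consecutive scatterlist
 * entries is the hole between where the previous chunk ends and where
 * the next one starts.
 */
static size_t sg_memset_stride(struct scatterlist *prev,
			       struct scatterlist *cur)
{
	/* End address of the previous chunk... */
	dma_addr_t prev_end = sg_dma_address(prev) + sg_dma_len(prev);

	/* ...and how far past it the next chunk begins. */
	return sg_dma_address(cur) - prev_end;
}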

> +		if (ppdesc && pdesc) {
> +			if ((stride == pstride) &&
> +			    (sg_dma_len(ppsg) == sg_dma_len(psg))) {
> +				dev_dbg(chan2dev(chan),
> +					"%s: desc 0x%p can be merged with desc 0x%p\n",
> +					__func__, pdesc, ppdesc);
> +
> +				/*
> +				 * Increment the block count of the
> +				 * N-2 descriptor
> +				 */
> +				at_xdmac_increment_block_count(chan, ppdesc);
> +				ppdesc->lld.mbr_dus = stride;
> +
> +				/*
> +				 * Put back the N-1 descriptor in the
> +				 * free descriptor list
> +				 */
> +				list_add_tail(&pdesc->desc_node,
> +					      &atchan->free_descs_list);
> +
> +				/*
> +				 * Make our N-1 descriptor pointer
> +				 * point to the N-2 since they were
> +				 * actually merged.
> +				 */
> +				pdesc = ppdesc;
> +
> +			/*
> +			 * Rule out the case where we don't have
> +			 * pstride computed yet (our second sg
> +			 * element).
> +			 *
> +			 * We also want to catch the case where there
> +			 * would be a negative stride.
> +			 */
> +			} else if (pstride ||
> +				   sg_dma_address(sg) < sg_dma_address(psg)) {
> +				/*
> +				 * Queue the N-1 descriptor after the
> +				 * N-2
> +				 */
> +				at_xdmac_queue_desc(chan, ppdesc, pdesc);
> +
> +				/*
> +				 * Add the N-1 descriptor to the list
> +				 * of the descriptors used for this
> +				 * transfer
> +				 */
> +				list_add_tail(&desc->desc_node,
> +					      &first->descs_list);
> +				dev_dbg(chan2dev(chan),
> +					"%s: add desc 0x%p to descs_list 0x%p\n",
> +					__func__, desc, first);
> +			}
> +		}
> +
> +		/*
> +		 * If we are the last element, just see if we have the
> +		 * same size as the previous element.
> +		 *
> +		 * If so, we can merge it with the previous descriptor
> +		 * since we don't care about the stride anymore.
> +		 */
> +		if ((i == (sg_len - 1)) &&
> +		    sg_dma_len(ppsg) == sg_dma_len(psg)) {
> +			dev_dbg(chan2dev(chan),
> +				"%s: desc 0x%p can be merged with desc 0x%p\n",
> +				__func__, desc, pdesc);
> +
> +			/*
> +			 * Increment the block count of the N-1
> +			 * descriptor
> +			 */
> +			at_xdmac_increment_block_count(chan, pdesc);
> +			pdesc->lld.mbr_dus = stride;
> +
> +			/*
> +			 * Put back the N descriptor in the free
> +			 * descriptor list
> +			 */
> +			list_add_tail(&desc->desc_node,
> +				      &atchan->free_descs_list);
> +		}
> +
> +		/* Update our descriptors */
> +		ppdesc = pdesc;
> +		pdesc = desc;
> +
> +		/* Update our scatter pointers */
> +		ppsg = psg;
> +		psg = sg;
> +
> +		len += sg_dma_len(sg);
> +	}
> +
> +	first->tx_dma_desc.cookie = -EBUSY;
> +	first->tx_dma_desc.flags = flags;
> +	first->xfer_size = len;
> +
> +	return &first->tx_dma_desc;
> +}
> +
>  static enum dma_status
>  at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
>  		struct dma_tx_state *txstate)
> @@ -1734,6 +1896,7 @@ static int at_xdmac_probe(struct platform_device *pdev)
>  	dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask);
>  	dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
>  	dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask);
> +	dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask);
>  	dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
>  	/*
>  	 * Without DMA_PRIVATE the driver is not able to allocate more than
> @@ -1749,6 +1912,7 @@ static int at_xdmac_probe(struct platform_device *pdev)
>  	atxdmac->dma.device_prep_interleaved_dma	= at_xdmac_prep_interleaved;
>  	atxdmac->dma.device_prep_dma_memcpy		= at_xdmac_prep_dma_memcpy;
>  	atxdmac->dma.device_prep_dma_memset		= at_xdmac_prep_dma_memset;
> +	atxdmac->dma.device_prep_dma_memset_sg		= at_xdmac_prep_dma_memset_sg;
>  	atxdmac->dma.device_prep_slave_sg		= at_xdmac_prep_slave_sg;
>  	atxdmac->dma.device_config			= at_xdmac_device_config;
>  	atxdmac->dma.device_pause			= at_xdmac_device_pause;
> -- 
> 2.4.5
> 
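
For context, a consumer of the new DMA_MEMSET_SG capability would look
roughly like the sketch below. This is purely illustrative (it is not part
of the patch, the function name is made up, and a real user would wait for
the transfer to complete before releasing the channel):

#include <linux/dmaengine.h>
#include <linux/scatterlist.h>
#include <linux/errno.h>

/* Hypothetical DMA_MEMSET_SG client, illustration only. */
static int memset_sg_example(struct scatterlist *sgl, unsigned int sg_len,
			     int value)
{
	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMSET_SG, mask);

	/* Grab any channel advertising scatter-gathered memset. */
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;

	/* The callback this patch wires up for the XDMAC. */
	tx = chan->device->device_prep_dma_memset_sg(chan, sgl, sg_len, value,
						     DMA_PREP_INTERRUPT);
	if (!tx) {
		dma_release_channel(chan);
		return -ENOMEM;
	}

	cookie = dmaengine_submit(tx);
	dma_async_issue_pending(chan);

	/* Completion handling omitted for brevity. */
	dma_release_channel(chan);
	return dma_submit_error(cookie) ? -EIO : 0;
}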
