[RFC PATCH v2] arm DMA: Fix allocation from CMA for coherent DMA
Lorenzo Nava
lorenx4 at gmail.com
Wed Jun 10 12:34:43 PDT 2015
On Wed, Jun 10, 2015 at 6:28 PM, Catalin Marinas
<catalin.marinas at arm.com> wrote:
>
> On Wed, Jun 03, 2015 at 07:15:45PM +0200, Lorenzo Nava wrote:
> > This patch allows the use of CMA for DMA coherent memory allocation.
> > At the moment if the input parameter "is_coherent" is set to true
> > the allocation is not made using the CMA, which I think is not the
> > desired behaviour.
> >
> > Signed-off-by: Lorenzo Nava <lorenx4 at xxxxxxxx>
> > ---
> > Changes in v2:
> > correct __arm_dma_free() according to __dma_alloc() allocation
> > ---
> > arch/arm/mm/dma-mapping.c | 6 +++---
> > 1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> > index 7e7583d..15643b9 100644
> > --- a/arch/arm/mm/dma-mapping.c
> > +++ b/arch/arm/mm/dma-mapping.c
> > @@ -645,9 +645,9 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> > size = PAGE_ALIGN(size);
> > want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
> >
> > - if (is_coherent || nommu())
> > + if (nommu())
> > addr = __alloc_simple_buffer(dev, size, gfp, &page);
> > - else if (!(gfp & __GFP_WAIT))
> > + else if (!is_coherent && !(gfp & __GFP_WAIT))
> > addr = __alloc_from_pool(size, &page);
> > else if (!dev_get_cma_area(dev))
> > addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr);
>
> So while you allow __alloc_from_contiguous() to be called when
> is_coherent, the memory returned is still non-cacheable. The reason is
> that the "prot" argument passed to __dma_alloc() in
> arm_coherent_dma_alloc() is pgprot_dmacoherent(PAGE_KERNEL) which means
> Normal NonCacheable memory. The mmap seems to create a cacheable mapping
> as vma->vm_page_prot is not passed through __get_dma_pgprot().
>
> I think you need something like below, completely untested:
>
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index 1ced8a0f7a52..1ee3d8e8c313 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -573,11 +573,13 @@ static void __free_from_contiguous(struct device *dev, struct page *page,
> dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
> }
>
> -static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
> +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
> + bool coherent)
> {
> - prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
> - pgprot_writecombine(prot) :
> - pgprot_dmacoherent(prot);
> + if (dma_get_attr(DMA_ATTR_WRITE_COMBINE))
> + prot = pgprot_writecombine(prot);
> + else if (!coherent)
> + prot = pgprot_dmacoherent(prot);
> return prot;
> }
>
> @@ -587,7 +589,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
>
> #define nommu() 1
>
> -#define __get_dma_pgprot(attrs, prot) __pgprot(0)
> +#define __get_dma_pgprot(attrs, prot, coherent) __pgprot(0)
> #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL
> #define __alloc_from_pool(size, ret_page) NULL
> #define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL
> @@ -670,7 +672,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> gfp_t gfp, struct dma_attrs *attrs)
> {
> - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
> + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
> void *memory;
>
> if (dma_alloc_from_coherent(dev, size, handle, &memory))
> @@ -683,7 +685,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
> dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> {
> - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
> + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, true);
> void *memory;
>
> if (dma_alloc_from_coherent(dev, size, handle, &memory))
> @@ -733,7 +735,7 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
> struct dma_attrs *attrs)
> {
> #ifdef CONFIG_MMU
> - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false);
> #endif /* CONFIG_MMU */
> return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
> }
> @@ -1362,7 +1364,7 @@ static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
> static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
> dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> {
> - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
> + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_device_dma_coherent(dev));
> struct page **pages;
> void *addr = NULL;
>
> @@ -1414,7 +1416,7 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
> unsigned long usize = vma->vm_end - vma->vm_start;
> struct page **pages = __iommu_get_pages(cpu_addr, attrs);
>
> - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
> + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, is_device_dma_coherent(dev));
>
> if (!pages)
> return -ENXIO;
Thanks for the answer.
Well the final scope of this patch is just to fix what in my opinion
is an incorrect behaviour: the lack of use of CMA when the flag
"is_coherent" is set.
Of course it still exists the problem of modify the attribute to make
the memory cacheable, but it is something I would like to do in a
second step (the patch you posted is of course a good starting point).
I think that the current implementation maps memory keeping non
cacheable attributes enable, because the 'attrs' parameter passed to
arm_dma_mmap() has no WRITE_COMBINE attribute set (according to
dma_mmap_coherent() in include/asm-generic/dma-mapping-common.h).
I also notice this patch that is pending "[PATCH v3]
arm/mm/dma-mapping.c: Add arm_coherent_dma_mmap": it modifies the
mapping of memory for coherent DMA. I want to understand if the merge
of this patch requires any other modification to guarantee that
coherent memory is allocated with cacheable attributes.
Thanks.
More information about the linux-arm-kernel
mailing list