[RFC PATCH] ARM: add coherent dma ops
Marek Szyprowski
m.szyprowski at samsung.com
Mon Aug 13 02:15:22 EDT 2012
Hi Rob,
On Thursday, August 09, 2012 7:37 AM Rob Herring wrote:
> From: Rob Herring <rob.herring at calxeda.com>
>
> arch_is_coherent is problematic as it is a global symbol. This
> doesn't work for multi-platform kernels or platforms which can support
> per device coherent DMA.
>
> This adds arm_coherent_dma_ops to be used for devices which connected
> coherently (i.e. to the ACP port on Cortex-A9 or A15). The arm_dma_ops
> are modified at boot when arch_is_coherent is true.
Thanks for the patch. I had something similar on my TODO list, but have not had enough time
for it. I like this patch, but I have some comments.
> This does not address arch_is_coherent used in iommu dma ops.
In the initial version we might get rid of the arch_is_coherent() usage in the iommu dma ops and
implement it once real coherent hardware with an IOMMU becomes available.
> Signed-off-by: Rob Herring <rob.herring at calxeda.com>
> Cc: Russell King <linux at arm.linux.org.uk>
> Cc: Marek Szyprowski <m.szyprowski at samsung.com>
> ---
> Compile tested only.
> arch/arm/mm/dma-mapping.c | 89 +++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 77 insertions(+), 12 deletions(-)
>
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index c2cdf65..8875cd4 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
> unsigned long offset, size_t size, enum dma_data_direction dir,
> struct dma_attrs *attrs)
> {
> - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> __dma_page_cpu_to_dev(page, offset, size, dir);
> return pfn_to_dma(dev, page_to_pfn(page)) + offset;
> }
>
> +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
> + unsigned long offset, size_t size, enum dma_data_direction dir,
> + struct dma_attrs *attrs)
> +{
> + return pfn_to_dma(dev, page_to_pfn(page)) + offset;
> +}
> +
> /**
> * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
> * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
> @@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
> size_t size, enum dma_data_direction dir,
> struct dma_attrs *attrs)
> {
> - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
> handle & ~PAGE_MASK, size, dir);
> }
> @@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev,
> {
> unsigned int offset = handle & (PAGE_SIZE - 1);
> struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
> - if (!arch_is_coherent())
> - __dma_page_dev_to_cpu(page, offset, size, dir);
> + __dma_page_dev_to_cpu(page, offset, size, dir);
> }
>
> static void arm_dma_sync_single_for_device(struct device *dev,
> @@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev,
> {
> unsigned int offset = handle & (PAGE_SIZE - 1);
> struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
> - if (!arch_is_coherent())
> - __dma_page_cpu_to_dev(page, offset, size, dir);
> + __dma_page_cpu_to_dev(page, offset, size, dir);
> }
>
> static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
> @@ -138,6 +143,40 @@ struct dma_map_ops arm_dma_ops = {
> };
> EXPORT_SYMBOL(arm_dma_ops);
>
> +static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
> + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
> +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs);
> +
> +struct dma_map_ops arm_coherent_dma_ops = {
> + .alloc = arm_coherent_dma_alloc,
> + .free = arm_coherent_dma_free,
> + .mmap = arm_dma_mmap,
> + .get_sgtable = arm_dma_get_sgtable,
> + .map_page = arm_coherent_dma_map_page,
> + .map_sg = arm_dma_map_sg,
> + .set_dma_mask = arm_dma_set_mask,
> +};
> +EXPORT_SYMBOL(arm_coherent_dma_ops);
> +
> +static int __init dma_map_init(void)
> +{
> + if (!arch_is_coherent())
> + return 0;
> +
> + arm_dma_ops.map_page = arm_coherent_dma_map_page;
> + arm_dma_ops.unmap_page = NULL;
> + arm_dma_ops.map_sg = NULL;
> + arm_dma_ops.unmap_sg = NULL;
> + arm_dma_ops.sync_single_for_cpu = NULL;
> + arm_dma_ops.sync_single_for_device = NULL;
> + arm_dma_ops.sync_sg_for_cpu = NULL;
> + arm_dma_ops.sync_sg_for_device = NULL;
> + arm_dma_ops.alloc = arm_coherent_dma_alloc;
> + arm_dma_ops.free = arm_coherent_dma_free;
> +}
> +core_initcall(dma_map_init);
I would implement it in a slightly different way. Overwriting structure entries is not the
cleanest approach and might lead to misunderstandings. I would rather change the
get_dma_ops() function in arch/arm/include/asm/dma-mapping.h to something like this:
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
if (dev && dev->archdata.dma_ops)
return dev->archdata.dma_ops;
return !arch_is_coherent() ? &arm_dma_ops : &arm_coherent_dma_ops;
}
This way the code is easy to understand, and the compiler can easily optimize out the above
check for the 99% of architectures which are either fully coherent or fully non-coherent. In the
case of partially coherent architectures, arch_is_coherent() will probably return false, and
coherent devices will get their dma_map_ops initialized by platform code.
> +
> static u64 get_coherent_dma_mask(struct device *dev)
> {
> u64 mask = (u64)arm_dma_limit;
> @@ -538,7 +577,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t
> gfp,
>
>
> static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> - gfp_t gfp, pgprot_t prot, const void *caller)
> + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
> {
> u64 mask = get_coherent_dma_mask(dev);
> struct page *page;
> @@ -571,7 +610,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t
> *handle,
> *handle = DMA_ERROR_CODE;
> size = PAGE_ALIGN(size);
>
> - if (arch_is_coherent() || nommu())
> + if (is_coherent || nommu())
> addr = __alloc_simple_buffer(dev, size, gfp, &page);
> else if (gfp & GFP_ATOMIC)
> addr = __alloc_from_pool(size, &page);
> @@ -599,7 +638,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> if (dma_alloc_from_coherent(dev, size, handle, &memory))
> return memory;
>
> - return __dma_alloc(dev, size, handle, gfp, prot,
> + return __dma_alloc(dev, size, handle, gfp, prot, false,
> + __builtin_return_address(0));
> +}
> +
> +static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
> + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> +{
> + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> + void *memory;
> +
> + if (dma_alloc_from_coherent(dev, size, handle, &memory))
> + return memory;
> +
> + return __dma_alloc(dev, size, handle, gfp, prot, true,
> __builtin_return_address(0));
> }
>
> @@ -636,8 +688,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
> /*
> * Free a buffer as defined by the above mapping.
> */
> -void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> - dma_addr_t handle, struct dma_attrs *attrs)
> +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs,
> + bool is_coherent)
> {
> struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
>
> @@ -646,7 +699,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
>
> size = PAGE_ALIGN(size);
>
> - if (arch_is_coherent() || nommu()) {
> + if (is_coherent || nommu()) {
> __dma_free_buffer(page, size);
> } else if (!IS_ENABLED(CONFIG_CMA)) {
> __dma_free_remap(cpu_addr, size);
> @@ -662,6 +715,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> }
> }
>
> +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs)
> +{
> + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false);
> +}
> +
> +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs)
> +{
> + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
> +}
> +
> int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
> void *cpu_addr, dma_addr_t handle, size_t size,
> struct dma_attrs *attrs)
> --
> 1.7.9.5
Best regards
--
Marek Szyprowski
Samsung Poland R&D Center
More information about the linux-arm-kernel
mailing list