[PATCH v2 07/17] ARM: dma-mapping: no need to clean overlapping cache lines on invalidate
Nicolas Pitre
nico at fluxnic.net
Mon Nov 23 14:35:04 EST 2009
On Mon, 23 Nov 2009, Russell King - ARM Linux wrote:
> Since we now clean the DMA buffers on map, there's no need to clean
> overlapping cache lines on invalidation anymore. (Note: the DMA API
> prohibits other data sharing the same cache line as a DMA buffer
> anyway.)
>
> Signed-off-by: Russell King <rmk+kernel at arm.linux.org.uk>
> Tested-by: Jamie Iles <jamie at jamieiles.com>
> Reviewed-by: Catalin Marinas <catalin.marinas at arm.com>
Acked-by: Nicolas Pitre <nico at marvell.com>
> ---
> arch/arm/mm/cache-fa.S | 5 -----
> arch/arm/mm/cache-feroceon-l2.c | 19 +++----------------
> arch/arm/mm/cache-l2x0.c | 12 +-----------
> arch/arm/mm/cache-v4wb.S | 5 -----
> arch/arm/mm/cache-v6.S | 13 -------------
> arch/arm/mm/cache-v7.S | 6 ------
> arch/arm/mm/proc-arm1020.S | 8 --------
> arch/arm/mm/proc-arm1020e.S | 4 ----
> arch/arm/mm/proc-arm1022.S | 4 ----
> arch/arm/mm/proc-arm1026.S | 4 ----
> arch/arm/mm/proc-arm920.S | 4 ----
> arch/arm/mm/proc-arm922.S | 4 ----
> arch/arm/mm/proc-arm925.S | 6 ------
> arch/arm/mm/proc-arm926.S | 6 ------
> arch/arm/mm/proc-arm946.S | 7 -------
> arch/arm/mm/proc-feroceon.S | 8 --------
> arch/arm/mm/proc-mohawk.S | 4 ----
> arch/arm/mm/proc-xsc3.S | 4 ----
> arch/arm/mm/proc-xscale.S | 4 ----
> 19 files changed, 4 insertions(+), 123 deletions(-)
>
> diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
> index b63a8f7..1711386 100644
> --- a/arch/arm/mm/cache-fa.S
> +++ b/arch/arm/mm/cache-fa.S
> @@ -157,12 +157,7 @@ ENTRY(fa_flush_kern_dcache_page)
> * - end - virtual end address
> */
> ENTRY(fa_dma_inv_range)
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - bic r1, r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
> index 6e77c04..6834e07 100644
> --- a/arch/arm/mm/cache-feroceon-l2.c
> +++ b/arch/arm/mm/cache-feroceon-l2.c
> @@ -163,26 +163,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
>
> static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
> {
> - /*
> - * Clean and invalidate partial first cache line.
> - */
> - if (start & (CACHE_LINE_SIZE - 1)) {
> - l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
> - start = (start | (CACHE_LINE_SIZE - 1)) + 1;
> - }
> -
> - /*
> - * Clean and invalidate partial last cache line.
> - */
> - if (start < end && end & (CACHE_LINE_SIZE - 1)) {
> - l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
> - end &= ~(CACHE_LINE_SIZE - 1);
> - }
> + start &= ~(CACHE_LINE_SIZE - 1);
> + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
>
> /*
> * Invalidate all full cache lines between 'start' and 'end'.
> */
> - while (start < end) {
> + while (start != end) {
> unsigned long range_end = calc_range_end(start, end);
> l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE);
> start = range_end;
> diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
> index b480f1d..a726b57 100644
> --- a/arch/arm/mm/cache-l2x0.c
> +++ b/arch/arm/mm/cache-l2x0.c
> @@ -57,17 +57,7 @@ static void l2x0_inv_range(unsigned long start, unsigned long end)
> {
> unsigned long addr;
>
> - if (start & (CACHE_LINE_SIZE - 1)) {
> - start &= ~(CACHE_LINE_SIZE - 1);
> - sync_writel(start, L2X0_CLEAN_INV_LINE_PA, 1);
> - start += CACHE_LINE_SIZE;
> - }
> -
> - if (end & (CACHE_LINE_SIZE - 1)) {
> - end &= ~(CACHE_LINE_SIZE - 1);
> - sync_writel(end, L2X0_CLEAN_INV_LINE_PA, 1);
> - }
> -
> + start &= ~(CACHE_LINE_SIZE - 1);
> for (addr = start; addr < end; addr += CACHE_LINE_SIZE)
> sync_writel(addr, L2X0_INV_LINE_PA, 1);
> cache_sync();
> diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
> index 2ebc1b3..553931a 100644
> --- a/arch/arm/mm/cache-v4wb.S
> +++ b/arch/arm/mm/cache-v4wb.S
> @@ -173,16 +173,11 @@ ENTRY(v4wb_coherent_user_range)
> * - end - virtual end address
> */
> ENTRY(v4wb_dma_inv_range)
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> blo 1b
> - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
> mov pc, lr
>
> /*
> diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
> index 295e25d..d1dfd87 100644
> --- a/arch/arm/mm/cache-v6.S
> +++ b/arch/arm/mm/cache-v6.S
> @@ -195,20 +195,7 @@ ENTRY(v6_flush_kern_dcache_page)
> * - end - virtual end address of region
> */
> ENTRY(v6_dma_inv_range)
> - tst r0, #D_CACHE_LINE_SIZE - 1
> bic r0, r0, #D_CACHE_LINE_SIZE - 1
> -#ifdef HARVARD_CACHE
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D line
> -#else
> - mcrne p15, 0, r0, c7, c11, 1 @ clean unified line
> -#endif
> - tst r1, #D_CACHE_LINE_SIZE - 1
> - bic r1, r1, #D_CACHE_LINE_SIZE - 1
> -#ifdef HARVARD_CACHE
> - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line
> -#else
> - mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line
> -#endif
> 1:
> #ifdef HARVARD_CACHE
> mcr p15, 0, r0, c7, c6, 1 @ invalidate D line
> diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
> index e1bd975..893ee59 100644
> --- a/arch/arm/mm/cache-v7.S
> +++ b/arch/arm/mm/cache-v7.S
> @@ -218,13 +218,7 @@ ENDPROC(v7_flush_kern_dcache_page)
> ENTRY(v7_dma_inv_range)
> dcache_line_size r2, r3
> sub r3, r2, #1
> - tst r0, r3
> bic r0, r0, r3
> - mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line
> -
> - tst r1, r3
> - bic r1, r1, r3
> - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line
> 1:
> mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line
> add r0, r0, r2
> diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
> index d9fb4b9..7bbf624 100644
> --- a/arch/arm/mm/proc-arm1020.S
> +++ b/arch/arm/mm/proc-arm1020.S
> @@ -267,15 +267,7 @@ ENTRY(arm1020_flush_kern_dcache_page)
> ENTRY(arm1020_dma_inv_range)
> mov ip, #0
> #ifndef CONFIG_CPU_DCACHE_DISABLE
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, ip, c7, c10, 4
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - mcrne p15, 0, ip, c7, c10, 4 @ drain WB
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, ip, c7, c10, 4
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> - mcrne p15, 0, ip, c7, c10, 4 @ drain WB
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
> index 7453b75..d379cb7 100644
> --- a/arch/arm/mm/proc-arm1020e.S
> +++ b/arch/arm/mm/proc-arm1020e.S
> @@ -260,11 +260,7 @@ ENTRY(arm1020e_flush_kern_dcache_page)
> ENTRY(arm1020e_dma_inv_range)
> mov ip, #0
> #ifndef CONFIG_CPU_DCACHE_DISABLE
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
> index 8eb72d7..f5a7949 100644
> --- a/arch/arm/mm/proc-arm1022.S
> +++ b/arch/arm/mm/proc-arm1022.S
> @@ -249,11 +249,7 @@ ENTRY(arm1022_flush_kern_dcache_page)
> ENTRY(arm1022_dma_inv_range)
> mov ip, #0
> #ifndef CONFIG_CPU_DCACHE_DISABLE
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
> index 3b59f0d..1dc26f8 100644
> --- a/arch/arm/mm/proc-arm1026.S
> +++ b/arch/arm/mm/proc-arm1026.S
> @@ -243,11 +243,7 @@ ENTRY(arm1026_flush_kern_dcache_page)
> ENTRY(arm1026_dma_inv_range)
> mov ip, #0
> #ifndef CONFIG_CPU_DCACHE_DISABLE
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
> index 2b7c197..078a873 100644
> --- a/arch/arm/mm/proc-arm920.S
> +++ b/arch/arm/mm/proc-arm920.S
> @@ -239,11 +239,7 @@ ENTRY(arm920_flush_kern_dcache_page)
> * (same as v4wb)
> */
> ENTRY(arm920_dma_inv_range)
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
> index 06a1aa4..22ca857 100644
> --- a/arch/arm/mm/proc-arm922.S
> +++ b/arch/arm/mm/proc-arm922.S
> @@ -241,11 +241,7 @@ ENTRY(arm922_flush_kern_dcache_page)
> * (same as v4wb)
> */
> ENTRY(arm922_dma_inv_range)
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
> index cb53435..ff04299 100644
> --- a/arch/arm/mm/proc-arm925.S
> +++ b/arch/arm/mm/proc-arm925.S
> @@ -283,12 +283,6 @@ ENTRY(arm925_flush_kern_dcache_page)
> * (same as v4wb)
> */
> ENTRY(arm925_dma_inv_range)
> -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
> - tst r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> -#endif
> bic r0, r0, #CACHE_DLINESIZE - 1
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
> index 1c48487..4b4c717 100644
> --- a/arch/arm/mm/proc-arm926.S
> +++ b/arch/arm/mm/proc-arm926.S
> @@ -246,12 +246,6 @@ ENTRY(arm926_flush_kern_dcache_page)
> * (same as v4wb)
> */
> ENTRY(arm926_dma_inv_range)
> -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
> - tst r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> -#endif
> bic r0, r0, #CACHE_DLINESIZE - 1
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
> index 40c0449..589a61c 100644
> --- a/arch/arm/mm/proc-arm946.S
> +++ b/arch/arm/mm/proc-arm946.S
> @@ -215,18 +215,11 @@ ENTRY(arm946_flush_kern_dcache_page)
> * (same as arm926)
> */
> ENTRY(arm946_dma_inv_range)
> -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
> - tst r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> -#endif
> bic r0, r0, #CACHE_DLINESIZE - 1
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> blo 1b
> - mcr p15, 0, r0, c7, c10, 4 @ drain WB
> mov pc, lr
>
> /*
> diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
> index d0d7795..1262b92 100644
> --- a/arch/arm/mm/proc-feroceon.S
> +++ b/arch/arm/mm/proc-feroceon.S
> @@ -274,11 +274,7 @@ ENTRY(feroceon_range_flush_kern_dcache_page)
> */
> .align 5
> ENTRY(feroceon_dma_inv_range)
> - tst r0, #CACHE_DLINESIZE - 1
> bic r0, r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> cmp r0, r1
> @@ -289,10 +285,6 @@ ENTRY(feroceon_dma_inv_range)
> .align 5
> ENTRY(feroceon_range_dma_inv_range)
> mrs r2, cpsr
> - tst r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> cmp r1, r0
> subne r1, r1, #1 @ top address is inclusive
> orr r3, r2, #PSR_I_BIT
> diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
> index 52b5fd7..191ea6d 100644
> --- a/arch/arm/mm/proc-mohawk.S
> +++ b/arch/arm/mm/proc-mohawk.S
> @@ -218,10 +218,6 @@ ENTRY(mohawk_flush_kern_dcache_page)
> * (same as v4wb)
> */
> ENTRY(mohawk_dma_inv_range)
> - tst r0, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHE_DLINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> bic r0, r0, #CACHE_DLINESIZE - 1
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHE_DLINESIZE
> diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
> index 2028f37..2c1ac69 100644
> --- a/arch/arm/mm/proc-xsc3.S
> +++ b/arch/arm/mm/proc-xsc3.S
> @@ -257,11 +257,7 @@ ENTRY(xsc3_flush_kern_dcache_page)
> * - end - virtual end address
> */
> ENTRY(xsc3_dma_inv_range)
> - tst r0, #CACHELINESIZE - 1
> bic r0, r0, #CACHELINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line
> - tst r1, #CACHELINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line
> add r0, r0, #CACHELINESIZE
> cmp r0, r1
> diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
> index f056c28..3170348 100644
> --- a/arch/arm/mm/proc-xscale.S
> +++ b/arch/arm/mm/proc-xscale.S
> @@ -315,11 +315,7 @@ ENTRY(xscale_flush_kern_dcache_page)
> * - end - virtual end address
> */
> ENTRY(xscale_dma_inv_range)
> - tst r0, #CACHELINESIZE - 1
> bic r0, r0, #CACHELINESIZE - 1
> - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
> - tst r1, #CACHELINESIZE - 1
> - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
> 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
> add r0, r0, #CACHELINESIZE
> cmp r0, r1
> --
> 1.6.2.5
>
More information about the linux-arm-kernel mailing list