[PATCH] ARM: add clean and flush_dcache_all to ARM cache API

Linus Walleij linus.walleij at stericsson.com
Fri Nov 5 11:01:47 EDT 2010


From: Srinidhi KASAGAR <srinidhi.kasagar at stericsson.com>

This patch adds functions to flush and clean the entire data cache.
The existing flush/clean range functions take too long when they are
used to flush/clean the entire data cache, so they cannot be used in
performance-critical situations:

Whenever the range of data to be flushed/cleaned from the data cache
exceeds some threshold value, it is better to flush/clean the entire
data cache instead; this reduces the time taken and effectively
increases the performance of the system.

The actual cache clean/flush is only implemented for v7 in this
patch; the other CPU versions are just dummy functions.

Signed-off-by: Srinidhi KASAGAR <srinidhi.kasagar at stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij at stericsson.com>
---
Since we cannot easily test CPU < v7 this is a bit of an RFC. Maybe
ARM people would like to take this over?

We really need this stuff; the use case is buffer handling in
contiguous memory for graphics buffers, which is being discussed
over at the V4L2 mailing list.
---
 arch/arm/include/asm/cacheflush.h |   17 ++++++
 arch/arm/mm/cache-fa.S            |   20 ++++++++
 arch/arm/mm/cache-v3.S            |   20 ++++++++
 arch/arm/mm/cache-v4.S            |   20 ++++++++
 arch/arm/mm/cache-v4wb.S          |   20 ++++++++
 arch/arm/mm/cache-v4wt.S          |   20 ++++++++
 arch/arm/mm/cache-v6.S            |   20 ++++++++
 arch/arm/mm/cache-v7.S            |   98 ++++++++++++++++++++++++++++++++++---
 arch/arm/mm/proc-v7.S             |    2 +-
 9 files changed, 228 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 3acd8fa..c004005 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -201,6 +201,14 @@
  *		- kaddr  - page address
  *		- size   - region size
  *
+ *	clean_dcache_all()
+ *
+ *		Cleans the entire d-cache.
+ *
+ *	flush_dcache_all()
+ *
+ *		Flushes the entire d-cache.
+ *
  *	DMA Cache Coherency
  *	===================
  *
@@ -221,6 +229,9 @@ struct cpu_cache_fns {
 	void (*coherent_user_range)(unsigned long, unsigned long);
 	void (*flush_kern_dcache_area)(void *, size_t);
 
+	void (*clean_dcache_all)(void);
+	void (*flush_dcache_all)(void);
+
 	void (*dma_map_area)(const void *, size_t, int);
 	void (*dma_unmap_area)(const void *, size_t, int);
 
@@ -241,6 +252,8 @@ extern struct cpu_cache_fns cpu_cache;
 #define __cpuc_coherent_kern_range	cpu_cache.coherent_kern_range
 #define __cpuc_coherent_user_range	cpu_cache.coherent_user_range
 #define __cpuc_flush_dcache_area	cpu_cache.flush_kern_dcache_area
+#define __cpuc_clean_dcache_all		cpu_cache.clean_dcache_all
+#define __cpuc_flush_dcache_all		cpu_cache.flush_dcache_all
 
 /*
  * These are private to the dma-mapping API.  Do not use directly.
@@ -261,6 +274,8 @@ extern struct cpu_cache_fns cpu_cache;
 #define __cpuc_coherent_kern_range	__glue(_CACHE,_coherent_kern_range)
 #define __cpuc_coherent_user_range	__glue(_CACHE,_coherent_user_range)
 #define __cpuc_flush_dcache_area	__glue(_CACHE,_flush_kern_dcache_area)
+#define __cpuc_clean_dcache_all		__glue(_CACHE,_clean_dcache_all)
+#define __cpuc_flush_dcache_all		__glue(_CACHE,_flush_dcache_all)
 
 extern void __cpuc_flush_icache_all(void);
 extern void __cpuc_flush_kern_all(void);
@@ -269,6 +284,8 @@ extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
 extern void __cpuc_coherent_kern_range(unsigned long, unsigned long);
 extern void __cpuc_coherent_user_range(unsigned long, unsigned long);
 extern void __cpuc_flush_dcache_area(void *, size_t);
+extern void __cpuc_clean_dcache_all(void);
+extern void __cpuc_flush_dcache_all(void);
 
 /*
  * These are private to the dma-mapping API.  Do not use directly.
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index 1fa6f71..e82b373 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -240,6 +240,24 @@ ENTRY(fa_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(fa_dma_unmap_area)
 
+/*
+ *	clean_dcache_all()
+ *
+ *	Clean the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(fa_clean_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(fa_clean_dcache_all)
+
+/*
+ *	flush_dcache_all()
+ *
+ *	Flush the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(fa_flush_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(fa_flush_dcache_all)
+
 	__INITDATA
 
 	.type	fa_cache_fns, #object
@@ -251,6 +269,8 @@ ENTRY(fa_cache_fns)
 	.long	fa_coherent_kern_range
 	.long	fa_coherent_user_range
 	.long	fa_flush_kern_dcache_area
+	.long	fa_clean_dcache_all
+	.long	fa_flush_dcache_all
 	.long	fa_dma_map_area
 	.long	fa_dma_unmap_area
 	.long	fa_dma_flush_range
diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S
index 2e2bc40..bf38b9b 100644
--- a/arch/arm/mm/cache-v3.S
+++ b/arch/arm/mm/cache-v3.S
@@ -127,6 +127,24 @@ ENTRY(v3_dma_map_area)
 ENDPROC(v3_dma_unmap_area)
 ENDPROC(v3_dma_map_area)
 
+/*
+ *	clean_dcache_all()
+ *
+ *	Clean the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v3_clean_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v3_clean_dcache_all)
+
+/*
+ *	flush_dcache_all()
+ *
+ *	Flush the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v3_flush_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v3_flush_dcache_all)
+
 	__INITDATA
 
 	.type	v3_cache_fns, #object
@@ -138,6 +156,8 @@ ENTRY(v3_cache_fns)
 	.long	v3_coherent_kern_range
 	.long	v3_coherent_user_range
 	.long	v3_flush_kern_dcache_area
+	.long	v3_clean_dcache_all
+	.long	v3_flush_dcache_all
 	.long	v3_dma_map_area
 	.long	v3_dma_unmap_area
 	.long	v3_dma_flush_range
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index a8fefb5..242dbd8 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -139,6 +139,24 @@ ENTRY(v4_dma_map_area)
 ENDPROC(v4_dma_unmap_area)
 ENDPROC(v4_dma_map_area)
 
+/*
+ *	clean_dcache_all()
+ *
+ *	Clean the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v4_clean_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v4_clean_dcache_all)
+
+/*
+ *	flush_dcache_all()
+ *
+ *	Flush the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v4_flush_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v4_flush_dcache_all)
+
 	__INITDATA
 
 	.type	v4_cache_fns, #object
@@ -150,6 +168,8 @@ ENTRY(v4_cache_fns)
 	.long	v4_coherent_kern_range
 	.long	v4_coherent_user_range
 	.long	v4_flush_kern_dcache_area
+	.long	v4_clean_dcache_all
+	.long	v4_flush_dcache_all
 	.long	v4_dma_map_area
 	.long	v4_dma_unmap_area
 	.long	v4_dma_flush_range
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index d3644db..0847304 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -251,6 +251,24 @@ ENTRY(v4wb_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(v4wb_dma_unmap_area)
 
+/*
+ *	clean_dcache_all()
+ *
+ *	Clean the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v4wb_clean_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v4wb_clean_dcache_all)
+
+/*
+ *	flush_dcache_all()
+ *
+ *	Flush the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v4wb_flush_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v4wb_flush_dcache_all)
+
 	__INITDATA
 
 	.type	v4wb_cache_fns, #object
@@ -262,6 +280,8 @@ ENTRY(v4wb_cache_fns)
 	.long	v4wb_coherent_kern_range
 	.long	v4wb_coherent_user_range
 	.long	v4wb_flush_kern_dcache_area
+	.long	v4wb_clean_dcache_all
+	.long	v4wb_flush_dcache_all
 	.long	v4wb_dma_map_area
 	.long	v4wb_dma_unmap_area
 	.long	v4wb_dma_flush_range
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 49c2b66..dab4533 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -195,6 +195,24 @@ ENTRY(v4wt_dma_map_area)
 ENDPROC(v4wt_dma_unmap_area)
 ENDPROC(v4wt_dma_map_area)
 
+/*
+ *	clean_dcache_all()
+ *
+ *	Clean the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v4wt_clean_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v4wt_clean_dcache_all)
+
+/*
+ *	flush_dcache_all()
+ *
+ *	Flush the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v4wt_flush_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v4wt_flush_dcache_all)
+
 	__INITDATA
 
 	.type	v4wt_cache_fns, #object
@@ -206,6 +224,8 @@ ENTRY(v4wt_cache_fns)
 	.long	v4wt_coherent_kern_range
 	.long	v4wt_coherent_user_range
 	.long	v4wt_flush_kern_dcache_area
+	.long	v4wt_clean_dcache_all
+	.long	v4wt_flush_dcache_all
 	.long	v4wt_dma_map_area
 	.long	v4wt_dma_unmap_area
 	.long	v4wt_dma_flush_range
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 99fa688..e2cd64a 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -315,6 +315,24 @@ ENTRY(v6_dma_unmap_area)
 	mov	pc, lr
 ENDPROC(v6_dma_unmap_area)
 
+/*
+ *	clean_dcache_all()
+ *
+ *	Clean the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v6_clean_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v6_clean_dcache_all)
+
+/*
+ *	flush_dcache_all()
+ *
+ *	Flush the whole D-cache.  Dummy implementation: returns without touching the cache.
+ */
+ENTRY(v6_flush_dcache_all)
+	mov	pc, lr				@ return immediately, no cache maintenance is performed
+ENDPROC(v6_flush_dcache_all)
+
 	__INITDATA
 
 	.type	v6_cache_fns, #object
@@ -326,6 +344,8 @@ ENTRY(v6_cache_fns)
 	.long	v6_coherent_kern_range
 	.long	v6_coherent_user_range
 	.long	v6_flush_kern_dcache_area
+	.long	v6_clean_dcache_all
+	.long	v6_flush_dcache_all
 	.long	v6_dma_map_area
 	.long	v6_dma_unmap_area
 	.long	v6_dma_flush_range
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index a3ebf7a..477a94c 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -33,15 +33,13 @@ ENTRY(v7_flush_icache_all)
 ENDPROC(v7_flush_icache_all)
 
 /*
- *	v7_flush_dcache_all()
+ *	__v7_flush_dcache_all()
  *
  *	Flush the whole D-cache.
  *
  *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
- *
- *	- mm    - mm_struct describing address space
  */
-ENTRY(v7_flush_dcache_all)
+ENTRY(__v7_flush_dcache_all)
 	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
 	ands	r3, r0, #0x7000000		@ extract loc from clidr
@@ -88,9 +86,93 @@ finished:
 	dsb
 	isb
 	mov	pc, lr
+ENDPROC(__v7_flush_dcache_all)
+
+/*
+ *	__v7_clean_dcache_all()
+ *
+ *	Clean the whole D-cache by set/way, one cache level at a time, up to the level given by the clidr loc field.
+ *
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode); nothing is saved here, callers must preserve what they need
+ */
+ENTRY(__v7_clean_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	ands	r3, r0, #0x7000000		@ extract loc from clidr
+	mov	r3, r3, lsr #23			@ r3 = loc << 1, same scale as the cache number in r10 (flags from ands are kept)
+	beq	finished1			@ if loc is 0, then no need to clean
+	mov	r10, #0				@ start clean at cache level 0
+loop21:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask off the bits for current cache only
+	cmp	r1, #2				@ see what cache we have at this level
+	blt	skip1				@ skip if no cache, or just i-cache
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	isb					@ isb to sync the new cssr&csidr
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	ldr	r4, =0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
+	clz	r5, r4				@ find bit position of way size increment
+	ldr	r7, =0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+loop22:
+	mov	r9, r4				@ create working copy of max way size
+loop23:
+ ARM(	orr	r11, r10, r9, lsl r5	)	@ factor way and cache number into r11
+ THUMB(	lsl	r6, r9, r5		)
+ THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
+ ARM(	orr	r11, r11, r7, lsl r2	)	@ factor index number into r11
+ THUMB(	lsl	r6, r7, r2		)
+ THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
+	mcr	p15, 0, r11, c7, c10, 2		@ clean by set/way
+	subs	r9, r9, #1			@ decrement the way
+	bge	loop23
+	subs	r7, r7, #1			@ decrement the index
+	bge	loop22
+skip1:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	loop21
+finished1:
+	mov	r10, #0				@ switch back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb
+	isb
+	mov	pc, lr
+ENDPROC(__v7_clean_dcache_all)
+
+/*
+ *	v7_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.  Wrapper around __v7_flush_dcache_all() that saves/restores the registers it corrupts (except r0-r3).
+ */
+ENTRY(v7_flush_dcache_all)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)	@ save the registers the callee corrupts, plus lr
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)	@ Thumb variant also saves r6
+	bl	__v7_flush_dcache_all
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)	@ restore the saved registers and lr
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	mov	pc, lr
 ENDPROC(v7_flush_dcache_all)
 
 /*
+ *	v7_clean_dcache_all()
+ *
+ *	Clean the whole D-cache.  Wrapper around __v7_clean_dcache_all() that saves/restores the registers it corrupts (except r0-r3).
+ */
+ENTRY(v7_clean_dcache_all)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)	@ save the registers the callee corrupts, plus lr
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)	@ Thumb variant also saves r6
+	bl	__v7_clean_dcache_all
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)	@ restore the saved registers and lr
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	mov	pc, lr
+ENDPROC(v7_clean_dcache_all)
+
+/*
  *	v7_flush_cache_all()
  *
  *	Flush the entire cache system.
@@ -102,14 +184,12 @@ ENDPROC(v7_flush_dcache_all)
  *
  */
 ENTRY(v7_flush_kern_cache_all)
- ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	stmfd	sp!, {lr}			@ only lr is saved here: v7_flush_dcache_all now saves and restores the other registers itself
 	bl	v7_flush_dcache_all
 	mov	r0, #0
 	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
- ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	ldmfd	sp!, {lr}			@ restore the return address overwritten by the bl above
 	mov	pc, lr
 ENDPROC(v7_flush_kern_cache_all)
 
@@ -325,6 +405,8 @@ ENTRY(v7_cache_fns)
 	.long	v7_coherent_kern_range
 	.long	v7_coherent_user_range
 	.long	v7_flush_kern_dcache_area
+	.long	v7_clean_dcache_all
+	.long	v7_flush_dcache_all
 	.long	v7_dma_map_area
 	.long	v7_dma_unmap_area
 	.long	v7_dma_flush_range
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 53cbe22..753783e 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -196,7 +196,7 @@ __v7_ca9mp_setup:
 __v7_setup:
 	adr	r12, __v7_setup_stack		@ the local stack
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
-	bl	v7_flush_dcache_all
+	bl	__v7_flush_dcache_all
 	ldmia	r12, {r0-r5, r7, r9, r11, lr}
 
 	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
-- 
1.6.3.3




More information about the linux-arm-kernel mailing list