[PATCH 03/12] cache functions from kernel
Sascha Hauer
s.hauer at pengutronix.de
Mon Jan 25 02:32:21 EST 2010
Signed-off-by: Sascha Hauer <s.hauer at pengutronix.de>
---
arch/arm/cpu/Makefile | 6 +-
arch/arm/cpu/cache-armv4.S | 137 ++++++++++++++++++++++++++++++++
arch/arm/cpu/cache-armv6.S | 131 +++++++++++++++++++++++++++++++
arch/arm/cpu/cache-armv7.S | 185 ++++++++++++++++++++++++++++++++++++++++++++
arch/arm/cpu/mmu.c | 29 ++------
5 files changed, 463 insertions(+), 25 deletions(-)
create mode 100644 arch/arm/cpu/cache-armv4.S
create mode 100644 arch/arm/cpu/cache-armv6.S
create mode 100644 arch/arm/cpu/cache-armv7.S
diff --git a/arch/arm/cpu/Makefile b/arch/arm/cpu/Makefile
index 538ab57..2273e45 100644
--- a/arch/arm/cpu/Makefile
+++ b/arch/arm/cpu/Makefile
@@ -11,5 +11,7 @@ obj-$(CONFIG_ARCH_IMX31) += start-arm.o
obj-$(CONFIG_ARCH_IMX35) += start-arm.o
obj-$(CONFIG_CMD_ARM_CPUINFO) += cpuinfo.o
obj-$(CONFIG_MMU) += mmu.o
-obj-$(CONFIG_MMU) += cache.o
-
+obj-$(CONFIG_CPU_32v4T) += cache-armv4.o
+obj-$(CONFIG_CPU_32v5) += cache-armv4.o
+obj-$(CONFIG_CPU_32v6) += cache-armv6.o
+obj-$(CONFIG_CPU_32v7) += cache-armv7.o
diff --git a/arch/arm/cpu/cache-armv4.S b/arch/arm/cpu/cache-armv4.S
new file mode 100644
index 0000000..a0ab256
--- /dev/null
+++ b/arch/arm/cpu/cache-armv4.S
@@ -0,0 +1,137 @@
+#include <linux/linkage.h>
+
+#define CACHE_DLINESIZE 32
+
+ENTRY(__mmu_cache_on)
+ mov r12, lr
+#ifdef CONFIG_MMU
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
+ mrc p15, 0, r0, c1, c0, 0 @ read control reg
+ orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement
+ orr r0, r0, #0x0030
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ orr r0, r0, #1 << 25 @ big-endian page tables
+#endif
+ bl __common_mmu_cache_on
+ mov r0, #0
+ mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
+#endif
+ mov pc, r12
+ENDPROC(__mmu_cache_on)
+
+__common_mmu_cache_on:
+ orr r0, r0, #0x000d @ Write buffer, mmu
+ b 1f
+ .align 5 @ cache line aligned
+1: mcr p15, 0, r0, c1, c0, 0 @ load control register
+ mrc p15, 0, r0, c1, c0, 0 @ and read it back to
+ sub pc, lr, r0, lsr #32 @ properly flush pipeline
+
+ENTRY(__mmu_cache_off)
+#ifdef CONFIG_MMU
+ mrc p15, 0, r0, c1, c0
+ bic r0, r0, #0x000d
+ mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
+ mov r0, #0
+ mcr p15, 0, r0, c7, c7 @ invalidate whole cache v4
+ mcr p15, 0, r0, c8, c7 @ invalidate whole TLB v4
+#endif
+ mov pc, lr
+ENDPROC(__mmu_cache_off)
+
+ENTRY(__mmu_cache_flush)
+ mrc p15, 0, r6, c0, c0 @ get processor ID
+ mov r2, #64*1024 @ default: 32K dcache size (*2)
+ mov r11, #32 @ default: 32 byte line size
+ mrc p15, 0, r3, c0, c0, 1 @ read cache type
+ teq r3, r6 @ cache ID register present?
+ beq no_cache_id
+ mov r1, r3, lsr #18
+ and r1, r1, #7
+ mov r2, #1024
+ mov r2, r2, lsl r1 @ base dcache size *2
+ tst r3, #1 << 14 @ test M bit
+ addne r2, r2, r2, lsr #1 @ +1/2 size if M == 1
+ mov r3, r3, lsr #12
+ and r3, r3, #3
+ mov r11, #8
+ mov r11, r11, lsl r3 @ cache line size in bytes
+no_cache_id:
+ mov r1, pc
+ bic r1, r1, #63 @ align to longest cache line
+ add r2, r1, r2
+1:
+ ldr r3, [r1], r11 @ s/w flush D cache
+ teq r1, r2
+ bne 1b
+
+ mcr p15, 0, r1, c7, c5, 0 @ flush I cache
+ mcr p15, 0, r1, c7, c6, 0 @ flush D cache
+ mcr p15, 0, r1, c7, c10, 4 @ drain WB
+ mov pc, lr
+ENDPROC(__mmu_cache_flush)
+
+/*
+ * dma_inv_range(start, end)
+ *
+ * Invalidate (discard) the specified virtual address range.
+ * May not write back any entries. If 'start' or 'end'
+ * are not cache line aligned, those lines must be written
+ * back.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(dma_inv_range)
+ tst r0, #CACHE_DLINESIZE - 1
+ bic r0, r0, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
+ tst r1, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r1, c7, c10, 1 @ clean D entry
+1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * dma_clean_range(start, end)
+ *
+ * Clean the specified virtual address range.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(dma_clean_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+/*
+ * dma_flush_range(start, end)
+ *
+ * Clean and invalidate the specified virtual address range.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(dma_flush_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
+ mov pc, lr
+
diff --git a/arch/arm/cpu/cache-armv6.S b/arch/arm/cpu/cache-armv6.S
new file mode 100644
index 0000000..ceabd52
--- /dev/null
+++ b/arch/arm/cpu/cache-armv6.S
@@ -0,0 +1,131 @@
+#include <linux/linkage.h>
+
+#define HARVARD_CACHE
+#define CACHE_LINE_SIZE 32
+#define D_CACHE_LINE_SIZE 32
+
+ENTRY(__mmu_cache_on)
+ mov r12, lr
+#ifdef CONFIG_MMU
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
+ mrc p15, 0, r0, c1, c0, 0 @ read control reg
+ orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement
+ orr r0, r0, #0x0030
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ orr r0, r0, #1 << 25 @ big-endian page tables
+#endif
+ bl __common_mmu_cache_on
+ mov r0, #0
+ mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
+#endif
+ mov pc, r12
+ENDPROC(__mmu_cache_on)
+
+__common_mmu_cache_on:
+ orr r0, r0, #0x000d @ Write buffer, mmu
+ b 1f
+ .align 5 @ cache line aligned
+1: mcr p15, 0, r0, c1, c0, 0 @ load control register
+ mrc p15, 0, r0, c1, c0, 0 @ and read it back to
+ sub pc, lr, r0, lsr #32 @ properly flush pipeline
+
+ENTRY(__mmu_cache_off)
+#ifdef CONFIG_MMU
+ mrc p15, 0, r0, c1, c0
+ bic r0, r0, #0x000d
+ mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
+ mov r0, #0
+ mcr p15, 0, r0, c7, c7 @ invalidate whole cache v4
+ mcr p15, 0, r0, c8, c7 @ invalidate whole TLB v4
+#endif
+ mov pc, lr
+
+ENTRY(__mmu_cache_flush)
+ mov r1, #0
+ mcr p15, 0, r1, c7, c14, 0 @ clean+invalidate D
+ mcr p15, 0, r1, c7, c5, 0 @ invalidate I+BTB
+ mcr p15, 0, r1, c7, c15, 0 @ clean+invalidate unified
+ mcr p15, 0, r1, c7, c10, 4 @ drain WB
+ mov pc, lr
+ENDPROC(__mmu_cache_flush)
+
+/*
+ * v6_dma_inv_range(start,end)
+ *
+ * Invalidate the data cache within the specified region; we will
+ * be performing a DMA operation in this region and we want to
+ * purge old data in the cache.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(dma_inv_range)
+ tst r0, #D_CACHE_LINE_SIZE - 1
+ bic r0, r0, #D_CACHE_LINE_SIZE - 1
+#ifdef HARVARD_CACHE
+ mcrne p15, 0, r0, c7, c10, 1 @ clean D line
+#else
+ mcrne p15, 0, r0, c7, c11, 1 @ clean unified line
+#endif
+ tst r1, #D_CACHE_LINE_SIZE - 1
+ bic r1, r1, #D_CACHE_LINE_SIZE - 1
+#ifdef HARVARD_CACHE
+ mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line
+#else
+ mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line
+#endif
+1:
+#ifdef HARVARD_CACHE
+ mcr p15, 0, r0, c7, c6, 1 @ invalidate D line
+#else
+ mcr p15, 0, r0, c7, c7, 1 @ invalidate unified line
+#endif
+ add r0, r0, #D_CACHE_LINE_SIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * v6_dma_clean_range(start,end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(dma_clean_range)
+ bic r0, r0, #D_CACHE_LINE_SIZE - 1
+1:
+#ifdef HARVARD_CACHE
+ mcr p15, 0, r0, c7, c10, 1 @ clean D line
+#else
+ mcr p15, 0, r0, c7, c11, 1 @ clean unified line
+#endif
+ add r0, r0, #D_CACHE_LINE_SIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * v6_dma_flush_range(start,end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(dma_flush_range)
+ bic r0, r0, #D_CACHE_LINE_SIZE - 1
+1:
+#ifdef HARVARD_CACHE
+ mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
+#else
+ mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate line
+#endif
+ add r0, r0, #D_CACHE_LINE_SIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
diff --git a/arch/arm/cpu/cache-armv7.S b/arch/arm/cpu/cache-armv7.S
new file mode 100644
index 0000000..b370acd
--- /dev/null
+++ b/arch/arm/cpu/cache-armv7.S
@@ -0,0 +1,185 @@
+#include <linux/linkage.h>
+
+ENTRY(__mmu_cache_on)
+ mov r12, lr
+#ifdef CONFIG_MMU
+ mrc p15, 0, r11, c0, c1, 4 @ read ID_MMFR0
+ tst r11, #0xf @ VMSA
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ tst r11, #0xf @ VMSA
+ mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
+#endif
+ mrc p15, 0, r0, c1, c0, 0 @ read control reg
+ orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement
+ orr r0, r0, #0x003c @ write buffer
+#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ orr r0, r0, #1 << 25 @ big-endian page tables
+#endif
+ orrne r0, r0, #1 @ MMU enabled
+ movne r1, #-1
+ mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer
+ mcrne p15, 0, r1, c3, c0, 0 @ load domain access control
+#endif
+ mcr p15, 0, r0, c1, c0, 0 @ load control register
+ mrc p15, 0, r0, c1, c0, 0 @ and read it back
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 4 @ ISB
+ mov pc, r12
+ENDPROC(__mmu_cache_on)
+
+ENTRY(__mmu_cache_off)
+ mrc p15, 0, r0, c1, c0
+#ifdef CONFIG_MMU
+ bic r0, r0, #0x000d
+#else
+ bic r0, r0, #0x000c
+#endif
+ mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
+ mov r12, lr
+ bl __mmu_cache_flush
+ mov r0, #0
+#ifdef CONFIG_MMU
+ mcr p15, 0, r0, c8, c7, 0 @ invalidate whole TLB
+#endif
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTC
+ mcr p15, 0, r0, c7, c10, 4 @ DSB
+ mcr p15, 0, r0, c7, c5, 4 @ ISB
+ mov pc, r12
+ENDPROC(__mmu_cache_on)
+
+ENTRY(__mmu_cache_flush)
+ mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1
+ tst r10, #0xf << 16 @ hierarchical cache (ARMv7)
+ mov r10, #0
+ beq hierarchical
+ mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D
+ b iflush
+hierarchical:
+ mcr p15, 0, r10, c7, c10, 5 @ DMB
+ stmfd sp!, {r0-r7, r9-r11}
+ mrc p15, 1, r0, c0, c0, 1 @ read clidr
+ ands r3, r0, #0x7000000 @ extract loc from clidr
+ mov r3, r3, lsr #23 @ left align loc bit field
+ beq finished @ if loc is 0, then no need to clean
+ mov r10, #0 @ start clean at cache level 0
+loop1:
+ add r2, r10, r10, lsr #1 @ work out 3x current cache level
+ mov r1, r0, lsr r2 @ extract cache type bits from clidr
+ and r1, r1, #7 @ mask of the bits for current cache only
+ cmp r1, #2 @ see what cache we have at this level
+ blt skip @ skip if no cache, or just i-cache
+ mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
+ mcr p15, 0, r10, c7, c5, 4 @ isb to sych the new cssr&csidr
+ mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
+ and r2, r1, #7 @ extract the length of the cache lines
+ add r2, r2, #4 @ add 4 (line length offset)
+ ldr r4, =0x3ff
+ ands r4, r4, r1, lsr #3 @ find maximum number on the way size
+ clz r5, r4 @ find bit position of way size increment
+ ldr r7, =0x7fff
+ ands r7, r7, r1, lsr #13 @ extract max number of the index size
+loop2:
+ mov r9, r4 @ create working copy of max way size
+loop3:
+ orr r11, r10, r9, lsl r5 @ factor way and cache number into r11
+ orr r11, r11, r7, lsl r2 @ factor index number into r11
+ mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
+ subs r9, r9, #1 @ decrement the way
+ bge loop3
+ subs r7, r7, #1 @ decrement the index
+ bge loop2
+skip:
+ add r10, r10, #2 @ increment cache number
+ cmp r3, r10
+ bgt loop1
+finished:
+ ldmfd sp!, {r0-r7, r9-r11}
+ mov r10, #0 @ swith back to cache level 0
+ mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
+iflush:
+ mcr p15, 0, r10, c7, c10, 4 @ DSB
+ mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB
+ mcr p15, 0, r10, c7, c10, 4 @ DSB
+ mcr p15, 0, r10, c7, c5, 4 @ ISB
+ mov pc, lr
+ENDPROC(__mmu_cache_flush)
+
+/*
+ * cache_line_size - get the cache line size from the CSIDR register
+ * (available on ARMv7+). It assumes that the CSSR register was configured
+ * to access the L1 data cache CSIDR.
+ */
+ .macro dcache_line_size, reg, tmp
+ mrc p15, 1, \tmp, c0, c0, 0 @ read CSIDR
+ and \tmp, \tmp, #7 @ cache line size encoding
+ mov \reg, #16 @ size offset
+ mov \reg, \reg, lsl \tmp @ actual cache line size
+ .endm
+
+/*
+ * v7_dma_inv_range(start,end)
+ *
+ * Invalidate the data cache within the specified region; we will
+ * be performing a DMA operation in this region and we want to
+ * purge old data in the cache.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(dma_inv_range)
+ dcache_line_size r2, r3
+ sub r3, r2, #1
+ tst r0, r3
+ bic r0, r0, r3
+ mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line
+
+ tst r1, r3
+ bic r1, r1, r3
+ mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line
+1:
+ mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line
+ add r0, r0, r2
+ cmp r0, r1
+ blo 1b
+ dsb
+ mov pc, lr
+ENDPROC(dma_inv_range)
+
+/*
+ * v7_dma_clean_range(start,end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(dma_clean_range)
+ dcache_line_size r2, r3
+ sub r3, r2, #1
+ bic r0, r0, r3
+1:
+ mcr p15, 0, r0, c7, c10, 1 @ clean D / U line
+ add r0, r0, r2
+ cmp r0, r1
+ blo 1b
+ dsb
+ mov pc, lr
+ENDPROC(dma_clean_range)
+
+/*
+ * v7_dma_flush_range(start,end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(dma_flush_range)
+ dcache_line_size r2, r3
+ sub r3, r2, #1
+ bic r0, r0, r3
+1:
+ mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line
+ add r0, r0, r2
+ cmp r0, r1
+ blo 1b
+ dsb
+ mov pc, lr
+ENDPROC(dma_flush_range)
+
diff --git a/arch/arm/cpu/mmu.c b/arch/arm/cpu/mmu.c
index 9e00927..fff7ddf 100644
--- a/arch/arm/cpu/mmu.c
+++ b/arch/arm/cpu/mmu.c
@@ -53,12 +53,10 @@ void mmu_init(void)
void mmu_enable(void)
{
asm volatile (
- "mrc p15, 0, r1, c1, c0, 0;"
- "orr r1, r1, #0x0007;" /* enable MMU + Dcache */
- "mcr p15, 0, r1, c1, c0, 0"
+ "bl __mmu_cache_on;"
:
:
- : "r1" /* Clobber list */
+ : "r0", "r1", "r2", "r3", "r6", "r10", "r12"
);
}
@@ -67,28 +65,13 @@ void mmu_enable(void)
*/
void mmu_disable(void)
{
+
asm volatile (
- "nop; "
- "nop; "
- "nop; "
- "nop; "
- "nop; "
- "nop; "
- /* test, clean and invalidate cache */
- "1: mrc p15, 0, r15, c7, c14, 3;"
- " bne 1b;"
- " mov pc, lr;"
- " mov r0, #0x0;"
- " mcr p15, 0, r0, c7, c10, 4;" /* drain the write buffer */
- " mcr p15, 0, r1, c7, c6, 0;" /* clear data cache */
- " mrc p15, 0, r1, c1, c0, 0;"
- " bic r1, r1, #0x0007;" /* disable MMU + DCache */
- " mcr p15, 0, r1, c1, c0, 0;"
- " mcr p15, 0, r0, c7, c6, 0;" /* flush d-cache */
- " mcr p15, 0, r0, c8, c7, 0;" /* flush i+d-TLBs */
+ "bl __mmu_cache_flush;"
+ "bl __mmu_cache_off;"
:
:
- : "r0" /* Clobber list */
+ : "r0", "r1", "r2", "r3", "r6", "r10", "r12"
);
}
--
1.6.6
More information about the barebox
mailing list