[RFC] mixture of cleanups to cache-v7.S

Russell King - ARM Linux linux at arm.linux.org.uk
Thu Apr 2 15:57:59 PDT 2015


On Thu, Apr 02, 2015 at 11:49:47PM +0100, Russell King - ARM Linux wrote:
> Several cleanups are in the patch below... I'll separate them out, but
> I'd like to hear comments on them.  Basically:
> 
> 1. cache-v7.S is built for ARMv7 CPUs, so there's no reason not to
>    use movw and movt when loading large constants, rather than using
>    "ldr rd,=constant"
> 
> 2. we can do a much more efficient check for the errata in
>    v7_flush_dcache_louis than we were doing - rather than putting the
>    work-around code in the fast path, we can re-organise this such that
>    we only try to run the workaround code if the LoU field is zero.
> 
> 3. shift the bitfield we want to extract in the CLIDR to the appropriate
>    bit position prior to masking; this reduces the complexity of the
>    code, particularly with the SMP differences in v7_flush_dcache_louis.
> 
> 4. pre-shift the Cortex A9 MIDR value to be checked, and shift the
>    actual MIDR to lose the bottom four revision bits.
> 
> 5. as the v7_flush_dcache_louis code is more optimal, I see no reason not
>    to enable this workaround by default now - if people really want it to
>    be disabled, they can still choose that option.  This is in addition
>    to Versatile Express enabling it.  Given the memory-corrupting potential
>    of running without this erratum workaround, I think it's only sane that
>    enabling it should be encouraged, even though it only affects r0pX
>    CPUs.
> 
> One obvious issue comes up here though - in the case that the LoU bits
> are validly zero, we merely return from v7_flush_dcache_louis with no
> DSB or ISB.  However v7_flush_dcache_all always has a DSB and ISB at the
> end, even if LoC is zero.  Is this an intentional difference, or should
> v7_flush_dcache_louis always end with a DSB+ISB?

I should point out that if the DSB+ISB is needed, then the code can
instead be written as below - basically, we just shift the CLIDR so that
the relevant field is in position and branch to start_flush_levels, which
does the DMB, applies the mask to extract the field, and then decides
whether it has any levels to process.

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2eb6de9465bf..c26dfef393cd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1139,6 +1139,7 @@ config ARM_ERRATA_742231
 config ARM_ERRATA_643719
 	bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
 	depends on CPU_V7 && SMP
+	default y
 	help
 	  This option enables the workaround for the 643719 Cortex-A9 (prior to
 	  r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index b966656d2c2d..14bfdd584385 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -36,10 +36,10 @@ ENTRY(v7_invalidate_l1)
        mcr     p15, 2, r0, c0, c0, 0
        mrc     p15, 1, r0, c0, c0, 0
 
-       ldr     r1, =0x7fff
+       movw    r1, #0x7fff
        and     r2, r1, r0, lsr #13
 
-       ldr     r1, =0x3ff
+       movw    r1, #0x3ff
 
        and     r3, r1, r0, lsr #3      @ NumWays - 1
        add     r2, r2, #1              @ NumSets
@@ -88,23 +88,20 @@ ENDPROC(v7_flush_icache_all)
  */
 
 ENTRY(v7_flush_dcache_louis)
-	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
-	ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
-	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
+ALT_SMP(mov	r3, r0, lsr #20)		@ move LoUIS into position
+ALT_UP(	mov	r3, r0, lsr #26)		@ move LoUU into position
 #ifdef CONFIG_ARM_ERRATA_643719
-	ALT_SMP(mrceq	p15, 0, r2, c0, c0, 0)	@ read main ID register
-	ALT_UP(reteq	lr)			@ LoUU is zero, so nothing to do
-	ldreq	r1, =0x410fc090                 @ ID of ARM Cortex A9 r0p?
-	biceq	r2, r2, #0x0000000f             @ clear minor revision number
-	teqeq	r2, r1                          @ test for errata affected core and if so...
-	orreqs	r3, #(1 << 21)			@   fix LoUIS value (and set flags state to 'ne')
+ALT_SMP(ands	r3, r3, #7 << 1)		@ extract LoU field from clidr
+ALT_UP(	b	start_flush_levels)
+	bne	start_flush_levels		@ LoU != 0, start flushing
+	mrc	p15, 0, r2, c0, c0, 0		@ read main ID register
+	movw	r1, #:lower16:(0x410fc090 >> 4)	@ ID of ARM Cortex A9 r0p?
+	movt	r1, #:upper16:(0x410fc090 >> 4)
+	teq	r1, r2, lsr #4			@ test for errata affected core and if so...
+	moveq	r3, #1 << 1			@ fix LoUIS value (start_flush_levels re-tests r3)
 #endif
-	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
-	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
-	reteq	lr				@ return if level == 0
-	mov	r10, #0				@ r10 (starting level) = 0
-	b	flush_levels			@ start flushing cache levels
+	b	start_flush_levels		@ start flushing cache levels
 ENDPROC(v7_flush_dcache_louis)
 
 /*
@@ -117,10 +114,11 @@ ENDPROC(v7_flush_dcache_louis)
  *	- mm    - mm_struct describing address space
  */
 ENTRY(v7_flush_dcache_all)
-	dmb					@ ensure ordering with previous memory accesses
 	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
-	ands	r3, r0, #0x7000000		@ extract loc from clidr
-	mov	r3, r3, lsr #23			@ left align loc bit field
+	mov	r3, r0, lsr #23			@ align LoC
+start_flush_levels:
+	dmb					@ ensure ordering with previous memory accesses
+	ands	r3, r3, #7 << 1			@ extract loc from clidr
 	beq	finished			@ if loc is 0, then no need to clean
 	mov	r10, #0				@ start clean at cache level 0
 flush_levels:
@@ -140,10 +138,10 @@ flush_levels:
 #endif
 	and	r2, r1, #7			@ extract the length of the cache lines
 	add	r2, r2, #4			@ add 4 (line length offset)
-	ldr	r4, =0x3ff
+	movw	r4, #0x3ff
 	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
 	clz	r5, r4				@ find bit position of way size increment
-	ldr	r7, =0x7fff
+	movw	r7, #0x7fff
 	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
 loop1:
 	mov	r9, r7				@ create working copy of max index

-- 
FTTC broadband for 0.8mile line: currently at 10.5Mbps down 400kbps up
according to speedtest.net.



More information about the linux-arm-kernel mailing list