[PATCH 2/3] ARM: cache-v7: refactor v7_invalidate_l1 to avoid clobbering r5/r6
Ard Biesheuvel
ardb at kernel.org
Mon Feb 8 17:49:58 EST 2021
The cache invalidation code in v7_invalidate_l1 can be tweaked to
re-read the associativity from CCSIDR, and keep the set/way identifier
component in a single register that is assigned in the outer loop. This
way, we need two fewer registers.
Given that the number of sets is typically much larger than the
associativity, rearrange the code so that the outer loop has the smaller
number of iterations, ensuring that the re-read of CCSIDR only occurs a
handful of times in practice.
Fix the whitespace while at it, and update the comment to indicate that
this code is no longer a clone of anything else.
Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
---
arch/arm/mm/cache-v7.S | 51 ++++++++++----------
1 file changed, 25 insertions(+), 26 deletions(-)
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 307f381eee71..4544af4855f6 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -33,9 +33,8 @@ icache_size:
* processor. We fix this by performing an invalidate, rather than a
* clean + invalidate, before jumping into the kernel.
*
- * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
- * to be called for both secondary cores startup and primary core resume
- * procedures.
+ * This function needs to be called for both secondary cores startup and
+ * primary core resume procedures.
*/
ENTRY(v7_invalidate_l1)
mov r0, #0
@@ -43,32 +42,32 @@ ENTRY(v7_invalidate_l1)
isb
mrc p15, 1, r0, c0, c0, 0 @ read cache geometry from CCSIDR
- movw r1, #0x7fff
- and r2, r1, r0, lsr #13
+ movw r3, #0x3ff
+ and r3, r3, r0, lsr #3 @ 'Associativity' in CCSIDR[12:3]
+ clz r1, r3 @ WayShift
+ mov r2, #1
+ mov r3, r3, lsl r1 @ NumWays-1 shifted into bits [31:...]
+ movs r1, r2, lsl r1 @ #1 shifted left by same amount
+ moveq r1, #1 @ r1 needs value > 0 even if only 1 way
- movw r1, #0x3ff
+ and r2, r0, #0x7
+ add r2, r2, #4 @ SetShift
- and r3, r1, r0, lsr #3 @ NumWays - 1
- add r2, r2, #1 @ NumSets
+1: movw r4, #0x7fff
+ and r0, r4, r0, lsr #13 @ 'NumSets' in CCSIDR[27:13]
- and r0, r0, #0x7
- add r0, r0, #4 @ SetShift
-
- clz r1, r3 @ WayShift
- add r4, r3, #1 @ NumWays
-1: sub r2, r2, #1 @ NumSets--
- mov r3, r4 @ Temp = NumWays
-2: subs r3, r3, #1 @ Temp--
- mov r5, r3, lsl r1
- mov r6, r2, lsl r0
- orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
- mcr p15, 0, r5, c7, c6, 2
- bgt 2b
- cmp r2, #0
- bgt 1b
- dsb st
- isb
- ret lr
+2: mov r4, r0, lsl r2 @ NumSet << SetShift
+ orr r4, r4, r3 @ Reg = Way (pre-shifted in r3) | (Set << SetShift)
+ mcr p15, 0, r4, c7, c6, 2
+ subs r0, r0, #1 @ Set--
+ bpl 2b
+ subs r3, r3, r1 @ Way--
+ bcc 3f @ borrow => way 0 done; N is unusable as the way index occupies bit 31
+ mrc p15, 1, r0, c0, c0, 0 @ re-read cache geometry from CCSIDR
+ b 1b
+3: dsb st
+ isb
+ ret lr
ENDPROC(v7_invalidate_l1)
/*
--
2.20.1
More information about the linux-arm-kernel
mailing list