[PATCH v3 11/13] ARM: cacheflush: avoid clobbering the frame pointer

Ard Biesheuvel ardb at kernel.org
Thu Feb 3 00:22:02 PST 2022


Thumb2 uses R7 rather than R11 as the frame pointer, and even if we
rarely use a frame pointer to begin with when building in Thumb2 mode,
there are cases where it is required by the compiler (e.g., Clang when
inserting profiling hooks via -pg).

However, preserving and restoring the frame pointer is risky, as any
unhandled exceptions raised in the meantime will produce a bogus
backtrace, and it would be better not to touch the frame pointer at all.
This is the case even when CONFIG_FRAME_POINTER is not set, as the
unwind directives consumed by the unwinder may also refer to R7 or R11
as the unwind anchor, even if the frame pointer is not managed strictly
according to the frame pointer ABI.

So let's tweak the cacheflush asm code not to clobber R7 or R11 at all,
so that we can drop R7 from the clobber lists of the inline asm blocks
that call these routines, and remove the code that preserves/restores
R11.

Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
---
 arch/arm/include/asm/cacheflush.h  | 12 ++----
 arch/arm/mach-exynos/mcpm-exynos.c |  6 +--
 arch/arm/mm/cache-v7.S             | 40 +++++++++-----------
 3 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e68fb879e4f9..d27782331556 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -446,15 +446,10 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
  *   however some exceptions may exist.  Caveat emptor.
  *
  * - The clobber list is dictated by the call to v7_flush_dcache_*.
- *   fp is preserved to the stack explicitly prior disabling the cache
- *   since adding it to the clobber list is incompatible with having
- *   CONFIG_FRAME_POINTER=y.  ip is saved as well if ever r12-clobbering
- *   trampoline are inserted by the linker and to keep sp 64-bit aligned.
  */
 #define v7_exit_coherency_flush(level) \
 	asm volatile( \
 	".arch	armv7-a \n\t" \
-	"stmfd	sp!, {fp, ip} \n\t" \
 	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR \n\t" \
 	"bic	r0, r0, #"__stringify(CR_C)" \n\t" \
 	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR \n\t" \
@@ -464,10 +459,9 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
 	"bic	r0, r0, #(1 << 6)	@ disable local coherency \n\t" \
 	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR \n\t" \
 	"isb	\n\t" \
-	"dsb	\n\t" \
-	"ldmfd	sp!, {fp, ip}" \
-	: : : "r0","r1","r2","r3","r4","r5","r6","r7", \
-	      "r9","r10","lr","memory" )
+	"dsb" \
+	: : : "r0","r1","r2","r3","r4","r5","r6", \
+	      "r9","r10","ip","lr","memory" )
 
 void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
 			     void *kaddr, unsigned long len);
diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
index cd861c57d5ad..fd0dbeb93357 100644
--- a/arch/arm/mach-exynos/mcpm-exynos.c
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -35,7 +35,6 @@ static bool secure_firmware __ro_after_init;
  */
 #define exynos_v7_exit_coherency_flush(level) \
 	asm volatile( \
-	"stmfd	sp!, {fp, ip}\n\t"\
 	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
 	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
 	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
@@ -50,11 +49,10 @@ static bool secure_firmware __ro_after_init;
 	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
 	"isb\n\t" \
 	"dsb\n\t" \
-	"ldmfd	sp!, {fp, ip}" \
 	: \
 	: "Ir" (pmu_base_addr + S5P_INFORM0) \
-	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
-	  "r9", "r10", "lr", "memory")
+	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", \
+	  "r9", "r10", "ip", "lr", "memory")
 
 static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster)
 {
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 830bbfb26ca5..b9f55f0bc278 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -90,7 +90,7 @@ ENDPROC(v7_flush_icache_all)
  *
  *     Flush the D-cache up to the Level of Unification Inner Shareable
  *
- *     Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ *     Corrupted registers: r0-r6, r9-r10
  */
 
 ENTRY(v7_flush_dcache_louis)
@@ -117,7 +117,7 @@ ENDPROC(v7_flush_dcache_louis)
  *
  *	Flush the whole D-cache.
  *
- *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ *	Corrupted registers: r0-r6, r9-r10
  *
  *	- mm    - mm_struct describing address space
  */
@@ -149,22 +149,22 @@ flush_levels:
 	movw	r4, #0x3ff
 	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
 	clz	r5, r4				@ find bit position of way size increment
-	movw	r7, #0x7fff
-	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+	movw	r6, #0x7fff
+	and	r1, r6, r1, lsr #13		@ extract max number of the index size
+	mov	r6, #1
+	movne	r4, r4, lsl r5			@ # of ways shifted into bits [31:...]
+	movne	r6, r6, lsl r5			@ 1 shifted left by same amount
 loop1:
-	mov	r9, r7				@ create working copy of max index
+	mov	r9, r1				@ create working copy of max index
 loop2:
- ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
- THUMB(	lsl	r6, r4, r5		)
- THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
- ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
- THUMB(	lsl	r6, r9, r2		)
- THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
-	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
+	mov	r5, r9, lsl r2			@ factor way into r5
+	orr	r5, r5, r4			@ factor index number into r5
+	orr	r5, r5, r10			@ factor cache number into r5
+	mcr	p15, 0, r5, c7, c14, 2		@ clean & invalidate by set/way
 	subs	r9, r9, #1			@ decrement the index
 	bge	loop2
-	subs	r4, r4, #1			@ decrement the way
-	bge	loop1
+	subs	r4, r4, r6			@ decrement the way
+	bcs	loop1
 skip:
 	add	r10, r10, #2			@ increment cache number
 	cmp	r3, r10
@@ -192,14 +192,12 @@ ENDPROC(v7_flush_dcache_all)
  *
  */
 ENTRY(v7_flush_kern_cache_all)
- ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	stmfd	sp!, {r4-r6, r9-r10, lr}
 	bl	v7_flush_dcache_all
 	mov	r0, #0
 	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
- ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	ldmfd	sp!, {r4-r6, r9-r10, lr}
 	ret	lr
 ENDPROC(v7_flush_kern_cache_all)
 
@@ -210,14 +208,12 @@ ENDPROC(v7_flush_kern_cache_all)
  *     Invalidate the I-cache to the point of unification.
  */
 ENTRY(v7_flush_kern_cache_louis)
- ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	stmfd	sp!, {r4-r6, r9-r10, lr}
 	bl	v7_flush_dcache_louis
 	mov	r0, #0
 	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
- ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
- THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	ldmfd	sp!, {r4-r6, r9-r10, lr}
 	ret	lr
 ENDPROC(v7_flush_kern_cache_louis)
 
-- 
2.30.2




More information about the linux-arm-kernel mailing list