[PATCH 2/2] ARM: entry: avoid explicit literal loads

Ard Biesheuvel <ardb@kernel.org>
Tue Mar 1 04:04:40 PST 2022


ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into
registers without having to rely on literal loads that go via the
D-cache.  For older cores, we now support a similar arrangement, based
on PC-relative group relocations.
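
For illustration, loading the 32-bit word at a symbol such as cr_alignment
can take roughly one of the following forms (a sketch only; the exact
instructions and relocations are whatever the assembler emits for the mov_l
and __ldst_va macros below):

	@ literal pool: the symbol's address is itself fetched via a
	@ D-cache load from a nearby literal
	ldr	r8, .LCcralign
	ldr	r8, [r8]
	...
.LCcralign:
	.word	cr_alignment

	@ ARMv7: MOVW/MOVT pair, only the final load touches memory
	movw	r8, #:lower16:cr_alignment
	movt	r8, #:upper16:cr_alignment
	ldr	r8, [r8]

	@ pre-v7 with group relocations: PC-relative SUB/SUB/LDR sequence
	sub	r8, pc, #8		@ R_ARM_ALU_PC_G0_NC
	sub	r8, r8, #4		@ R_ARM_ALU_PC_G1_NC
	ldr	r8, [r8, #0]		@ R_ARM_LDR_PC_G2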

This means we can elide most literal loads entirely from the entry path,
by switching to the ldr_va macro to emit the appropriate sequence
depending on the target architecture revision.
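
ldr_va takes optional arguments for a condition code, a scratch register and
(added by this patch) an immediate offset, so the call sites below end up
roughly like:

	ldr_va	r8, cr_alignment				@ word at cr_alignment into r8
	ldr_va	pc, fp_enter, tmp=r4				@ jump through fp_enter, r4 as scratch
	ldr_va	ip, processor, offset=PROCESSOR_DABT_FUNC	@ processor.dabt_func into ip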

While at it, switch to the bl_r macro for invoking the right PABT/DABT
helpers instead of setting the LR register explicitly; the latter does not
play well with cores that speculate across function returns.
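
For the MULTI_PABORT case, for instance, the change amounts to roughly the
following (sketch; bl_r is the branch-and-link-to-register helper in
assembler.h, which can use BLX where the core provides it):

	@ before: literal load plus a hand-rolled call, which the return
	@ stack predictor cannot pair with the callee's return
	ldr	ip, .LCprocfns
	mov	lr, pc
	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]

	@ after: PC-relative load of the handler and an ordinary indirect call
	ldr_va	ip, processor, offset=PROCESSOR_PABT_FUNC
	bl_r	ip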

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/assembler.h | 18 +++++-----
 arch/arm/kernel/entry-armv.S     | 37 ++++----------------
 arch/arm/kernel/entry-common.S   | 10 +-----
 arch/arm/kernel/entry-header.S   |  3 +-
 4 files changed, 18 insertions(+), 50 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 96f4028f7423..3a76241d880f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -656,12 +656,11 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	__adldst_l	str, \src, \sym, \tmp, \cond
 	.endm
 
-	.macro		__ldst_va, op, reg, tmp, sym, cond
+	.macro		__ldst_va, op, reg, tmp, sym, cond, offset
 #if __LINUX_ARM_ARCH__ >= 7 || \
     !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
     (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	mov_l		\tmp, \sym, \cond
-	\op\cond	\reg, [\tmp]
 #else
 	/*
 	 * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@@ -673,20 +672,21 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
 	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
 	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond	\tmp, pc, #8
-.L1_\@: sub\cond	\tmp, \tmp, #4
-.L2_\@: \op\cond	\reg, [\tmp, #0]
+.L0_\@: sub\cond	\tmp, pc, #8 - \offset
+.L1_\@: sub\cond	\tmp, \tmp, #4 - \offset
+.L2_\@:
 #endif
+	\op\cond	\reg, [\tmp, #\offset]
 	.endm
 
 	/*
 	 * ldr_va - load a 32-bit word from the virtual address of \sym
 	 */
-	.macro		ldr_va, rd:req, sym:req, cond, tmp
+	.macro		ldr_va, rd:req, sym:req, cond, tmp, offset=0
 	.ifnb		\tmp
-	__ldst_va	ldr, \rd, \tmp, \sym, \cond
+	__ldst_va	ldr, \rd, \tmp, \sym, \cond, \offset
 	.else
-	__ldst_va	ldr, \rd, \rd, \sym, \cond
+	__ldst_va	ldr, \rd, \rd, \sym, \cond, \offset
 	.endif
 	.endm
 
@@ -694,7 +694,7 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	 * str_va - store a 32-bit word to the virtual address of \sym
 	 */
 	.macro		str_va, rn:req, sym:req, tmp:req, cond
-	__ldst_va	str, \rn, \tmp, \sym, \cond
+	__ldst_va	str, \rn, \tmp, \sym, \cond, 0
 	.endm
 
 	/*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 5609ca8ae46a..c88a1b5c0ca5 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -90,9 +90,8 @@ UNWIND(	.setfp	fpreg, sp		)
 	.macro	pabt_helper
 	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
 #ifdef MULTI_PABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_PABT_FUNC
+	bl_r	ip
 #else
 	bl	CPU_PABORT_HANDLER
 #endif
@@ -111,9 +110,8 @@ UNWIND(	.setfp	fpreg, sp		)
 	@ the fault status register in r1.  r9 must be preserved.
 	@
 #ifdef MULTI_DABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_DABT_FUNC
+	bl_r	ip
 #else
 	bl	CPU_DABORT_HANDLER
 #endif
@@ -331,16 +329,6 @@ __fiq_svc:
  UNWIND(.fnend		)
 ENDPROC(__fiq_svc)
 
-	.align	5
-.LCcralign:
-	.word	cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
-	.word	processor
-#endif
-.LCfp:
-	.word	fp_enter
-
 /*
  * Abort mode handlers
  */
@@ -399,7 +387,7 @@ ENDPROC(__fiq_abt)
  THUMB(	stmia	sp, {r0 - r12}	)
 
  ATRAP(	mrc	p15, 0, r7, c1, c0, 0)
- ATRAP(	ldr	r8, .LCcralign)
+ ATRAP(	ldr_va	r8, cr_alignment)
 
 	ldmia	r0, {r3 - r5}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
@@ -408,8 +396,6 @@ ENDPROC(__fiq_abt)
 	str	r3, [sp]		@ save the "real" r0 copied
 					@ from the exception stack
 
- ATRAP(	ldr	r8, [r8, #0])
-
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
@@ -534,9 +520,7 @@ __und_usr_thumb:
  */
 #if __LINUX_ARM_ARCH__ < 7
 /* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
-	ldr	r5, .LCcpu_architecture
-	ldr	r5, [r5]
+	ldr_va	r5, cpu_architecture
 	cmp	r5, #CPU_ARCH_ARMv7
 	blo	__und_usr_fault_16		@ 16bit undefined instruction
 /*
@@ -683,12 +667,6 @@ call_fpe:
 	ret.w	lr				@ CP#14 (Debug)
 	ret.w	lr				@ CP#15 (Control)
 
-#ifdef NEED_CPU_ARCHITECTURE
-	.align	2
-.LCcpu_architecture:
-	.word	__cpu_architecture
-#endif
-
 #ifdef CONFIG_NEON
 	.align	6
 
@@ -714,9 +692,8 @@ call_fpe:
 #endif
 
 do_fpe:
-	ldr	r4, .LCfp
 	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
-	ldr	pc, [r4]			@ Call FP module USR entry point
+	ldr_va	pc, fp_enter, tmp=r4		@ Call FP module USR entry point
 
 /*
  * The FP module is called with these registers set:
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index c928d6b04cce..f48ef2378d9b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -174,7 +174,7 @@ ENTRY(vector_swi)
 #endif
 	reload_current r10, ip
 	zero_fp
-	alignment_trap r10, ip, __cr_alignment
+	alignment_trap r10, ip, cr_alignment
 	asm_trace_hardirqs_on save=0
 	enable_irq_notrace
 	ct_user_exit save=0
@@ -304,14 +304,6 @@ __sys_trace_return:
 	bl	syscall_trace_exit
 	b	ret_slow_syscall
 
-	.align	5
-#ifdef CONFIG_ALIGNMENT_TRAP
-	.type	__cr_alignment, #object
-__cr_alignment:
-	.word	cr_alignment
-#endif
-	.ltorg
-
 	.macro	syscall_table_start, sym
 	.equ	__sys_nr, 0
 	.type	\sym, #object
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 9a1dc142f782..5865621bf691 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -48,8 +48,7 @@
 	.macro	alignment_trap, rtmp1, rtmp2, label
 #ifdef CONFIG_ALIGNMENT_TRAP
 	mrc	p15, 0, \rtmp2, c1, c0, 0
-	ldr	\rtmp1, \label
-	ldr	\rtmp1, [\rtmp1]
+	ldr_va	\rtmp1, \label
 	teq	\rtmp1, \rtmp2
 	mcrne	p15, 0, \rtmp1, c1, c0, 0
 #endif
-- 
2.30.2



