[PATCH 2/2] ARM: entry: avoid explicit literal loads
Ard Biesheuvel
ardb at kernel.org
Tue Mar 1 04:04:40 PST 2022
ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into
registers without having to rely on literal loads that go via the
D-cache. For older cores, we now support a similar arrangement, based
on PC-relative group relocations.
This means we can elide most literal loads entirely from the entry path,
by switching to the ldr_va macro to emit the appropriate sequence
depending on the target architecture revision.
While at it, switch to the bl_r macro for invoking the right PABT/DABT
helpers instead of setting the LR register explicitly, which does not
play well with cores that speculate across function returns.
Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
---
arch/arm/include/asm/assembler.h | 18 +++++-----
arch/arm/kernel/entry-armv.S | 37 ++++----------------
arch/arm/kernel/entry-common.S | 10 +-----
arch/arm/kernel/entry-header.S | 3 +-
4 files changed, 18 insertions(+), 50 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 96f4028f7423..3a76241d880f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -656,12 +656,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
__adldst_l str, \src, \sym, \tmp, \cond
.endm
- .macro __ldst_va, op, reg, tmp, sym, cond
+ .macro __ldst_va, op, reg, tmp, sym, cond, offset
#if __LINUX_ARM_ARCH__ >= 7 || \
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
mov_l \tmp, \sym, \cond
- \op\cond \reg, [\tmp]
#else
/*
* Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@@ -673,20 +672,21 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
.reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
.reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
.reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond \tmp, pc, #8
-.L1_\@: sub\cond \tmp, \tmp, #4
-.L2_\@: \op\cond \reg, [\tmp, #0]
+.L0_\@: sub\cond \tmp, pc, #8 - \offset
+.L1_\@: sub\cond \tmp, \tmp, #4 - \offset
+.L2_\@:
#endif
+ \op\cond \reg, [\tmp, #\offset]
.endm
/*
* ldr_va - load a 32-bit word from the virtual address of \sym
*/
- .macro ldr_va, rd:req, sym:req, cond, tmp
+ .macro ldr_va, rd:req, sym:req, cond, tmp, offset=0
.ifnb \tmp
- __ldst_va ldr, \rd, \tmp, \sym, \cond
+ __ldst_va ldr, \rd, \tmp, \sym, \cond, \offset
.else
- __ldst_va ldr, \rd, \rd, \sym, \cond
+ __ldst_va ldr, \rd, \rd, \sym, \cond, \offset
.endif
.endm
@@ -694,7 +694,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
* str_va - store a 32-bit word to the virtual address of \sym
*/
.macro str_va, rn:req, sym:req, tmp:req, cond
- __ldst_va str, \rn, \tmp, \sym, \cond
+ __ldst_va str, \rn, \tmp, \sym, \cond, 0
.endm
/*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 5609ca8ae46a..c88a1b5c0ca5 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -90,9 +90,8 @@ UNWIND( .setfp fpreg, sp )
.macro pabt_helper
@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
#ifdef MULTI_PABORT
- ldr ip, .LCprocfns
- mov lr, pc
- ldr pc, [ip, #PROCESSOR_PABT_FUNC]
+ ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC
+ bl_r ip
#else
bl CPU_PABORT_HANDLER
#endif
@@ -111,9 +110,8 @@ UNWIND( .setfp fpreg, sp )
@ the fault status register in r1. r9 must be preserved.
@
#ifdef MULTI_DABORT
- ldr ip, .LCprocfns
- mov lr, pc
- ldr pc, [ip, #PROCESSOR_DABT_FUNC]
+ ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC
+ bl_r ip
#else
bl CPU_DABORT_HANDLER
#endif
@@ -331,16 +329,6 @@ __fiq_svc:
UNWIND(.fnend )
ENDPROC(__fiq_svc)
- .align 5
-.LCcralign:
- .word cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
- .word processor
-#endif
-.LCfp:
- .word fp_enter
-
/*
* Abort mode handlers
*/
@@ -399,7 +387,7 @@ ENDPROC(__fiq_abt)
THUMB( stmia sp, {r0 - r12} )
ATRAP( mrc p15, 0, r7, c1, c0, 0)
- ATRAP( ldr r8, .LCcralign)
+ ATRAP( ldr_va r8, cr_alignment)
ldmia r0, {r3 - r5}
add r0, sp, #S_PC @ here for interlock avoidance
@@ -408,8 +396,6 @@ ENDPROC(__fiq_abt)
str r3, [sp] @ save the "real" r0 copied
@ from the exception stack
- ATRAP( ldr r8, [r8, #0])
-
@
@ We are now ready to fill in the remaining blanks on the stack:
@
@@ -534,9 +520,7 @@ __und_usr_thumb:
*/
#if __LINUX_ARM_ARCH__ < 7
/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
- ldr r5, .LCcpu_architecture
- ldr r5, [r5]
+ ldr_va r5, __cpu_architecture @ r5 = value of __cpu_architecture
cmp r5, #CPU_ARCH_ARMv7
blo __und_usr_fault_16 @ 16bit undefined instruction
/*
@@ -683,12 +667,6 @@ call_fpe:
ret.w lr @ CP#14 (Debug)
ret.w lr @ CP#15 (Control)
-#ifdef NEED_CPU_ARCHITECTURE
- .align 2
-.LCcpu_architecture:
- .word __cpu_architecture
-#endif
-
#ifdef CONFIG_NEON
.align 6
@@ -714,9 +692,8 @@ call_fpe:
#endif
do_fpe:
- ldr r4, .LCfp
add r10, r10, #TI_FPSTATE @ r10 = workspace
- ldr pc, [r4] @ Call FP module USR entry point
+ ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
/*
* The FP module is called with these registers set:
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index c928d6b04cce..f48ef2378d9b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -174,7 +174,7 @@ ENTRY(vector_swi)
#endif
reload_current r10, ip
zero_fp
- alignment_trap r10, ip, __cr_alignment
+ alignment_trap r10, ip, cr_alignment
asm_trace_hardirqs_on save=0
enable_irq_notrace
ct_user_exit save=0
@@ -304,14 +304,6 @@ __sys_trace_return:
bl syscall_trace_exit
b ret_slow_syscall
- .align 5
-#ifdef CONFIG_ALIGNMENT_TRAP
- .type __cr_alignment, #object
-__cr_alignment:
- .word cr_alignment
-#endif
- .ltorg
-
.macro syscall_table_start, sym
.equ __sys_nr, 0
.type \sym, #object
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 9a1dc142f782..5865621bf691 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -48,8 +48,7 @@
.macro alignment_trap, rtmp1, rtmp2, label
#ifdef CONFIG_ALIGNMENT_TRAP
mrc p15, 0, \rtmp2, c1, c0, 0
- ldr \rtmp1, \label
- ldr \rtmp1, [\rtmp1]
+ ldr_va \rtmp1, \label
teq \rtmp1, \rtmp2
mcrne p15, 0, \rtmp1, c1, c0, 0
#endif
--
2.30.2
More information about the linux-arm-kernel
mailing list