[PATCH v2] ARM: avoid literal references in inline assembly
Nathan Chancellor
nathan at kernel.org
Wed Dec 22 11:13:08 PST 2021
On Wed, Dec 22, 2021 at 11:49:39AM +0100, Ard Biesheuvel wrote:
> Nathan reports that the new get_current() and per-CPU offset accessors
> may cause problems at build time due to the use of a literal to hold the
> address of the respective variables. This is due to the fact that LLD
> before v14 does not support the PC-relative group relocations that are
> normally used for this, and the fallback relies on literals but does not
> emit the literal pools explictly using the .ltorg directive.
>
> ./arch/arm/include/asm/current.h:53:6: error: out of range pc-relative fixup value
> asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
> ^
> ./arch/arm/include/asm/insn.h:25:2: note: expanded from macro 'LOAD_SYM_ARMV6'
> " ldr " #reg ", =" #sym " \n\t" \
> ^
> <inline asm>:1:3: note: instantiated into assembly here
> ldr r0, =__current
> ^
>
> Since emitting a literal pool in this particular case is not possible,
> let's avoid the LOAD_SYM_ARMV6() entirely, and use the ordinary C
> assigment instead.
>
> As it turns out, there are other such cases, and here, using .ltorg to
> emit the literal pool within range of the LDR instruction would be
> possible due to the presence of an unconditional branch right after it.
> Unfortunately, putting .ltorg directives in subsections appears to
> confuse the Clang inline assembler, resulting in similar errors even
> though the .ltorg is most definitely within range.
>
> So let's fix this by emitting the literal explicitly, and not rely on
> the assembler to figure this out. This means we have move the fallback
> out of the LOAD_SYM_ARMV6() macro and into the callers.
>
> Fixes: 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems")
> Reported-by: Nathan Chancellor <natechancellor at gmail.com>
> Link: https://github.com/ClangBuiltLinux/linux/issues/1551
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
I build tested this with LLVM 12, 13, and 14 and boot tested a few
configs in QEMU, nothing screamed.
Tested-by: Nathan Chancellor <nathan at kernel.org>
> ---
> Marked as v2 as it supersedes [0], which did not fully mitigate the
> issue.
>
> [0] https://lore.kernel.org/all/20211220225217.458335-1-ardb@kernel.org/
>
> arch/arm/include/asm/current.h | 13 +++++++++++--
> arch/arm/include/asm/insn.h | 7 -------
> arch/arm/include/asm/percpu.h | 8 ++++++++
> 3 files changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
> index 69ecf4c6c725..2f9d79214b25 100644
> --- a/arch/arm/include/asm/current.h
> +++ b/arch/arm/include/asm/current.h
> @@ -32,27 +32,36 @@ static inline __attribute_const__ struct task_struct *get_current(void)
> * https://github.com/ClangBuiltLinux/linux/issues/1485
> */
> cur = __builtin_thread_pointer();
> #elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
> asm("0: mrc p15, 0, %0, c13, c0, 3 \n\t"
> #ifdef CONFIG_CPU_V6
> "1: \n\t"
> " .subsection 1 \n\t"
> +#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
> + !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
> "2: " LOAD_SYM_ARMV6(%0, __current) " \n\t"
> " b 1b \n\t"
> +#else
> + "2: ldr %0, 3f \n\t"
> + " ldr %0, [%0] \n\t"
> + " b 1b \n\t"
> + "3: .long __current \n\t"
> +#endif
> " .previous \n\t"
> " .pushsection \".alt.smp.init\", \"a\" \n\t"
> " .long 0b - . \n\t"
> " b . + (2b - 0b) \n\t"
> " .popsection \n\t"
> #endif
> : "=r"(cur));
> -#elif __LINUX_ARM_ARCH__>=7 || \
> - (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
> +#elif __LINUX_ARM_ARCH__>= 7 || \
> + (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
> + (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
> cur = __current;
> #else
> asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
> #endif
> return cur;
> }
>
> #define current get_current()
> diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h
> index a160ed3ea427..faf3d1c28368 100644
> --- a/arch/arm/include/asm/insn.h
> +++ b/arch/arm/include/asm/insn.h
> @@ -5,31 +5,24 @@
> #include <linux/types.h>
>
> /*
> * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
> * appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
> * which should be sufficient for the core kernel as well as modules loaded
> * into the module region. (Not supported by LLD before release 14)
> */
> -#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
> - !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
> #define LOAD_SYM_ARMV6(reg, sym) \
> " .globl " #sym " \n\t" \
> " .reloc 10f, R_ARM_ALU_PC_G0_NC, " #sym " \n\t" \
> " .reloc 11f, R_ARM_ALU_PC_G1_NC, " #sym " \n\t" \
> " .reloc 12f, R_ARM_LDR_PC_G2, " #sym " \n\t" \
> "10: sub " #reg ", pc, #8 \n\t" \
> "11: sub " #reg ", " #reg ", #4 \n\t" \
> "12: ldr " #reg ", [" #reg ", #0] \n\t"
> -#else
> -#define LOAD_SYM_ARMV6(reg, sym) \
> - " ldr " #reg ", =" #sym " \n\t" \
> - " ldr " #reg ", [" #reg "] \n\t"
> -#endif
>
> static inline unsigned long
> arm_gen_nop(void)
> {
> #ifdef CONFIG_THUMB2_KERNEL
> return 0xf3af8000; /* nop.w */
> #else
> return 0xe1a00000; /* mov r0, r0 */
> diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
> index a4a0d38d016a..28961d60877d 100644
> --- a/arch/arm/include/asm/percpu.h
> +++ b/arch/arm/include/asm/percpu.h
> @@ -33,18 +33,26 @@ static inline unsigned long __my_cpu_offset(void)
> * Read TPIDRPRW.
> * We want to allow caching the value, so avoid using volatile and
> * instead use a fake stack read to hazard against barrier().
> */
> asm("0: mrc p15, 0, %0, c13, c0, 4 \n\t"
> #ifdef CONFIG_CPU_V6
> "1: \n\t"
> " .subsection 1 \n\t"
> +#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
> + !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
> "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) " \n\t"
> " b 1b \n\t"
> +#else
> + "2: ldr %0, 3f \n\t"
> + " ldr %0, [%0] \n\t"
> + " b 1b \n\t"
> + "3: .long __per_cpu_offset \n\t"
> +#endif
> " .previous \n\t"
> " .pushsection \".alt.smp.init\", \"a\" \n\t"
> " .long 0b - . \n\t"
> " b . + (2b - 0b) \n\t"
> " .popsection \n\t"
> #endif
> : "=r" (off)
> : "Q" (*(const unsigned long *)current_stack_pointer));
> --
> 2.30.2
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
More information about the linux-arm-kernel
mailing list