[PATCH v2] ARM: avoid literal references in inline assembly

Ard Biesheuvel ardb at kernel.org
Wed Dec 22 02:49:39 PST 2021


Nathan reports that the new get_current() and per-CPU offset accessors
may cause problems at build time due to the use of a literal to hold the
address of the respective variables. This is due to the fact that LLD
before v14 does not support the PC-relative group relocations that are
normally used for this, and the fallback relies on literals but does not
emit the literal pools explictly using the .ltorg directive.

./arch/arm/include/asm/current.h:53:6: error: out of range pc-relative fixup value
        asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
            ^
./arch/arm/include/asm/insn.h:25:2: note: expanded from macro 'LOAD_SYM_ARMV6'
        "       ldr     " #reg ", =" #sym "                     \n\t"   \
        ^
<inline asm>:1:3: note: instantiated into assembly here
                ldr     r0, =__current
                ^

Since emitting a literal pool in this particular case is not possible,
let's avoid the LOAD_SYM_ARMV6() entirely, and use the ordinary C
assigment instead.

As it turns out, there are other such cases, and here, using .ltorg to
emit the literal pool within range of the LDR instruction would be
possible due to the presence of an unconditional branch right after it.
Unfortunately, putting .ltorg directives in subsections appears to
confuse the Clang inline assembler, resulting in similar errors even
though the .ltorg is most definitely within range.

So let's fix this by emitting the literal explicitly, and not rely on
the assembler to figure this out. This means we have move the fallback
out of the LOAD_SYM_ARMV6() macro and into the callers.

Fixes: 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems")
Reported-by: Nathan Chancellor <natechancellor at gmail.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/1551
Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
---
Marked as v2 as it supersedes [0], which did not fully mitigate the
issue.

[0] https://lore.kernel.org/all/20211220225217.458335-1-ardb@kernel.org/

 arch/arm/include/asm/current.h | 13 +++++++++++--
 arch/arm/include/asm/insn.h    |  7 -------
 arch/arm/include/asm/percpu.h  |  8 ++++++++
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
index 69ecf4c6c725..2f9d79214b25 100644
--- a/arch/arm/include/asm/current.h
+++ b/arch/arm/include/asm/current.h
@@ -32,27 +32,36 @@ static inline __attribute_const__ struct task_struct *get_current(void)
 	 * https://github.com/ClangBuiltLinux/linux/issues/1485
 	 */
 	cur = __builtin_thread_pointer();
 #elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
 	asm("0:	mrc p15, 0, %0, c13, c0, 3			\n\t"
 #ifdef CONFIG_CPU_V6
 	    "1:							\n\t"
 	    "	.subsection 1					\n\t"
+#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
+    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 	    "2: " LOAD_SYM_ARMV6(%0, __current) "		\n\t"
 	    "	b	1b					\n\t"
+#else
+	    "2:	ldr	%0, 3f					\n\t"
+	    "	ldr	%0, [%0]				\n\t"
+	    "	b	1b					\n\t"
+	    "3:	.long	__current				\n\t"
+#endif
 	    "	.previous					\n\t"
 	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
 	    "	.long	0b - .					\n\t"
 	    "	b	. + (2b - 0b)				\n\t"
 	    "	.popsection					\n\t"
 #endif
 	    : "=r"(cur));
-#elif __LINUX_ARM_ARCH__>=7 || \
-      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
+#elif __LINUX_ARM_ARCH__>= 7 || \
+      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
+      (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 	cur = __current;
 #else
 	asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
 #endif
 	return cur;
 }
 
 #define current get_current()
diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h
index a160ed3ea427..faf3d1c28368 100644
--- a/arch/arm/include/asm/insn.h
+++ b/arch/arm/include/asm/insn.h
@@ -5,31 +5,24 @@
 #include <linux/types.h>
 
 /*
  * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
  * appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
  * which should be sufficient for the core kernel as well as modules loaded
  * into the module region. (Not supported by LLD before release 14)
  */
-#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
-    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 #define LOAD_SYM_ARMV6(reg, sym)					\
 	"	.globl	" #sym "				\n\t"	\
 	"	.reloc	10f, R_ARM_ALU_PC_G0_NC, " #sym "	\n\t"	\
 	"	.reloc	11f, R_ARM_ALU_PC_G1_NC, " #sym "	\n\t"	\
 	"	.reloc	12f, R_ARM_LDR_PC_G2, " #sym "		\n\t"	\
 	"10:	sub	" #reg ", pc, #8			\n\t"	\
 	"11:	sub	" #reg ", " #reg ", #4			\n\t"	\
 	"12:	ldr	" #reg ", [" #reg ", #0]		\n\t"
-#else
-#define LOAD_SYM_ARMV6(reg, sym)					\
-	"	ldr	" #reg ", =" #sym "			\n\t"	\
-	"	ldr	" #reg ", [" #reg "]			\n\t"
-#endif
 
 static inline unsigned long
 arm_gen_nop(void)
 {
 #ifdef CONFIG_THUMB2_KERNEL
 	return 0xf3af8000; /* nop.w */
 #else
 	return 0xe1a00000; /* mov r0, r0 */
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index a4a0d38d016a..28961d60877d 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -33,18 +33,26 @@ static inline unsigned long __my_cpu_offset(void)
 	 * Read TPIDRPRW.
 	 * We want to allow caching the value, so avoid using volatile and
 	 * instead use a fake stack read to hazard against barrier().
 	 */
 	asm("0:	mrc p15, 0, %0, c13, c0, 4			\n\t"
 #ifdef CONFIG_CPU_V6
 	    "1:							\n\t"
 	    "	.subsection 1					\n\t"
+#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
+    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 	    "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) "	\n\t"
 	    "	b	1b					\n\t"
+#else
+	    "2: ldr	%0, 3f					\n\t"
+	    "	ldr	%0, [%0]				\n\t"
+	    "	b	1b					\n\t"
+	    "3:	.long	__per_cpu_offset			\n\t"
+#endif
 	    "	.previous					\n\t"
 	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
 	    "	.long	0b - .					\n\t"
 	    "	b	. + (2b - 0b)				\n\t"
 	    "	.popsection					\n\t"
 #endif
 	     : "=r" (off)
 	     : "Q" (*(const unsigned long *)current_stack_pointer));
-- 
2.30.2




More information about the linux-arm-kernel mailing list