[PATCH v3] arm64: implement support for static call trampolines

Ard Biesheuvel <ardb@kernel.org>
Fri Nov 20 03:21:03 EST 2020


Implement arm64 support for the 'unoptimized' static call variety, which
routes all calls through a single trampoline that is patched to perform a
tail call to the selected function.
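
For context, here is a minimal sketch of how such a static call is
declared, invoked and retargeted using the generic static_call API (the
names my_key, my_func and my_other_func are made up for illustration):

	#include <linux/static_call.h>

	static int my_func(int x)
	{
		return x + 1;
	}

	static int my_other_func(int x)
	{
		return x + 2;
	}

	DEFINE_STATIC_CALL(my_key, my_func);

	int caller(int x)
	{
		/* branches to the trampoline, which tail-calls the target */
		return static_call(my_key)(x);
	}

	void retarget(void)
	{
		/* ends up in arch_static_call_transform() below */
		static_call_update(my_key, my_other_func);
	}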

Since static call targets may be located in modules loaded out of direct
branching range, we need an ADRP/LDR pair to load the branch target from
a literal into x16, followed by a branch-to-register (BR) instruction to
perform an indirect call. Unlike on x86, there is no pressing need on
arm64 to avoid indirect calls at all costs, but hiding the call from the
compiler, as is done here, does have some benefits (see the C analogue
after this list):
- the literal is located in .rodata, which gives us the same robustness
  advantage that code patching does;
- no performance hit on CFI-enabled Clang builds that decorate
  compiler-emitted indirect calls with branch target validity checks.
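
To make the resulting control flow concrete, the patched trampoline
behaves roughly like the C analogue below (illustrative only; the real
sequence is the asm in static_call.h further down, and the arguments of
the call simply remain in their registers):

	typedef void (*target_t)(void);

	/* 64-bit literal in .rodata, rewritten at runtime */
	static target_t literal_target;

	void tramp(void)
	{
		target_t t = literal_target;	/* adrp/ldr x16	*/

		if (!t)				/* cbz x16, 0f	*/
			return;			/* 0: ret	*/
		t();				/* br x16	*/
	}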

Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
v3: get rid of any instruction patching altogether, and simply patch the
    target into a literal pool located in .rodata
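
    (Not part of the patch: arch_static_call_transform() below recovers
    the literal address by decoding the ADRP/LDR pair that follows the
    BTI landing pad; with adrp_imm and ldr_imm12 standing for the
    decoded immediates, the arithmetic is roughly:

	literal = ALIGN_DOWN(tramp + 4, SZ_4K)	/* ADRP base: PC, low 12 bits cleared */
		+ (adrp_imm << 12)		/* ADRP page offset */
		+ (ldr_imm12 << 3);		/* 64-bit LDR scales #imm12 by 8 */

    which is why the code multiplies the LDR immediate by 8.)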

 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/insn.h        |  1 +
 arch/arm64/include/asm/static_call.h | 28 ++++++++++++++++++++
 arch/arm64/kernel/Makefile           |  2 +-
 arch/arm64/kernel/insn.c             | 14 +++++++---
 arch/arm64/kernel/static_call.c      | 20 ++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S      |  1 +
 7 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..e1598afd736f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -173,6 +173,7 @@ config ARM64
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_STATIC_CALL
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4b39293d0f72..4a748cf88f29 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -382,6 +382,7 @@ static inline bool aarch64_insn_is_adr_adrp(u32 insn)
 
 int aarch64_insn_read(void *addr, u32 *insnp);
 int aarch64_insn_write(void *addr, u32 insn);
+int aarch64_literal_write(void *addr, u64 literal);
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
 bool aarch64_insn_uses_literal(u32 insn);
 bool aarch64_insn_is_branch(u32 insn);
diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
new file mode 100644
index 000000000000..665ec2a7cdb2
--- /dev/null
+++ b/arch/arm64/include/asm/static_call.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STATIC_CALL_H
+#define _ASM_STATIC_CALL_H
+
+#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, target)			    \
+	asm("	.pushsection	.static_call.text, \"ax\"		\n" \
+	    "	.align		3					\n" \
+	    "	.globl		" STATIC_CALL_TRAMP_STR(name) "		\n" \
+	    STATIC_CALL_TRAMP_STR(name) ":				\n" \
+	    "	hint 	34	/* BTI C */				\n" \
+	    "	adrp	x16, 1f						\n" \
+	    "	ldr	x16, [x16, :lo12:1f]				\n" \
+	    "	cbz	x16, 0f						\n" \
+	    "	br	x16						\n" \
+	    "0:	ret							\n" \
+	    "	.popsection						\n" \
+	    "	.pushsection	.rodata, \"a\"				\n" \
+	    "	.align		3					\n" \
+	    "1:	.quad		" target "				\n" \
+	    "	.popsection						\n")
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)			\
+	__ARCH_DEFINE_STATIC_CALL_TRAMP(name, #func)
+
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)			\
+	__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "0x0")
+
+#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index bbaf0bc4ad60..f579800eb860 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -17,7 +17,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o proton-pack.o
+			   syscall.o proton-pack.o static_call.o
 
 targets			+= efi-entry.o
 
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6c0de2f60ea9..dba5068b4a1d 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -136,7 +136,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
 	return ret;
 }
 
-static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
+static int __kprobes __aarch64_text_write(void *addr, void *text, int size)
 {
 	void *waddr = addr;
 	unsigned long flags = 0;
@@ -145,7 +145,7 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
 	raw_spin_lock_irqsave(&patch_lock, flags);
 	waddr = patch_map(addr, FIX_TEXT_POKE0);
 
-	ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
+	ret = copy_to_kernel_nofault(waddr, text, size);
 
 	patch_unmap(FIX_TEXT_POKE0);
 	raw_spin_unlock_irqrestore(&patch_lock, flags);
@@ -155,7 +155,15 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
 
 int __kprobes aarch64_insn_write(void *addr, u32 insn)
 {
-	return __aarch64_insn_write(addr, cpu_to_le32(insn));
+	__le32 i = cpu_to_le32(insn);
+
+	return __aarch64_text_write(addr, &i, AARCH64_INSN_SIZE);
+}
+
+int aarch64_literal_write(void *addr, u64 literal)
+{
+	BUG_ON(!IS_ALIGNED((u64)addr, sizeof(u64)));
+	return __aarch64_text_write(addr, &literal, sizeof(u64));
 }
 
 bool __kprobes aarch64_insn_uses_literal(u32 insn)
diff --git a/arch/arm64/kernel/static_call.c b/arch/arm64/kernel/static_call.c
new file mode 100644
index 000000000000..09f0f5017934
--- /dev/null
+++ b/arch/arm64/kernel/static_call.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/static_call.h>
+#include <linux/memory.h>
+#include <asm/insn.h>
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+	u64 literal;
+	int ret;
+
+	/* decode the ADRP/LDR pair to recover the address of the literal */
+	literal = ALIGN_DOWN((u64)tramp + 4, SZ_4K) +
+		  aarch64_insn_adrp_get_offset(le32_to_cpup(tramp + 4)) +
+		  8 * aarch64_insn_decode_immediate(AARCH64_INSN_IMM_12,
+						    le32_to_cpup(tramp + 8));
+
+	ret = aarch64_literal_write((void *)literal, (u64)func);
+	WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1bda604f4c70..5c858582a7f8 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -135,6 +135,7 @@ SECTIONS
 			IDMAP_TEXT
 			HIBERNATE_TEXT
 			TRAMP_TEXT
+			STATIC_CALL_TEXT
 			*(.fixup)
 			*(.gnu.warning)
 		. = ALIGN(16);
-- 
2.17.1