[PATCH] arm64: implement support for static call trampolines

Ard Biesheuvel <ardb@kernel.org>
Mon Oct 19 10:12:47 EDT 2020


Implement arm64 support for the 'unoptimized' static call variety,
which routes all calls through a single trampoline that is patched
to perform a tail call to the selected function.

Since static call targets may be located in modules loaded out of
direct branching range, we need to be able to fall back to issuing
an ADRP/ADD pair to load the branch target into X16 and branch to
it with a BR instruction. As this involves patching more than a
single B or NOP instruction (for which the architecture makes
special provisions in terms of the synchronization needed), we
should take care to only use aarch64_insn_patch_text_nosync() if
the subsequent instruction is still a 'RET' (which guarantees that
the one being patched is a B or a NOP).
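
To illustrate, the ADRP/ADD pair materializes the target address in
two steps, roughly equivalent to the following sketch (not actual
kernel code; 'dst' stands for the address of the branch target):

	unsigned long x16;

	x16  = dst & ~0xfffUL;	/* ADRP x16, <func>: page of the target,
				 * reachable within +/- 4 GiB of the PC */
	x16 += dst &  0xfffUL;	/* ADD x16, x16, :lo12:<func> */
				/* BR x16 then jumps to dst */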

Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
Note that I needed the changes after the --8<-- below to enable the
selftest, since it assumes HAVE_STATIC_CALL_INLINE.
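
For reference, a caller of the static call API would look roughly
like the hypothetical example below (my_call, my_func and other_func
are made up for illustration); DEFINE_STATIC_CALL() emits the
trampoline that arch_static_call_transform() patches:

	#include <linux/static_call.h>

	static int my_func(int x)	/* default target */
	{
		return x + 1;
	}

	static int other_func(int x)	/* alternative target */
	{
		return x + 2;
	}

	DEFINE_STATIC_CALL(my_call, my_func);

	static int caller(int x)
	{
		/* compiles to a 'bl' into the trampoline */
		return static_call(my_call)(x);
	}

	static void retarget(void)
	{
		/* patches the trampoline to tail call other_func */
		static_call_update(my_call, other_func);
	}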

 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/static_call.h | 26 ++++++++++
 arch/arm64/kernel/Makefile           |  2 +-
 arch/arm64/kernel/static_call.c      | 52 ++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S      |  1 +
 5 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 96633b174e39..7841026694e8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -172,6 +172,7 @@ config ARM64
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_STATIC_CALL
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/arm64/include/asm/static_call.h b/arch/arm64/include/asm/static_call.h
new file mode 100644
index 000000000000..d70e6495768a
--- /dev/null
+++ b/arch/arm64/include/asm/static_call.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STATIC_CALL_H
+#define _ASM_STATIC_CALL_H
+
+#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns)			\
+	asm(".pushsection	.static_call.text, \"ax\"	\n"	\
+	    ".align		2				\n"	\
+	    ".globl		" STATIC_CALL_TRAMP_STR(name) "	\n"	\
+	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
+	    "hint 34;		/* BTI C */			\n"	\
+	    insns "						\n"	\
+	    ".popsection					\n")
+
+/*
+ * We have to account for the possibility that the static call site may
+ * be updated to refer to a target that is out of range for an ordinary
+ * 'B' branch instruction, and so we need to pre-allocate some space for
+ * an ADRP/ADD/BR sequence.
+ */
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)			\
+	__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "b " #func "; ret; br x16")
+
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)			\
+	__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "nop; ret; br x16")
+
+#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index bbaf0bc4ad60..f579800eb860 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -17,7 +17,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o proton-pack.o
+			   syscall.o proton-pack.o static_call.o
 
 targets			+= efi-entry.o
 
diff --git a/arch/arm64/kernel/static_call.c b/arch/arm64/kernel/static_call.c
new file mode 100644
index 000000000000..e6e42b52aea4
--- /dev/null
+++ b/arch/arm64/kernel/static_call.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/static_call.h>
+#include <linux/memory.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+
+/*
+ * The static call trampoline consists of one of the following sequences:
+ *
+ *        (A)                  (B)                  (C)
+ * 0x0:  BTI   C              BTI   C              BTI   C
+ * 0x4:  B     <func>         NOP                  ADRP  X16, <func>
+ * 0x8:  RET                  RET                  ADD   X16, X16, :lo12:<func>
+ * 0xc:  BR    X16            BR    X16            BR    X16
+ *
+ * The architecture permits us to patch B instructions into NOPs or vice versa
+ * directly, but patching a multi-instruction sequence requires careful
+ * synchronization.
+ */
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+	unsigned long pc = (unsigned long)tramp + 4;
+	unsigned long dst = (unsigned long)func;
+	void *addrs[] = { (void *)pc, (void *)(pc + 4) };
+	u32 ret = le32_to_cpup(addrs[1]);
+	u32 insn[2];
+
+	insn[0] = func ? aarch64_insn_gen_branch_imm(pc, dst,
+						     AARCH64_INSN_BRANCH_NOLINK)
+		       : AARCH64_INSN_HINT_NOP;
+	insn[1] = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_30,
+					      AARCH64_INSN_BRANCH_RETURN);
+
+	if (ret == insn[1] && insn[0] != AARCH64_BREAK_FAULT) {
+		aarch64_insn_patch_text_nosync(addrs[0], insn[0]);
+		return;
+	}
+
+	if (insn[0] == AARCH64_BREAK_FAULT) {
+		insn[0] = aarch64_insn_gen_adr(pc, dst, AARCH64_INSN_REG_16,
+					       AARCH64_INSN_ADR_TYPE_ADRP);
+		insn[1] = aarch64_insn_gen_add_sub_imm(AARCH64_INSN_REG_16,
+						       AARCH64_INSN_REG_16,
+						       dst % SZ_4K,
+						       AARCH64_INSN_VARIANT_64BIT,
+						       AARCH64_INSN_ADSB_ADD);
+
+		BUG_ON(insn[0] == AARCH64_BREAK_FAULT);
+	}
+	aarch64_insn_patch_text(addrs, insn, ARRAY_SIZE(insn));
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5ca957e656ab..77f9f23934db 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -123,6 +123,7 @@ SECTIONS
 			IDMAP_TEXT
 			HIBERNATE_TEXT
 			TRAMP_TEXT
+			STATIC_CALL_TEXT
 			*(.fixup)
 			*(.gnu.warning)
 		. = ALIGN(16);
-- 
2.17.1


---------------8<---------------------
diff --git a/kernel/Makefile b/kernel/Makefile
index b74820d8b264..3d7fec961b40 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -111,7 +111,7 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
 obj-$(CONFIG_KCSAN) += kcsan/
 obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
-obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
+obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
diff --git a/kernel/static_call.c b/kernel/static_call.c
index 84565c2a41b8..efee9f72dc06 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -10,6 +10,8 @@
 #include <linux/processor.h>
 #include <asm/sections.h>
 
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
 extern struct static_call_site __start_static_call_sites[],
                               __stop_static_call_sites[];
 
@@ -437,6 +439,7 @@ int __init static_call_init(void)
        return 0;
 }
 early_initcall(static_call_init);
+#endif
 
 #ifdef CONFIG_STATIC_CALL_SELFTEST
 


