[PATCH v3 3/3] arm64: implement dynamic shadow call stack for Clang
Sami Tolvanen
samitolvanen at google.com
Wed Jun 15 14:32:53 PDT 2022
On Mon, Jun 13, 2022 at 03:40:08PM +0200, Ard Biesheuvel wrote:
> Implement dynamic shadow call stack support on Clang, by parsing the
> unwind tables at init time to locate all occurrences of PACIASP/AUTIASP
> instructions, and replacing them with the shadow call stack push and pop
> instructions, respectively.
>
> This is useful because the overhead of the shadow call stack is
> difficult to justify on hardware that implements pointer authentication
> (PAC), and given that the PAC instructions are executed as NOPs on
> hardware that doesn't, we can just replace them without breaking
> anything. As PACIASP/AUTIASP are guaranteed to be paired with respect to
> manipulations of the return address, replacing them 1:1 with shadow call
> stack pushes and pops is guaranteed to result in the desired behavior.
>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
> arch/arm64/Kconfig | 9 +
> arch/arm64/Makefile | 10 +-
> arch/arm64/include/asm/scs.h | 45 ++++
> arch/arm64/kernel/Makefile | 2 +
> arch/arm64/kernel/head.S | 3 +
> arch/arm64/kernel/irq.c | 2 +-
> arch/arm64/kernel/module.c | 8 +
> arch/arm64/kernel/patch-scs.c | 257 ++++++++++++++++++++
> arch/arm64/kernel/sdei.c | 2 +-
> arch/arm64/kernel/setup.c | 4 +
> 10 files changed, 338 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5f92344edff5..9ff72e582522 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -369,6 +369,15 @@ config KASAN_SHADOW_OFFSET
> config UNWIND_TABLES
> bool
>
> +config UNWIND_PATCH_PAC_INTO_SCS
> + bool "Enable shadow call stack dynamically using code patching"
> + # needs Clang with https://reviews.llvm.org/D111780 incorporated
> + depends on CC_IS_CLANG && CLANG_VERSION >= 150000
> + depends on ARM64_PTR_AUTH_KERNEL && CC_HAS_BRANCH_PROT_PAC_RET
> + depends on SHADOW_CALL_STACK
> + select UNWIND_TABLES
> + select DYNAMIC_SCS
> +
> source "arch/arm64/Kconfig.platforms"
>
> menu "Kernel Features"
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 4fbca56fa602..e439ebbd167d 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -77,10 +77,16 @@ branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=
> # We enable additional protection for leaf functions as there is some
> # narrow potential for ROP protection benefits and no substantial
> # performance impact has been observed.
> +PACRET-y := pac-ret+leaf
> +
> +# Using a shadow call stack in leaf functions is too costly, so avoid PAC there
> +# as well when we may be patching PAC into SCS
> +PACRET-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) := pac-ret
> +
> ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
> -branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=pac-ret+leaf+bti
> +branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=$(PACRET-y)+bti
> else
> -branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=pac-ret+leaf
> +branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=$(PACRET-y)
> endif
> # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the
> # compiler to generate them and consequently to break the single image contract
> diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
> index 8297bccf0784..51fcfc96ba71 100644
> --- a/arch/arm64/include/asm/scs.h
> +++ b/arch/arm64/include/asm/scs.h
> @@ -24,6 +24,51 @@
> .endm
> #endif /* CONFIG_SHADOW_CALL_STACK */
>
> +
> +#else
> +
> +#include <linux/scs.h>
> +#include <asm/cpufeature.h>
> +
> +#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
> +static inline bool should_patch_pac_into_scs(void)
> +{
> + /*
> + * We only enable the shadow call stack dynamically if we are running
> + * on a system that does not implement PAC or BTI. PAC and SCS provide
> + * roughly the same level of protection, and BTI relies on the PACIASP
> + * instructions serving as landing pads, preventing us from patching
> + * those instructions into something else.
> + */
> + u64 reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
> +
> + if (reg & ((0xf << ID_AA64ISAR1_APA_SHIFT) |
> + (0xf << ID_AA64ISAR1_API_SHIFT)))
> + return false;
> +
> + reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
> + if (reg & (0xf << ID_AA64ISAR2_APA3_SHIFT))
> + return false;
> +
> + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
> + reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
> + if (reg & (0xf << ID_AA64PFR1_BT_SHIFT))
> + return false;
> + }
> + return true;
> +}
> +
> +static inline void dynamic_scs_init(void)
> +{
> + if (should_patch_pac_into_scs())
> + static_branch_enable(&dynamic_scs_enabled);
> +}
Should we print a message to indicate that we are actually enabling SCS
at runtime? Otherwise, I think the only way to know would be to look at
/proc/meminfo, for example.
> +#else
> +static inline void dynamic_scs_init(void) {}
> +#endif
> +
> +int scs_patch(const u8 eh_frame[], int size);
> +
> #endif /* __ASSEMBLY __ */
>
> #endif /* _ASM_SCS_H */
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index fa7981d0d917..bd5ab51f86fb 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -74,6 +74,8 @@ obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
> obj-$(CONFIG_ARM64_MTE) += mte.o
> obj-y += vdso-wrap.o
> obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
> +obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
> +CFLAGS_patch-scs.o += -mbranch-protection=none
>
> # Force dependency (vdso*-wrap.S includes vdso.so through incbin)
> $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 6a98f1a38c29..e9601c8a1bcd 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -453,6 +453,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
> mov x0, x21 // pass FDT address in x0
> bl early_fdt_map // Try mapping the FDT early
> bl init_feature_override // Parse cpu feature overrides
> +#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
> + bl scs_patch_vmlinux
> +#endif
> #ifdef CONFIG_RANDOMIZE_BASE
> tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
> b.ne 0f
> diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
> index bda49430c9ea..c284ec35c27c 100644
> --- a/arch/arm64/kernel/irq.c
> +++ b/arch/arm64/kernel/irq.c
> @@ -39,7 +39,7 @@ static void init_irq_scs(void)
> {
> int cpu;
>
> - if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
> + if (!scs_is_enabled())
> return;
>
> for_each_possible_cpu(cpu)
I applied the series on top of -rc1 and tested it in qemu. With -cpu
cortex-a57 everything seems to work and PAC instructions are correctly
patched into SCS push/pop:
Thread 1 hit Breakpoint 1, 0xffff8000098b7920 in scs_patch_vmlinux ()
(gdb) x/1i scs_alloc
0xffff8000081daeb8 <scs_alloc>: paciasp
(gdb) finish
Run till exit from #0 0xffff8000098b7920 in scs_patch_vmlinux ()
0xffff8000098b03cc in __primary_switched ()
(gdb) x/1i scs_alloc
0xffff8000081daeb8 <scs_alloc>: str x30, [x18], #8
However, with -cpu max I'm still seeing calls to scs_alloc despite the
following:
# dmesg | grep "Address authentication"
[ 0.000000] CPU features: detected: Address authentication (architected QARMA5 algorithm)
# zcat /proc/config.gz | grep -E '(SHADOW_|UNWIND|DYNAMIC_SCS)'
CONFIG_UNWIND_TABLES=y
CONFIG_UNWIND_PATCH_PAC_INTO_SCS=y
CONFIG_CC_HAVE_SHADOW_CALL_STACK=y
CONFIG_ARCH_SUPPORTS_SHADOW_CALL_STACK=y
CONFIG_SHADOW_CALL_STACK=y
CONFIG_DYNAMIC_SCS=y
It looks like we're correctly leaving the PAC instructions unpatched,
but scs_is_enabled() must be returning true:
Thread 1 hit Breakpoint 1, 0xffff8000098b7920 in scs_patch_vmlinux ()
(gdb) x/1i scs_alloc
0xffff8000081daeb8 <scs_alloc>: paciasp
(gdb) finish
Run till exit from #0 0xffff8000098b7920 in scs_patch_vmlinux ()
0xffff8000098b03cc in __primary_switched ()
(gdb) x/1i scs_alloc
0xffff8000081daeb8 <scs_alloc>: paciasp
(gdb) c
Continuing.
Thread 1 hit Breakpoint 2, 0xffff8000081daeb8 in scs_alloc ()
(gdb) bt
#0 0xffff8000081daeb8 in scs_alloc ()
#1 0xffff800008015b60 in init_irq_scs ()
#2 0xffff8000098b3c5c in init_IRQ ()
#3 0xffff8000098b0954 in start_kernel ()
#4 0xffff8000098b03f4 in __primary_switched ()
I'm guessing this is also why I'm getting the following panic after
attempting to load a module:
[ 25.549517] Unhandled 64-bit el1h sync exception on CPU0, ESR 0x0000000034000003 -- BTI
[ 25.549893] CPU: 0 PID: 156 Comm: modprobe Not tainted 5.19.0-rc1-00246-gef5e30ca27c0-dirty #1
[ 25.550139] Hardware name: linux,dummy-virt (DT)
[ 25.550426] pstate: 80400c09 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=j-)
[ 25.550591] pc : scs_handle_fde_frame+0x80/0x17c
[ 25.551341] lr : scs_handle_fde_frame+0x54/0x17c
[ 25.551424] sp : ffff80000a67bb90
[ 25.551476] x29: ffff80000a67bb90 x28: 0000000000000000 x27: ffff800000ff6025
[ 25.552440] x26: 0000000000000003 x25: 0000000000000002 x24: ffff800000ff6026
[ 25.552584] x23: ffff800008ffd000 x22: 00000000d50323bf x21: 00000000d503233f
[ 25.552725] x20: ffff800000ff0048 x19: ffff800000ff6014 x18: ffff80000a5d1000
[ 25.552867] x17: 0000000000000000 x16: ffff00001fe9cd88 x15: 24003032336f7470
[ 25.553020] x14: 180f0a0700000000 x13: 0000000000000040 x12: 0000000000000005
[ 25.553161] x11: 6c6300656c75646f x10: 0000000000000010 x9 : ffffffffffffffe4
[ 25.553354] x8 : ffff800008fd1870 x7 : 7f7f7f7f7f7f7f7f x6 : 16fefeff0eff1e0b
[ 25.553511] x5 : 0080808000800000 x4 : 0000000000000010 x3 : 1800000010001f0c
[ 25.553645] x2 : 1b011e7c0100527a x1 : 0000000000000000 x0 : ffff800000ff0048
[ 25.553980] Kernel panic - not syncing: Unhandled exception
[ 25.554134] CPU: 0 PID: 156 Comm: modprobe Not tainted 5.19.0-rc1-00246-gef5e30ca27c0-dirty #1
[ 25.554254] Hardware name: linux,dummy-virt (DT)
[ 25.554394] Call trace:
[ 25.554512] dump_backtrace+0xe4/0x104
[ 25.554773] show_stack+0x18/0x24
[ 25.554893] dump_stack_lvl+0x64/0x7c
[ 25.554983] dump_stack+0x18/0x38
[ 25.555067] panic+0x14c/0x370
[ 25.555149] el1t_64_irq_handler+0x0/0x1c
[ 25.555231] el1_abort+0x0/0x5c
[ 25.555314] el1h_64_sync+0x64/0x68
[ 25.555395] scs_handle_fde_frame+0x80/0x17c
[ 25.555482] scs_patch+0x7c/0xa0
[ 25.555565] module_finalize+0xe0/0x100
[ 25.555647] post_relocation+0xd4/0xf0
[ 25.555730] load_module+0x924/0x11fc
[ 25.555811] __arm64_sys_finit_module+0xbc/0x108
[ 25.555895] invoke_syscall+0x40/0x118
[ 25.555984] el0_svc_common+0xb4/0xf0
[ 25.556067] do_el0_svc+0x2c/0xb4
[ 25.556150] el0_svc+0x2c/0x7c
[ 25.556233] el0t_64_sync_handler+0x84/0xf0
[ 25.556315] el0t_64_sync+0x190/0x194
This is with defconfig + SHADOW_CALL_STACK + UNWIND_PATCH_PAC_INTO_SCS.
Any thoughts on whether I'm doing something wrong here?
Sami
More information about the linux-arm-kernel mailing list