[PATCH v3 3/3] arm64: implement dynamic shadow call stack for Clang

Sami Tolvanen samitolvanen at google.com
Wed Jun 15 14:32:53 PDT 2022


On Mon, Jun 13, 2022 at 03:40:08PM +0200, Ard Biesheuvel wrote:
> Implement dynamic shadow call stack support on Clang, by parsing the
> unwind tables at init time to locate all occurrences of PACIASP/AUTIASP
> instructions, and replacing them with the shadow call stack push and pop
> instructions, respectively.
> 
> This is useful because the overhead of the shadow call stack is
> difficult to justify on hardware that implements pointer authentication
> (PAC), and given that the PAC instructions are executed as NOPs on
> hardware that doesn't, we can just replace them without breaking
> anything. As PACIASP/AUTIASP are guaranteed to be paired with respect to
> manipulations of the return address, replacing them 1:1 with shadow call
> stack pushes and pops is guaranteed to result in the desired behavior.
> 
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
>  arch/arm64/Kconfig            |   9 +
>  arch/arm64/Makefile           |  10 +-
>  arch/arm64/include/asm/scs.h  |  45 ++++
>  arch/arm64/kernel/Makefile    |   2 +
>  arch/arm64/kernel/head.S      |   3 +
>  arch/arm64/kernel/irq.c       |   2 +-
>  arch/arm64/kernel/module.c    |   8 +
>  arch/arm64/kernel/patch-scs.c | 257 ++++++++++++++++++++
>  arch/arm64/kernel/sdei.c      |   2 +-
>  arch/arm64/kernel/setup.c     |   4 +
>  10 files changed, 338 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5f92344edff5..9ff72e582522 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -369,6 +369,15 @@ config KASAN_SHADOW_OFFSET
>  config UNWIND_TABLES
>  	bool
>  
> +config UNWIND_PATCH_PAC_INTO_SCS
> +	bool "Enable shadow call stack dynamically using code patching"
> +	# needs Clang with https://reviews.llvm.org/D111780 incorporated
> +	depends on CC_IS_CLANG && CLANG_VERSION >= 150000
> +	depends on ARM64_PTR_AUTH_KERNEL && CC_HAS_BRANCH_PROT_PAC_RET
> +	depends on SHADOW_CALL_STACK
> +	select UNWIND_TABLES
> +	select DYNAMIC_SCS
> +
>  source "arch/arm64/Kconfig.platforms"
>  
>  menu "Kernel Features"
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 4fbca56fa602..e439ebbd167d 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -77,10 +77,16 @@ branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=
>  # We enable additional protection for leaf functions as there is some
>  # narrow potential for ROP protection benefits and no substantial
>  # performance impact has been observed.
> +PACRET-y := pac-ret+leaf
> +
> +# Using a shadow call stack in leaf functions is too costly, so avoid PAC there
> +# as well when we may be patching PAC into SCS
> +PACRET-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) := pac-ret
> +
>  ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
> -branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=pac-ret+leaf+bti
> +branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=$(PACRET-y)+bti
>  else
> -branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=pac-ret+leaf
> +branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=$(PACRET-y)
>  endif
>  # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the
>  # compiler to generate them and consequently to break the single image contract
> diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
> index 8297bccf0784..51fcfc96ba71 100644
> --- a/arch/arm64/include/asm/scs.h
> +++ b/arch/arm64/include/asm/scs.h
> @@ -24,6 +24,51 @@
>  	.endm
>  #endif /* CONFIG_SHADOW_CALL_STACK */
>  
> +
> +#else
> +
> +#include <linux/scs.h>
> +#include <asm/cpufeature.h>
> +
> +#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
> +static inline bool should_patch_pac_into_scs(void)
> +{
> +	/*
> +	 * We only enable the shadow call stack dynamically if we are running
> +	 * on a system that does not implement PAC or BTI. PAC and SCS provide
> +	 * roughly the same level of protection, and BTI relies on the PACIASP
> +	 * instructions serving as landing pads, preventing us from patching
> +	 * those instructions into something else.
> +	 */
> +	u64 reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
> +
> +	if (reg & ((0xf << ID_AA64ISAR1_APA_SHIFT) |
> +		   (0xf << ID_AA64ISAR1_API_SHIFT)))
> +		return false;
> +
> +	reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
> +	if (reg & (0xf << ID_AA64ISAR2_APA3_SHIFT))
> +		return false;
> +
> +	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
> +		reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
> +		if (reg & (0xf << ID_AA64PFR1_BT_SHIFT))
> +			return false;
> +	}
> +	return true;
> +}
> +
> +static inline void dynamic_scs_init(void)
> +{
> +	if (should_patch_pac_into_scs())
> +		static_branch_enable(&dynamic_scs_enabled);
> +}

Should we print out a message to indicate we are actually enabling SCS
at runtime? Otherwise I think the only way to know would be to look at
/proc/meminfo, for example.

> +#else
> +static inline void dynamic_scs_init(void) {}
> +#endif
> +
> +int scs_patch(const u8 eh_frame[], int size);
> +
>  #endif /* __ASSEMBLY __ */
>  
>  #endif /* _ASM_SCS_H */
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index fa7981d0d917..bd5ab51f86fb 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -74,6 +74,8 @@ obj-$(CONFIG_ARM64_PTR_AUTH)		+= pointer_auth.o
>  obj-$(CONFIG_ARM64_MTE)			+= mte.o
>  obj-y					+= vdso-wrap.o
>  obj-$(CONFIG_COMPAT_VDSO)		+= vdso32-wrap.o
> +obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS)	+= patch-scs.o
> +CFLAGS_patch-scs.o			+= -mbranch-protection=none
>  
>  # Force dependency (vdso*-wrap.S includes vdso.so through incbin)
>  $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 6a98f1a38c29..e9601c8a1bcd 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -453,6 +453,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
>  	mov	x0, x21				// pass FDT address in x0
>  	bl	early_fdt_map			// Try mapping the FDT early
>  	bl	init_feature_override		// Parse cpu feature overrides
> +#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
> +	bl	scs_patch_vmlinux
> +#endif
>  #ifdef CONFIG_RANDOMIZE_BASE
>  	tst	x23, ~(MIN_KIMG_ALIGN - 1)	// already running randomized?
>  	b.ne	0f
> diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
> index bda49430c9ea..c284ec35c27c 100644
> --- a/arch/arm64/kernel/irq.c
> +++ b/arch/arm64/kernel/irq.c
> @@ -39,7 +39,7 @@ static void init_irq_scs(void)
>  {
>  	int cpu;
>  
> -	if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
> +	if (!scs_is_enabled())
>  		return;
>  
>  	for_each_possible_cpu(cpu)

I applied the series on top of -rc1 and tested it in qemu. With -cpu
cortex-a57 everything seems to work and PAC instructions are correctly
patched into SCS push/pop:

Thread 1 hit Breakpoint 1, 0xffff8000098b7920 in scs_patch_vmlinux ()
(gdb) x/1i scs_alloc
   0xffff8000081daeb8 <scs_alloc>:      paciasp
(gdb) finish
Run till exit from #0  0xffff8000098b7920 in scs_patch_vmlinux ()
0xffff8000098b03cc in __primary_switched ()
(gdb) x/1i scs_alloc
   0xffff8000081daeb8 <scs_alloc>:      str     x30, [x18], #8

However, with -cpu max I'm still seeing calls to scs_alloc despite the
following:

# dmesg | grep "Address authentication"
[    0.000000] CPU features: detected: Address authentication (architected QARMA5 algorithm)
# zcat /proc/config.gz | grep -E '(SHADOW_|UNWIND|DYNAMIC_SCS)'
CONFIG_UNWIND_TABLES=y
CONFIG_UNWIND_PATCH_PAC_INTO_SCS=y
CONFIG_CC_HAVE_SHADOW_CALL_STACK=y
CONFIG_ARCH_SUPPORTS_SHADOW_CALL_STACK=y
CONFIG_SHADOW_CALL_STACK=y
CONFIG_DYNAMIC_SCS=y

It looks like we're correctly leaving the PAC instructions unpatched,
but scs_is_enabled() must be returning true:

Thread 1 hit Breakpoint 1, 0xffff8000098b7920 in scs_patch_vmlinux ()
(gdb) x/1i scs_alloc
   0xffff8000081daeb8 <scs_alloc>:      paciasp
(gdb) finish
Run till exit from #0  0xffff8000098b7920 in scs_patch_vmlinux ()
0xffff8000098b03cc in __primary_switched ()
(gdb) x/1i scs_alloc
   0xffff8000081daeb8 <scs_alloc>:      paciasp
(gdb) c
Continuing.

Thread 1 hit Breakpoint 2, 0xffff8000081daeb8 in scs_alloc ()
(gdb) bt
#0  0xffff8000081daeb8 in scs_alloc ()
#1  0xffff800008015b60 in init_irq_scs ()
#2  0xffff8000098b3c5c in init_IRQ ()
#3  0xffff8000098b0954 in start_kernel ()
#4  0xffff8000098b03f4 in __primary_switched ()

I'm guessing this is also why I'm getting the following panic after
attempting to load a module:

[   25.549517] Unhandled 64-bit el1h sync exception on CPU0, ESR 0x0000000034000003 -- BTI
[   25.549893] CPU: 0 PID: 156 Comm: modprobe Not tainted 5.19.0-rc1-00246-gef5e30ca27c0-dirty #1
[   25.550139] Hardware name: linux,dummy-virt (DT)
[   25.550426] pstate: 80400c09 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=j-)
[   25.550591] pc : scs_handle_fde_frame+0x80/0x17c
[   25.551341] lr : scs_handle_fde_frame+0x54/0x17c
[   25.551424] sp : ffff80000a67bb90
[   25.551476] x29: ffff80000a67bb90 x28: 0000000000000000 x27: ffff800000ff6025
[   25.552440] x26: 0000000000000003 x25: 0000000000000002 x24: ffff800000ff6026
[   25.552584] x23: ffff800008ffd000 x22: 00000000d50323bf x21: 00000000d503233f
[   25.552725] x20: ffff800000ff0048 x19: ffff800000ff6014 x18: ffff80000a5d1000
[   25.552867] x17: 0000000000000000 x16: ffff00001fe9cd88 x15: 24003032336f7470
[   25.553020] x14: 180f0a0700000000 x13: 0000000000000040 x12: 0000000000000005
[   25.553161] x11: 6c6300656c75646f x10: 0000000000000010 x9 : ffffffffffffffe4
[   25.553354] x8 : ffff800008fd1870 x7 : 7f7f7f7f7f7f7f7f x6 : 16fefeff0eff1e0b
[   25.553511] x5 : 0080808000800000 x4 : 0000000000000010 x3 : 1800000010001f0c
[   25.553645] x2 : 1b011e7c0100527a x1 : 0000000000000000 x0 : ffff800000ff0048
[   25.553980] Kernel panic - not syncing: Unhandled exception
[   25.554134] CPU: 0 PID: 156 Comm: modprobe Not tainted 5.19.0-rc1-00246-gef5e30ca27c0-dirty #1
[   25.554254] Hardware name: linux,dummy-virt (DT)
[   25.554394] Call trace:
[   25.554512]  dump_backtrace+0xe4/0x104
[   25.554773]  show_stack+0x18/0x24
[   25.554893]  dump_stack_lvl+0x64/0x7c
[   25.554983]  dump_stack+0x18/0x38
[   25.555067]  panic+0x14c/0x370
[   25.555149]  el1t_64_irq_handler+0x0/0x1c
[   25.555231]  el1_abort+0x0/0x5c
[   25.555314]  el1h_64_sync+0x64/0x68
[   25.555395]  scs_handle_fde_frame+0x80/0x17c
[   25.555482]  scs_patch+0x7c/0xa0
[   25.555565]  module_finalize+0xe0/0x100
[   25.555647]  post_relocation+0xd4/0xf0
[   25.555730]  load_module+0x924/0x11fc
[   25.555811]  __arm64_sys_finit_module+0xbc/0x108
[   25.555895]  invoke_syscall+0x40/0x118
[   25.555984]  el0_svc_common+0xb4/0xf0
[   25.556067]  do_el0_svc+0x2c/0xb4
[   25.556150]  el0_svc+0x2c/0x7c
[   25.556233]  el0t_64_sync_handler+0x84/0xf0
[   25.556315]  el0t_64_sync+0x190/0x194

This is with defconfig + SHADOW_CALL_STACK + UNWIND_PATCH_PAC_INTO_SCS.
Any thoughts on whether I'm doing something wrong here?

Sami



More information about the linux-arm-kernel mailing list