[PATCH v2 7/7] ARM: implement support for vmap'ed stacks
Ard Biesheuvel
ardb at kernel.org
Mon Oct 18 14:08:39 PDT 2021
On Mon, 18 Oct 2021 at 23:00, Nick Desaulniers <ndesaulniers at google.com> wrote:
>
> On Mon, Oct 18, 2021 at 7:16 AM Ard Biesheuvel <ardb at kernel.org> wrote:
> >
> > Wire up the generic support for managing task stack allocations via vmalloc,
> > and implement the entry code that detects whether we faulted because of a
> > stack overrun (or future stack overrun caused by pushing the pt_regs array).
> >
> > While this adds a fair amount of tricky entry asm code, it should be
> > noted that it only adds a TST + branch to the svc_entry path. The code
> > implementing the non-trivial handling of the overflow stack is emitted
> > out-of-line into the .text section.
> >
> > Since on ARM, we rely on do_translation_fault() to keep PMD level page
> > table entries that cover the vmalloc region up to date, we need to
> > ensure that we don't hit such a stale PMD entry when accessing the
> > stack. So we do a dummy read from the new stack while still running from
> > the old one on the context switch path, and bump the vmalloc_seq counter
> > when PMD level entries in the vmalloc range are modified, so that the MM
> > switch fetches the latest version of the entries.
> >
> > Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> > Tested-by: Keith Packard <keithpac at amazon.com>
> > ---
> > arch/arm/Kconfig | 1 +
> > arch/arm/include/asm/page.h | 4 +
> > arch/arm/include/asm/thread_info.h | 8 ++
> > arch/arm/kernel/entry-armv.S | 79 ++++++++++++++++++--
> > arch/arm/kernel/entry-header.S | 57 ++++++++++++++
> > arch/arm/kernel/irq.c | 9 ++-
> > arch/arm/kernel/traps.c | 65 +++++++++++++++-
> > arch/arm/kernel/unwind.c | 3 +-
> > arch/arm/kernel/vmlinux.lds.S | 4 +-
> > 9 files changed, 219 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > index d46b243e1b26..964cc41a0fed 100644
> > --- a/arch/arm/Kconfig
> > +++ b/arch/arm/Kconfig
> > @@ -126,6 +126,7 @@ config ARM
> > select RTC_LIB
> > select SYS_SUPPORTS_APM_EMULATION
> > select THREAD_INFO_IN_TASK if CURRENT_POINTER_IN_TPIDRURO
> > + select HAVE_ARCH_VMAP_STACK if THREAD_INFO_IN_TASK
> > select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
> > # Above selects are sorted alphabetically; please add new ones
> > # according to that. Thanks.
> > diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
> > index 11b058a72a5b..7b871ed99ccf 100644
> > --- a/arch/arm/include/asm/page.h
> > +++ b/arch/arm/include/asm/page.h
> > @@ -149,6 +149,10 @@ extern void copy_page(void *to, const void *from);
> > #include <asm/pgtable-2level-types.h>
> > #endif
> >
> > +#ifdef CONFIG_VMAP_STACK
> > +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
> > +#endif
> > +
> > #endif /* CONFIG_MMU */
> >
> > typedef struct page *pgtable_t;
> > diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> > index 787511396f3f..900c8ae484c9 100644
> > --- a/arch/arm/include/asm/thread_info.h
> > +++ b/arch/arm/include/asm/thread_info.h
> > @@ -25,6 +25,14 @@
> > #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
> > #define THREAD_START_SP (THREAD_SIZE - 8)
> >
> > +#ifdef CONFIG_VMAP_STACK
> > +#define THREAD_ALIGN (2 * THREAD_SIZE)
> > +#else
> > +#define THREAD_ALIGN THREAD_SIZE
> > +#endif
> > +
> > +#define OVERFLOW_STACK_SIZE SZ_4K
> > +
> > #ifndef __ASSEMBLY__
> >
> > struct task_struct;
> > diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> > index 80afab1c344b..6910d6596e08 100644
> > --- a/arch/arm/kernel/entry-armv.S
> > +++ b/arch/arm/kernel/entry-armv.S
> > @@ -58,6 +58,14 @@ UNWIND( .setfp fpreg, sp )
> > @
> > subs r2, sp, r0 @ SP above bottom of IRQ stack?
> > rsbscs r2, r2, #THREAD_SIZE @ ... and below the top?
> > +#ifdef CONFIG_VMAP_STACK
> > + bcs .L\@
> > + mov_l r2, overflow_stack @ Take base address
> > + add r2, r2, r3 @ Top of this CPU's overflow stack
> > + subs r2, r0, r2 @ Compare with incoming SP
> > + rsbscs r2, r2, #OVERFLOW_STACK_SIZE
> > +.L\@:
> > +#endif
> > movcs sp, r0 @ If so, revert to incoming SP
> >
> > #ifndef CONFIG_UNWINDER_ARM
> > @@ -189,13 +197,18 @@ ENDPROC(__und_invalid)
> > #define SPFIX(code...)
> > #endif
> >
> > - .macro svc_entry, stack_hole=0, trace=1, uaccess=1
> > + .macro svc_entry, stack_hole=0, trace=1, uaccess=1, overflow_check=1
> > UNWIND(.fnstart )
> > - UNWIND(.save {r0 - pc} )
> > sub sp, sp, #(SVC_REGS_SIZE + \stack_hole)
> > + THUMB( add sp, r0 ) @ get SP in a GPR without
> > + THUMB( sub r0, sp, r0 ) @ using a temp register
> > +
> > + .if \overflow_check
> > + UNWIND(.save {r0 - pc} )
> > + do_overflow_check (SVC_REGS_SIZE + \stack_hole)
> > + .endif
> > +
> > #ifdef CONFIG_THUMB2_KERNEL
> > - add sp, r0 @ get SP in a GPR without
> > - sub r0, sp, r0 @ using a temp register
> > tst r0, #4 @ test stack pointer alignment
> > sub r0, sp, r0 @ restore original R0
> > sub sp, r0 @ restore original SP
> > @@ -828,12 +841,20 @@ ENTRY(__switch_to)
> > str r7, [r8]
> > #endif
> > mov r0, r5
> > -#if !defined(CONFIG_THUMB2_KERNEL)
> > +#if !defined(CONFIG_THUMB2_KERNEL) && !defined(CONFIG_VMAP_STACK)
> > set_current r7
> > ldmia r4, {r4 - sl, fp, sp, pc} @ Load all regs saved previously
> > #else
> > mov r1, r7
> > ldmia r4, {r4 - sl, fp, ip, lr} @ Load all regs saved previously
> > +#ifdef CONFIG_VMAP_STACK
> > + @
> > + @ Do a dummy read from the new stack while running from the old one so
> > + @ that we can rely on do_translation_fault() to fix up any stale PMD
> > + @ entries covering the vmalloc region.
> > + @
> > + ldr r2, [ip]
> > +#endif
> > set_current r1
> > mov sp, ip
> > ret lr
> > @@ -841,6 +862,54 @@ ENTRY(__switch_to)
> > UNWIND(.fnend )
> > ENDPROC(__switch_to)
> >
> > +#ifdef CONFIG_VMAP_STACK
> > + .text
> > +__bad_stack:
> > + @
> > + @ We detected an overflow in svc_entry, which switched to the
> > + @ overflow stack. Stash the exception regs, and head to our overflow
> > + @ handler. Entered with the original value of SP in IP, and the original
> > + @ value of IP in TPIDRURW
> > + @
> > +
> > +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
> > + mov ip, ip @ mov expected by unwinder
> > + push {fp, ip, lr, pc} @ GCC flavor frame record
> > +#else
> > + str ip, [sp, #-8]! @ store original SP
> > + push {fpreg, lr} @ Clang flavor frame record
> > +#endif
> > +UNWIND( ldr ip, [r0, #4] ) @ load exception LR
> > +UNWIND( str ip, [sp, #12] ) @ store in the frame record
> > + mrc p15, 0, ip, c13, c0, 2 @ reload IP
> > +
> > + @ Store the original GPRs to the new stack.
> > + svc_entry uaccess=0, overflow_check=0
> > +
> > +UNWIND( .save {sp, pc} )
> > +UNWIND( .save {fpreg, lr} )
> > +UNWIND( .setfp fpreg, sp )
> > +
> > + ldr fpreg, [sp, #S_SP] @ Add our frame record
> > + @ to the linked list
> > +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
> > + ldr r1, [fp, #4] @ reload SP at entry
> > + add fp, fp, #12
> > +#else
> > + ldr r1, [fpreg, #8]
> > +#endif
> > + str r1, [sp, #S_SP] @ store in pt_regs
> > +
> > + @ Stash the regs for handle_bad_stack
> > + mov r0, sp
> > +
> > + @ Time to die
> > + bl handle_bad_stack
> > + nop
> > +UNWIND( .fnend )
> > +ENDPROC(__bad_stack)
> > +#endif
> > +
> > __INIT
> >
> > /*
> > diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
> > index ae24dd54e9ef..823dd1aa6e3e 100644
> > --- a/arch/arm/kernel/entry-header.S
> > +++ b/arch/arm/kernel/entry-header.S
> > @@ -423,3 +423,60 @@ scno .req r7 @ syscall number
> > tbl .req r8 @ syscall table pointer
> > why .req r8 @ Linux syscall (!= 0)
> > tsk .req r9 @ current thread_info
> > +
> > + .macro do_overflow_check, frame_size:req
> > +#ifdef CONFIG_VMAP_STACK
> > + @
> > + @ Test whether the SP has overflowed. Task and IRQ stacks are aligned
> > + @ so that SP & BIT(THREAD_SIZE_ORDER + PAGE_SHIFT) should always be
> > + @ zero.
> > + @
> > +ARM( tst sp, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT) )
> > +THUMB( tst r0, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT) )
> > +THUMB( it ne )
> > + bne .Lstack_overflow_check\@
> > +
> > + .pushsection .text
> > +.Lstack_overflow_check\@:
> > + @
> > + @ Either we've just detected an overflow, or we've taken an exception
> > + @ while on the overflow stack. We cannot use the stack until we have
> > + @ decided which is the case. However, as we won't return to userspace,
> > + @ we can clobber some USR/SYS mode registers to free up GPRs.
> > + @
> > +
> > + mcr p15, 0, ip, c13, c0, 2 @ Stash IP in TPIDRURW
> > + mrs ip, cpsr
> > + eor ip, ip, #(SVC_MODE ^ SYSTEM_MODE)
> > + msr cpsr_c, ip @ Switch to SYS mode
> > + eor ip, ip, #(SVC_MODE ^ SYSTEM_MODE)
> > + mov sp, ip @ Stash mode in SP_usr
> > +
> > + @ Load the overflow stack into IP using LR_usr as a scratch register
> > + mov_l lr, overflow_stack + OVERFLOW_STACK_SIZE
> > + mrc p15, 0, ip, c13, c0, 4 @ Get CPU offset
> > + add ip, ip, lr @ IP := this CPU's overflow stack
> > + mov lr, sp @ Unstash mode into LR_usr
> > + msr cpsr_c, lr @ Switch back to SVC mode
> > +
> > + @
> > + @ Check whether we are already on the overflow stack. This may happen,
> > + @ e.g., when performing accesses that may fault when dumping the stack.
> > + @ The overflow stack is not in the vmalloc space so we only need to
> > + @ check whether the incoming SP is below the top of the overflow stack.
> > + @
> > +ARM( subs ip, sp, ip ) @ Delta with top of overflow stack
> > +THUMB( subs ip, r0, ip )
> > + mrclo p15, 0, ip, c13, c0, 2 @ Restore IP
> > + blo .Lout\@ @ Carry on
> > +
> > +THUMB( sub r0, sp, r0 ) @ Restore original R0
> > +THUMB( sub sp, r0 ) @ Restore original SP
> > + sub sp, sp, ip @ Switch to overflow stack
> > + add ip, sp, ip @ Keep incoming SP value in IP
> > + add ip, ip, #\frame_size @ Undo svc_entry's SP change
> > + b __bad_stack
> > + .popsection
> > +.Lout\@:
> > +#endif
> > + .endm
> > diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
> > index bab3159b9074..25eacfd0f439 100644
> > --- a/arch/arm/kernel/irq.c
> > +++ b/arch/arm/kernel/irq.c
> > @@ -56,7 +56,14 @@ static void __init init_irq_stacks(void)
> > int cpu;
> >
> > for_each_possible_cpu(cpu) {
> > - stack = (u8 *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
> > + if (!IS_ENABLED(CONFIG_VMAP_STACK))
> > + stack = (u8 *)__get_free_pages(GFP_KERNEL,
> > + THREAD_SIZE_ORDER);
> > + else
> > + stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
> > + THREADINFO_GFP, NUMA_NO_NODE,
> > + __builtin_return_address(0));
>
> Thanks for the improved clang support in v3. The frame differences
> LGTM. The only feedback I had was here, and a truly minor nit.
>
Better not to mention it at all then, as it distracts from the real
discussion on a complex and potentially disruptive change like this
one.
More information about the linux-arm-kernel
mailing list