[PATCH 7/8] arm64: stacktrace: track all stack boundaries explicitly
Kalesh Singh
kaleshsingh at google.com
Tue Aug 2 09:05:39 PDT 2022
On Mon, Aug 1, 2022 at 5:12 AM Mark Rutland <mark.rutland at arm.com> wrote:
>
> Currently we call an on_accessible_stack() callback for each step of the
> unwinder, requiring redundant work to be performed in the core of the
> unwind loop (e.g. disabling preemption around accesses to per-cpu
> variables containing stack boundaries). To prevent unwind loops which go
> through a stack multiple times, we have to track the set of unwound
> stacks, requiring a stack_type enum which needs to cater for all the
> stacks of all possible callees. To prevent loops within a stack, we must
> track the prior FP values.
>
> This patch reworks the unwinder to minimize the work in the core of the
> unwinder, and to remove the need for the stack_type enum. The set of
> accessible stacks (and their boundaries) are determined at the start of
> the unwind, and the current stack is tracked during the unwind, with
> completed stacks removed from the set of accessible stacks. This makes
> the boundary checks more accurate (e.g. detecting overlapped frame
> records), and removes the need for separate tracking of the prior FP and
> visited stacks.
>
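(Aside for other reviewers: I found it easiest to convince myself of the
loop-prevention argument here with a tiny userspace model. This is my own
sketch, not kernel code -- find() and cur are invented names, and the check
is simplified to a bare fp rather than an (sp, size) range -- but it shows
why emptying a stack's boundaries on transition makes it impossible to
unwind back onto that stack:

    #include <stdio.h>

    struct range { unsigned long low, high; };

    static int find(struct range *stacks, int nr, unsigned long fp)
    {
        for (int i = 0; i < nr; i++)
            if (stacks[i].low && fp >= stacks[i].low && fp < stacks[i].high)
                return i;
        return -1;
    }

    int main(void)
    {
        struct range stacks[] = { { 0x1000, 0x2000 },   /* "task" */
                                  { 0x9000, 0xa000 } }; /* "irq"  */
        struct range cur = { 0, 0 };
        unsigned long fps[] = { 0x9100, 0x1100, 0x9200 };

        for (int i = 0; i < 3; i++) {
            unsigned long fp = fps[i];

            if (!(cur.low && fp >= cur.low && fp < cur.high)) {
                int n = find(stacks, 2, fp);
                if (n < 0) {
                    printf("fp %#lx: rejected\n", fp);
                    continue;
                }
                cur = stacks[n];
                stacks[n] = (struct range){ 0, 0 }; /* one-way transition */
            }
            cur.low = fp + 16; /* consume the 16-byte record */
            printf("fp %#lx: ok\n", fp);
        }
        return 0;
    }

The third fp points back into the "irq" stack after the unwind has left it,
and is rejected without any stacks_done bitmap or prev_type tracking.)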
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Fuad Tabba <tabba at google.com>
> Cc: Kalesh Singh <kaleshsingh at google.com>
> Cc: Madhavan T. Venkataraman <madvenka at linux.microsoft.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Mark Brown <broonie at kernel.org>
Reviewed-by: Kalesh Singh <kaleshsingh at google.com>
> ---
> arch/arm64/include/asm/stacktrace.h | 5 -
> arch/arm64/include/asm/stacktrace/common.h | 158 ++++++++++-----------
> arch/arm64/kernel/stacktrace.c | 91 +++++-------
> arch/arm64/kvm/hyp/nvhe/stacktrace.c | 35 ++---
> arch/arm64/kvm/stacktrace.c | 36 ++---
> 5 files changed, 131 insertions(+), 194 deletions(-)
>
> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> index aad0c6258721d..5a0edb064ea47 100644
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -30,7 +30,6 @@ static inline struct stack_info stackinfo_get_irq(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_IRQ,
> };
> }
>
> @@ -48,7 +47,6 @@ static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_TASK,
> };
> }
>
> @@ -70,7 +68,6 @@ static inline struct stack_info stackinfo_get_overflow(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_OVERFLOW,
> };
> }
> #else
> @@ -89,7 +86,6 @@ static inline struct stack_info stackinfo_get_sdei_normal(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_SDEI_NORMAL,
> };
> }
>
> @@ -101,7 +97,6 @@ static inline struct stack_info stackinfo_get_sdei_critical(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_SDEI_CRITICAL,
> };
> }
> #else
> diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
> index 0bd9d7ad295e0..c594f332bb946 100644
> --- a/arch/arm64/include/asm/stacktrace/common.h
> +++ b/arch/arm64/include/asm/stacktrace/common.h
> @@ -9,26 +9,12 @@
> #ifndef __ASM_STACKTRACE_COMMON_H
> #define __ASM_STACKTRACE_COMMON_H
>
> -#include <linux/bitmap.h>
> -#include <linux/bitops.h>
> #include <linux/kprobes.h>
> #include <linux/types.h>
>
> -enum stack_type {
> - STACK_TYPE_UNKNOWN,
> - STACK_TYPE_TASK,
> - STACK_TYPE_IRQ,
> - STACK_TYPE_OVERFLOW,
> - STACK_TYPE_SDEI_NORMAL,
> - STACK_TYPE_SDEI_CRITICAL,
> - STACK_TYPE_HYP,
> - __NR_STACK_TYPES
> -};
> -
> struct stack_info {
> unsigned long low;
> unsigned long high;
> - enum stack_type type;
> };
>
> /*
> @@ -38,32 +24,27 @@ struct stack_info {
> * @fp: The fp value in the frame record (or the real fp)
> * @pc: The lr value in the frame record (or the real lr)
> *
> - * @stacks_done: Stacks which have been entirely unwound, for which it is no
> - * longer valid to unwind to.
> - *
> - * @prev_fp: The fp that pointed to this frame record, or a synthetic value
> - * of 0. This is used to ensure that within a stack, each
> - * subsequent frame record is at an increasing address.
> - * @prev_type: The type of stack this frame record was on, or a synthetic
> - * value of STACK_TYPE_UNKNOWN. This is used to detect a
> - * transition from one stack to another.
> - *
> * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
> * associated with the most recently encountered replacement lr
> * value.
> *
> * @task: The task being unwound.
> + *
> + * @stack: The stack currently being unwound.
> + * @stacks: An array of stacks which can be unwound.
> + * @nr_stacks: The number of stacks in @stacks.
> */
> struct unwind_state {
> unsigned long fp;
> unsigned long pc;
> - DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
> - unsigned long prev_fp;
> - enum stack_type prev_type;
> #ifdef CONFIG_KRETPROBES
> struct llist_node *kr_cur;
> #endif
> struct task_struct *task;
> +
> + struct stack_info stack;
> + struct stack_info *stacks;
> + int nr_stacks;
> };
>
> static inline struct stack_info stackinfo_get_unknown(void)
> @@ -71,7 +52,6 @@ static inline struct stack_info stackinfo_get_unknown(void)
> return (struct stack_info) {
> .low = 0,
> .high = 0,
> - .type = STACK_TYPE_UNKNOWN,
> };
> }
>
> @@ -95,18 +75,7 @@ static inline void unwind_init_common(struct unwind_state *state,
> state->kr_cur = NULL;
> #endif
>
> - /*
> - * Prime the first unwind.
> - *
> - * In unwind_next() we'll check that the FP points to a valid stack,
> - * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
> - * treated as a transition to whichever stack that happens to be. The
> - * prev_fp value won't be used, but we set it to 0 such that it is
> - * definitely not an accessible stack address.
> - */
> - bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
> - state->prev_fp = 0;
> - state->prev_type = STACK_TYPE_UNKNOWN;
> + state->stack = stackinfo_get_unknown();
> }
>
> /*
> @@ -120,44 +89,91 @@ static inline void unwind_init_common(struct unwind_state *state,
> */
> typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp);
>
> +static struct stack_info *unwind_find_next_stack(const struct unwind_state *state,
> + unsigned long sp,
> + unsigned long size)
> +{
> + for (int i = 0; i < state->nr_stacks; i++) {
> + struct stack_info *info = &state->stacks[i];
> +
> + if (stackinfo_on_stack(info, sp, size))
> + return info;
> + }
> +
> + return NULL;
> +}
> +
> /*
> - * on_accessible_stack_fn() - Check whether a stack range is on any
> - * of the possible stacks.
> + * unwind_consume_stack - Check if an object is on an accessible stack,
> + * updating stack boundaries so that future unwind steps cannot consume this
> + * object again.
> *
> - * @tsk: task whose stack is being unwound
> - * @sp: stack address being checked
> - * @size: size of the stack range being checked
> - * @info: stack unwinding context
> + * @state: the current unwind state.
> + * @sp: the base address of the object.
> + * @size: the size of the object.
> */
> -typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
> - unsigned long sp, unsigned long size,
> - struct stack_info *info);
> +static inline int unwind_consume_stack(struct unwind_state *state,
> + unsigned long sp,
> + unsigned long size)
> +{
> + struct stack_info *next;
> +
> + if (stackinfo_on_stack(&state->stack, sp, size))
> + goto found;
> +
> + next = unwind_find_next_stack(state, sp, size);
> + if (!next)
> + return -EINVAL;
> +
> + /*
> + * Stack transitions are strictly one-way, and once we've
> + * transitioned from one stack to another, it's never valid to
> + * unwind back to the old stack.
> + *
> + * Remove the current stack from the list of stacks so that it cannot
> + * be found on a subsequent transition.
> + *
> + * Note that stacks can nest in several valid orders, e.g.
> + *
> + * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
> + * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
> + * HYP -> OVERFLOW
> + *
> + * ... so we do not check the specific order of stack
> + * transitions.
> + */
> + state->stack = *next;
> + *next = stackinfo_get_unknown();
> +
> +found:
> + /*
> + * Future unwind steps can only consume stack above this frame record.
> + * Update the current stack to start immediately above it.
> + */
> + state->stack.low = sp + size;
> + return 0;
> +}
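The "state->stack.low = sp + size" above is a nice improvement over the old
scheme, since it also catches records that overlap the one just consumed. A
quick standalone demonstration (my own model, assuming a 16-byte record; not
kernel code):

    #include <stdio.h>

    int main(void)
    {
        unsigned long high = 0x2000;
        unsigned long fp = 0x1800;       /* record just consumed      */
        unsigned long low = fp + 16;     /* boundary after consuming  */
        unsigned long next = fp + 8;     /* overlaps the prior record */

        /* Old check (fp must strictly increase): wrongly accepted. */
        printf("prev_fp check:  %s\n", next > fp ? "accepted" : "rejected");

        /* New check (must lie in the remaining range): rejected. */
        printf("boundary check: %s\n",
               next >= low && next + 16 <= high ? "accepted" : "rejected");
        return 0;
    }

With prev_fp, a record at fp + 8 passed the strictly-increasing test despite
overlapping the record at fp; with the shrunken boundary it can no longer
match, which I believe is the "overlapped frame records" case mentioned in
the commit message.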
>
> /*
> * unwind_next_frame_record() - Unwind to the next frame record indicated by
> * @state->fp.
> *
> * @state: the current unwind state.
> - * @accessible: determines whether the frame record is accessible
> * @translate_fp: translates the fp prior to access (may be NULL)
> */
> static inline int
> unwind_next_frame_record(struct unwind_state *state,
> - on_accessible_stack_fn accessible,
> stack_trace_translate_fp_fn translate_fp)
> {
> - struct stack_info info;
> unsigned long fp = state->fp, kern_fp = fp;
> - struct task_struct *tsk = state->task;
> + int err;
>
> if (fp & 0x7)
> return -EINVAL;
>
> - if (!accessible(tsk, fp, 16, &info))
> - return -EINVAL;
> -
> - if (test_bit(info.type, state->stacks_done))
> - return -EINVAL;
> + err = unwind_consume_stack(state, fp, 16);
> + if (err)
> + return err;
>
> /*
> * If fp is not from the current address space perform the necessary
> @@ -167,34 +183,10 @@ unwind_next_frame_record(struct unwind_state *state,
> return -EINVAL;
>
> /*
> - * As stacks grow downward, any valid record on the same stack must be
> - * at a strictly higher address than the prior record.
> - *
> - * Stacks can nest in several valid orders, e.g.
> - *
> - * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
> - * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
> - * HYP -> OVERFLOW
> - *
> - * ... but the nesting itself is strict. Once we transition from one
> - * stack to another, it's never valid to unwind back to that first
> - * stack.
> - */
> - if (info.type == state->prev_type) {
> - if (fp <= state->prev_fp)
> - return -EINVAL;
> - } else {
> - __set_bit(state->prev_type, state->stacks_done);
> - }
> -
> - /*
> - * Record this frame record's values and location. The prev_fp and
> - * prev_type are only meaningful to the next unwind_next() invocation.
> + * Record this frame record's values.
> */
> state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
> state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
> - state->prev_fp = fp;
> - state->prev_type = info.type;
>
> return 0;
> }
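For readers new to this code: the two READ_ONCE()s assume the AAPCS64 frame
chain layout, which is also where the hard-coded size of 16 in
unwind_consume_stack() comes from. Roughly (my annotation, not from the
patch; the code reads raw words rather than using such a struct):

    /*
     *  fp -> +--------------------+
     *        | saved fp (x29)     |   state->fp = *fp
     *        +--------------------+
     *        | saved lr (x30)     |   state->pc = *(fp + 8)
     *        +--------------------+
     */
    struct frame_record {
        unsigned long fp;   /* link to the caller's frame record */
        unsigned long lr;   /* return address into the caller    */
    };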
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index ca56fd732c2a9..1133be3ff774d 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -67,57 +67,6 @@ static inline void unwind_init_from_task(struct unwind_state *state,
> state->pc = thread_saved_pc(task);
> }
>
> -static bool on_accessible_stack(const struct task_struct *tsk,
> - unsigned long sp, unsigned long size,
> - struct stack_info *info)
> -{
> - struct stack_info tmp;
> -
> - tmp = stackinfo_get_task(tsk);
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - /*
> - * We can only safely access per-cpu stacks when unwinding the current
> - * task in a non-preemptible context.
> - */
> - if (tsk != current || preemptible())
> - goto not_found;
> -
> - tmp = stackinfo_get_irq();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - tmp = stackinfo_get_overflow();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - /*
> - * We can only safely access SDEI stacks when unwinding the current
> - * task in an NMI context.
> - */
> - if (!IS_ENABLED(CONFIG_VMAP_STACK) ||
> - !IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) ||
> - !in_nmi())
> - goto not_found;
> -
> - tmp = stackinfo_get_sdei_normal();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - tmp = stackinfo_get_sdei_critical();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> -not_found:
> - *info = stackinfo_get_unknown();
> - return false;
> -
> -found:
> - *info = tmp;
> - return true;
> -}
> -
> /*
> * Unwind from one frame record (A) to the next frame record (B).
> *
> @@ -135,7 +84,7 @@ static int notrace unwind_next(struct unwind_state *state)
> if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
> return -ENOENT;
>
> - err = unwind_next_frame_record(state, on_accessible_stack, NULL);
> + err = unwind_next_frame_record(state, NULL);
> if (err)
> return err;
>
> @@ -215,11 +164,47 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
> barrier();
> }
>
> +/*
> + * Per-cpu stacks are only accessible when unwinding the current task in a
> + * non-preemptible context.
> + */
> +#define STACKINFO_CPU(name) \
> + ({ \
> + ((task == current) && !preemptible()) \
> + ? stackinfo_get_##name() \
> + : stackinfo_get_unknown(); \
> + })
> +
> +/*
> + * SDEI stacks are only accessible when unwinding the current task in an NMI
> + * context.
> + */
> +#define STACKINFO_SDEI(name) \
> + ({ \
> + ((task == current) && in_nmi()) \
> + ? stackinfo_get_sdei_##name() \
> + : stackinfo_get_unknown(); \
> + })
> +
> noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> void *cookie, struct task_struct *task,
> struct pt_regs *regs)
> {
> - struct unwind_state state;
> + struct stack_info stacks[] = {
> + stackinfo_get_task(task),
> + STACKINFO_CPU(irq),
> +#if defined(CONFIG_VMAP_STACK)
> + STACKINFO_CPU(overflow),
> +#endif
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
> + STACKINFO_SDEI(normal),
> + STACKINFO_SDEI(critical),
> +#endif
> + };
> + struct unwind_state state = {
> + .stacks = stacks,
> + .nr_stacks = ARRAY_SIZE(stacks),
> + };
>
> if (regs) {
> if (task != current)
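One thing I like about this shape: entries for stacks that aren't accessible
in the current context simply collapse to stackinfo_get_unknown(), i.e. a
{ .low = 0, .high = 0 } range that can never satisfy stackinfo_on_stack(),
so no per-step accessibility logic remains. It also makes adding a stack a
purely local change. A hypothetical example (stackinfo_get_foo(), foo_stack,
and FOO_STACK_SIZE are invented for illustration):

    static inline struct stack_info stackinfo_get_foo(void)
    {
            unsigned long low = (unsigned long)raw_cpu_ptr(foo_stack);
            unsigned long high = low + FOO_STACK_SIZE;

            return (struct stack_info) { .low = low, .high = high };
    }

    /* ... plus a single new initializer in arch_stack_walk(): */
            STACKINFO_CPU(foo),

No enum update, and no changes to any unwinder callback.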
> diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
> index 5da0d44f61b73..08e1325ead73f 100644
> --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c
> +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
> @@ -47,7 +47,6 @@ static struct stack_info stackinfo_get_overflow(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_OVERFLOW,
> };
> }
>
> @@ -60,35 +59,12 @@ static struct stack_info stackinfo_get_hyp(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_HYP,
> };
> }
>
> -static bool on_accessible_stack(const struct task_struct *tsk,
> - unsigned long sp, unsigned long size,
> - struct stack_info *info)
> -{
> - struct stack_info tmp;
> -
> - tmp = stackinfo_get_overflow();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - tmp = stackinfo_get_hyp();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - *info = stackinfo_get_unknown();
> - return false;
> -
> -found:
> - *info = tmp;
> - return true;
> -}
> -
> static int unwind_next(struct unwind_state *state)
> {
> - return unwind_next_frame_record(state, on_accessible_stack, NULL);
> + return unwind_next_frame_record(state, NULL);
> }
>
> static void notrace unwind(struct unwind_state *state,
> @@ -144,7 +120,14 @@ static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
> */
> static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
> {
> - struct unwind_state state;
> + struct stack_info stacks[] = {
> + stackinfo_get_overflow(),
> + stackinfo_get_hyp(),
> + };
> + struct unwind_state state = {
> + .stacks = stacks,
> + .nr_stacks = ARRAY_SIZE(stacks),
> + };
> int idx = 0;
>
> kvm_nvhe_unwind_init(&state, fp, pc);
> diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c
> index 62ffa2b40da10..8295e132da2f0 100644
> --- a/arch/arm64/kvm/stacktrace.c
> +++ b/arch/arm64/kvm/stacktrace.c
> @@ -31,7 +31,6 @@ static struct stack_info stackinfo_get_overflow(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_OVERFLOW,
> };
> }
>
> @@ -45,7 +44,6 @@ static struct stack_info stackinfo_get_hyp(void)
> return (struct stack_info) {
> .low = low,
> .high = high,
> - .type = STACK_TYPE_HYP,
> };
> }
>
> @@ -94,32 +92,9 @@ static bool kvm_nvhe_stack_kern_va(unsigned long *addr)
> return true;
> }
>
> -static bool on_accessible_stack(const struct task_struct *tsk,
> - unsigned long sp, unsigned long size,
> - struct stack_info *info)
> -{
> - struct stack_info tmp;
> -
> - tmp = stackinfo_get_overflow();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - tmp = stackinfo_get_hyp();
> - if (stackinfo_on_stack(&tmp, sp, size))
> - goto found;
> -
> - *info = stackinfo_get_unknown();
> - return false;
> -
> -found:
> - *info = tmp;
> - return true;
> -}
> -
> static int unwind_next(struct unwind_state *state)
> {
> - return unwind_next_frame_record(state, on_accessible_stack,
> - kvm_nvhe_stack_kern_va);
> + return unwind_next_frame_record(state, kvm_nvhe_stack_kern_va);
> }
>
> static void unwind(struct unwind_state *state,
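(For context, since this host unwinder is the only user of translate_fp:
kvm_nvhe_stack_kern_va() rewrites the hyp VA in *fp into a kernel VA the
host can safely dereference, returning false if it can't. A toy callback
honouring the same contract, with HYP_TO_KERN_OFFSET an invented constant,
might look like:

    static bool toy_translate_fp(unsigned long *fp)
    {
            if (!*fp)
                    return false;

            *fp += HYP_TO_KERN_OFFSET;  /* invented, for illustration */
            return true;
    }

The kernel-side unwinder passes NULL instead, skipping translation
entirely.)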
> @@ -177,7 +152,14 @@ static void kvm_nvhe_dump_backtrace_end(void)
> static void hyp_dump_backtrace(unsigned long hyp_offset)
> {
> struct kvm_nvhe_stacktrace_info *stacktrace_info;
> - struct unwind_state state;
> + struct stack_info stacks[] = {
> + stackinfo_get_overflow(),
> + stackinfo_get_hyp(),
> + };
> + struct unwind_state state = {
> + .stacks = stacks,
> + .nr_stacks = ARRAY_SIZE(stacks),
> + };
>
> stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
>
> --
> 2.30.2
>