[PATCH 7/8] arm64: stacktrace: track all stack boundaries explicitly

Kalesh Singh kaleshsingh at google.com
Tue Aug 2 09:05:39 PDT 2022


On Mon, Aug 1, 2022 at 5:12 AM Mark Rutland <mark.rutland at arm.com> wrote:
>
> Currently we call an on_accessible_stack() callback for each step of the
> unwinder, requiring redundant work to be performed in the core of the
> unwind loop (e.g. disabling preemption around accesses to per-cpu
> variables containing stack boundaries). To prevent unwind loops which go
> through a stack multiple times, we have to track the set of unwound
> stacks, requiring a stack_type enum which needs to cater for all the
> stacks of all possible callees. To prevent loops within a stack, we must
> track the prior FP values.
>
> This patch reworks the unwinder to minimize the work in the core of the
> unwinder, and to remove the need for the stack_type enum. The set of
> accessible stacks (and their boundaries) are determined at the start of
> the unwind, and the current stack is tracked during the unwind, with
> completed stacks removed from the set of accessible stacks. This makes
> the boundary checks more accurate (e.g. detecting overlapped frame
> records), and removes the need for separate tracking of the prior FP and
> visited stacks.
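
An aside for readers following the series: the net effect of the rework
boils down to the following self-contained sketch (toy names, types and
addresses for illustration -- not the kernel's actual helpers):

    #include <assert.h>
    #include <stdbool.h>

    struct range { unsigned long low, high; };

    static bool on_range(const struct range *r, unsigned long sp,
                         unsigned long size)
    {
            return sp >= r->low && sp + size > sp && sp + size <= r->high;
    }

    struct toy_state {
            struct range stack;     /* the stack currently being unwound */
            struct range *stacks;   /* stacks not yet transitioned onto */
            int nr_stacks;
    };

    static int toy_consume(struct toy_state *s, unsigned long sp,
                           unsigned long size)
    {
            if (!on_range(&s->stack, sp, size)) {
                    int i;

                    for (i = 0; i < s->nr_stacks; i++) {
                            if (on_range(&s->stacks[i], sp, size))
                                    break;
                    }
                    if (i == s->nr_stacks)
                            return -1;  /* not on any accessible stack */

                    /*
                     * One-way transition: the new stack becomes current
                     * and is removed from the candidate set, so it can
                     * never be entered a second time.
                     */
                    s->stack = s->stacks[i];
                    s->stacks[i] = (struct range){ 0, 0 };
            }

            /* future steps may only consume stack above this object */
            s->stack.low = sp + size;
            return 0;
    }

    int main(void)
    {
            struct range stacks[] = {
                    { 0x1000, 0x2000 },             /* "task" */
                    { 0x3000, 0x4000 },             /* "irq" */
            };
            struct toy_state s = {
                    .stacks = stacks,               /* .stack starts empty */
                    .nr_stacks = 2,
            };

            assert(toy_consume(&s, 0x3f00, 16) == 0); /* record on irq */
            assert(toy_consume(&s, 0x1f00, 16) == 0); /* record on task */
            assert(toy_consume(&s, 0x3f80, 16) < 0);  /* irq again: loop, rejected */
            return 0;
    }
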
>
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Fuad Tabba <tabba at google.com>
> Cc: Kalesh Singh <kaleshsingh at google.com>
> Cc: Madhavan T. Venkataraman <madvenka at linux.microsoft.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Mark Brown <broonie at kernel.org>

Reviewed-by: Kalesh Singh <kaleshsingh at google.com>

> ---
>  arch/arm64/include/asm/stacktrace.h        |   5 -
>  arch/arm64/include/asm/stacktrace/common.h | 158 ++++++++++-----------
>  arch/arm64/kernel/stacktrace.c             |  91 +++++-------
>  arch/arm64/kvm/hyp/nvhe/stacktrace.c       |  35 ++---
>  arch/arm64/kvm/stacktrace.c                |  36 ++---
>  5 files changed, 131 insertions(+), 194 deletions(-)
>
> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> index aad0c6258721d..5a0edb064ea47 100644
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -30,7 +30,6 @@ static inline struct stack_info stackinfo_get_irq(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_IRQ,
>         };
>  }
>
> @@ -48,7 +47,6 @@ static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_TASK,
>         };
>  }
>
> @@ -70,7 +68,6 @@ static inline struct stack_info stackinfo_get_overflow(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_OVERFLOW,
>         };
>  }
>  #else
> @@ -89,7 +86,6 @@ static inline struct stack_info stackinfo_get_sdei_normal(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_SDEI_NORMAL,
>         };
>  }
>
> @@ -101,7 +97,6 @@ static inline struct stack_info stackinfo_get_sdei_critical(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_SDEI_CRITICAL,
>         };
>  }
>  #else
> diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
> index 0bd9d7ad295e0..c594f332bb946 100644
> --- a/arch/arm64/include/asm/stacktrace/common.h
> +++ b/arch/arm64/include/asm/stacktrace/common.h
> @@ -9,26 +9,12 @@
>  #ifndef __ASM_STACKTRACE_COMMON_H
>  #define __ASM_STACKTRACE_COMMON_H
>
> -#include <linux/bitmap.h>
> -#include <linux/bitops.h>
>  #include <linux/kprobes.h>
>  #include <linux/types.h>
>
> -enum stack_type {
> -       STACK_TYPE_UNKNOWN,
> -       STACK_TYPE_TASK,
> -       STACK_TYPE_IRQ,
> -       STACK_TYPE_OVERFLOW,
> -       STACK_TYPE_SDEI_NORMAL,
> -       STACK_TYPE_SDEI_CRITICAL,
> -       STACK_TYPE_HYP,
> -       __NR_STACK_TYPES
> -};
> -
>  struct stack_info {
>         unsigned long low;
>         unsigned long high;
> -       enum stack_type type;
>  };
>
>  /*
> @@ -38,32 +24,27 @@ struct stack_info {
>   * @fp:          The fp value in the frame record (or the real fp)
>   * @pc:          The lr value in the frame record (or the real lr)
>   *
> - * @stacks_done: Stacks which have been entirely unwound, for which it is no
> - *               longer valid to unwind to.
> - *
> - * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
> - *               of 0. This is used to ensure that within a stack, each
> - *               subsequent frame record is at an increasing address.
> - * @prev_type:   The type of stack this frame record was on, or a synthetic
> - *               value of STACK_TYPE_UNKNOWN. This is used to detect a
> - *               transition from one stack to another.
> - *
>   * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
>   *               associated with the most recently encountered replacement lr
>   *               value.
>   *
>   * @task:        The task being unwound.
> + *
> + * @stack:       The stack currently being unwound.
> + * @stacks:      An array of stacks which can be unwound.
> + * @nr_stacks:   The number of stacks in @stacks.
>   */
>  struct unwind_state {
>         unsigned long fp;
>         unsigned long pc;
> -       DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
> -       unsigned long prev_fp;
> -       enum stack_type prev_type;
>  #ifdef CONFIG_KRETPROBES
>         struct llist_node *kr_cur;
>  #endif
>         struct task_struct *task;
> +
> +       struct stack_info stack;
> +       struct stack_info *stacks;
> +       int nr_stacks;
>  };
>
>  static inline struct stack_info stackinfo_get_unknown(void)
> @@ -71,7 +52,6 @@ static inline struct stack_info stackinfo_get_unknown(void)
>         return (struct stack_info) {
>                 .low = 0,
>                 .high = 0,
> -               .type = STACK_TYPE_UNKNOWN,
>         };
>  }
>
> @@ -95,18 +75,7 @@ static inline void unwind_init_common(struct unwind_state *state,
>         state->kr_cur = NULL;
>  #endif
>
> -       /*
> -        * Prime the first unwind.
> -        *
> -        * In unwind_next() we'll check that the FP points to a valid stack,
> -        * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
> -        * treated as a transition to whichever stack that happens to be. The
> -        * prev_fp value won't be used, but we set it to 0 such that it is
> -        * definitely not an accessible stack address.
> -        */
> -       bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
> -       state->prev_fp = 0;
> -       state->prev_type = STACK_TYPE_UNKNOWN;
> +       state->stack = stackinfo_get_unknown();
>  }
>
>  /*
> @@ -120,44 +89,91 @@ static inline void unwind_init_common(struct unwind_state *state,
>   */
>  typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp);
>
> +static struct stack_info *unwind_find_next_stack(const struct unwind_state *state,
> +                                                unsigned long sp,
> +                                                unsigned long size)
> +{
> +       for (int i = 0; i < state->nr_stacks; i++) {
> +               struct stack_info *info = &state->stacks[i];
> +
> +               if (stackinfo_on_stack(info, sp, size))
> +                       return info;
> +       }
> +
> +       return NULL;
> +}
> +
>  /*
> - * on_accessible_stack_fn() - Check whether a stack range is on any
> - * of the possible stacks.
> + * unwind_consume_stack() - Check if an object is on an accessible stack,
> + * updating stack boundaries so that future unwind steps cannot consume this
> + * object again.
>   *
> - * @tsk:  task whose stack is being unwound
> - * @sp:   stack address being checked
> - * @size: size of the stack range being checked
> - * @info: stack unwinding context
> + * @state: the current unwind state.
> + * @sp:    the base address of the object.
> + * @size:  the size of the object.
>   */
> -typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
> -                                      unsigned long sp, unsigned long size,
> -                                      struct stack_info *info);
> +static inline int unwind_consume_stack(struct unwind_state *state,
> +                                      unsigned long sp,
> +                                      unsigned long size)
> +{
> +       struct stack_info *next;
> +
> +       if (stackinfo_on_stack(&state->stack, sp, size))
> +               goto found;
> +
> +       next = unwind_find_next_stack(state, sp, size);
> +       if (!next)
> +               return -EINVAL;
> +
> +       /*
> +        * Stack transitions are strictly one-way, and once we've
> +        * transitioned from one stack to another, it's never valid to
> +        * unwind back to the old stack.
> +        *
> +        * Remove the current stack from the list of stacks so that it cannot
> +        * be found on a subsequent transition.
> +        *
> +        * Note that stacks can nest in several valid orders, e.g.
> +        *
> +        *   TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
> +        *   TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
> +        *   HYP -> OVERFLOW
> +        *
> +        * ... so we do not check the specific order of stack
> +        * transitions.
> +        */
> +       state->stack = *next;
> +       *next = stackinfo_get_unknown();
> +
> +found:
> +       /*
> +        * Future unwind steps can only consume stack above this frame record.
> +        * Update the current stack to start immediately above it.
> +        */
> +       state->stack.low = sp + size;
> +       return 0;
> +}
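
(Continuing the toy sketch from the note above: the raised low bound is
also what makes the overlap detection mentioned in the commit message
fall out for free. With s.stack == { 0x1000, 0x2000 } and no other
candidates left:

    assert(toy_consume(&s, 0x1f00, 16) == 0);  /* low becomes 0x1f10 */
    assert(toy_consume(&s, 0x1f08, 16) < 0);   /* overlaps the record above */

...so even same-stack records must sit at strictly increasing,
non-overlapping addresses.)
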
>
>  /*
>   * unwind_next_frame_record() - Unwind to the next frame record indicated by
>   * @state->fp.
>   *
>   * @state:        the current unwind state.
> - * @accessible:   determines whether the frame record is accessible
>   * @translate_fp: translates the fp prior to access (may be NULL)
>   */
>  static inline int
>  unwind_next_frame_record(struct unwind_state *state,
> -                        on_accessible_stack_fn accessible,
>                          stack_trace_translate_fp_fn translate_fp)
>  {
> -       struct stack_info info;
>         unsigned long fp = state->fp, kern_fp = fp;
> -       struct task_struct *tsk = state->task;
> +       int err;
>
>         if (fp & 0x7)
>                 return -EINVAL;
>
> -       if (!accessible(tsk, fp, 16, &info))
> -               return -EINVAL;
> -
> -       if (test_bit(info.type, state->stacks_done))
> -               return -EINVAL;
> +       err = unwind_consume_stack(state, fp, 16);
> +       if (err)
> +               return err;
>
>         /*
>          * If fp is not from the current address space perform the necessary
> @@ -167,34 +183,10 @@ unwind_next_frame_record(struct unwind_state *state,
>                 return -EINVAL;
>
>         /*
> -        * As stacks grow downward, any valid record on the same stack must be
> -        * at a strictly higher address than the prior record.
> -        *
> -        * Stacks can nest in several valid orders, e.g.
> -        *
> -        * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
> -        * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
> -        * HYP -> OVERFLOW
> -        *
> -        * ... but the nesting itself is strict. Once we transition from one
> -        * stack to another, it's never valid to unwind back to that first
> -        * stack.
> -        */
> -       if (info.type == state->prev_type) {
> -               if (fp <= state->prev_fp)
> -                       return -EINVAL;
> -       } else {
> -               __set_bit(state->prev_type, state->stacks_done);
> -       }
> -
>         /*
> -        * Record this frame record's values and location. The prev_fp and
> -        * prev_type are only meaningful to the next unwind_next() invocation.
> +        * Record this frame record's values.
>          */
>         state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
>         state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
> -       state->prev_fp = fp;
> -       state->prev_type = info.type;
>
>         return 0;
>  }
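
For reference, the two READ_ONCE() loads above rely on the AAPCS64
frame record layout -- a pair of adjacent 64-bit saved values, which is
also why each record is consumed as a 16-byte object. Illustratively:

    /*
     * A frame record, as pointed to by a valid fp:
     *
     *   fp + 0: the caller's fp (next frame record) -> state->fp
     *   fp + 8: the saved lr (return address)       -> state->pc
     */
    struct frame_record {
            unsigned long fp;
            unsigned long lr;
    };
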
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index ca56fd732c2a9..1133be3ff774d 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -67,57 +67,6 @@ static inline void unwind_init_from_task(struct unwind_state *state,
>         state->pc = thread_saved_pc(task);
>  }
>
> -static bool on_accessible_stack(const struct task_struct *tsk,
> -                               unsigned long sp, unsigned long size,
> -                               struct stack_info *info)
> -{
> -       struct stack_info tmp;
> -
> -       tmp = stackinfo_get_task(tsk);
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       /*
> -        * We can only safely access per-cpu stacks when unwinding the current
> -        * task in a non-preemptible context.
> -        */
> -       if (tsk != current || preemptible())
> -               goto not_found;
> -
> -       tmp = stackinfo_get_irq();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       tmp = stackinfo_get_overflow();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       /*
> -        * We can only safely access SDEI stacks when unwinding the current
> -        * task in an NMI context.
> -        */
> -       if (!IS_ENABLED(CONFIG_VMAP_STACK) ||
> -           !IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) ||
> -           !in_nmi())
> -               goto not_found;
> -
> -       tmp = stackinfo_get_sdei_normal();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       tmp = stackinfo_get_sdei_critical();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -not_found:
> -       *info = stackinfo_get_unknown();
> -       return false;
> -
> -found:
> -       *info = tmp;
> -       return true;
> -}
> -
>  /*
>   * Unwind from one frame record (A) to the next frame record (B).
>   *
> @@ -135,7 +84,7 @@ static int notrace unwind_next(struct unwind_state *state)
>         if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
>                 return -ENOENT;
>
> -       err = unwind_next_frame_record(state, on_accessible_stack, NULL);
> +       err = unwind_next_frame_record(state, NULL);
>         if (err)
>                 return err;
>
> @@ -215,11 +164,47 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
>         barrier();
>  }
>
> +/*
> + * Per-cpu stacks are only accessible when unwinding the current task in a
> + * non-preemptible context.
> + */
> +#define STACKINFO_CPU(name)                                    \
> +       ({                                                      \
> +               ((task == current) && !preemptible())           \
> +                       ? stackinfo_get_##name()                \
> +                       : stackinfo_get_unknown();              \
> +       })
> +
> +/*
> + * SDEI stacks are only accessible when unwinding the current task in an NMI
> + * context.
> + */
> +#define STACKINFO_SDEI(name)                                   \
> +       ({                                                      \
> +               ((task == current) && in_nmi())                 \
> +                       ? stackinfo_get_sdei_##name()           \
> +                       : stackinfo_get_unknown();              \
> +       })
> +
>  noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
>                               void *cookie, struct task_struct *task,
>                               struct pt_regs *regs)
>  {
> -       struct unwind_state state;
> +       struct stack_info stacks[] = {
> +               stackinfo_get_task(task),
> +               STACKINFO_CPU(irq),
> +#if defined(CONFIG_VMAP_STACK)
> +               STACKINFO_CPU(overflow),
> +#endif
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
> +               STACKINFO_SDEI(normal),
> +               STACKINFO_SDEI(critical),
> +#endif
> +       };
> +       struct unwind_state state = {
> +               .stacks = stacks,
> +               .nr_stacks = ARRAY_SIZE(stacks),
> +       };
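
Nice property of this arrangement: an inaccessible stack doesn't vanish
from stacks[], it just degrades to the empty "unknown" range, which
stackinfo_on_stack() can never match. E.g. STACKINFO_CPU(irq) expands
to roughly (illustrative expansion, minus the statement-expression
wrapper):

    (task == current && !preemptible()) ? stackinfo_get_irq()
                                        : stackinfo_get_unknown()  /* [0, 0) */
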
>
>         if (regs) {
>                 if (task != current)
> diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
> index 5da0d44f61b73..08e1325ead73f 100644
> --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c
> +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
> @@ -47,7 +47,6 @@ static struct stack_info stackinfo_get_overflow(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_OVERFLOW,
>         };
>  }
>
> @@ -60,35 +59,12 @@ static struct stack_info stackinfo_get_hyp(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_HYP,
>         };
>  }
>
> -static bool on_accessible_stack(const struct task_struct *tsk,
> -                               unsigned long sp, unsigned long size,
> -                               struct stack_info *info)
> -{
> -       struct stack_info tmp;
> -
> -       tmp = stackinfo_get_overflow();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       tmp = stackinfo_get_hyp();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       *info = stackinfo_get_unknown();
> -       return false;
> -
> -found:
> -       *info = tmp;
> -       return true;
> -}
> -
>  static int unwind_next(struct unwind_state *state)
>  {
> -       return unwind_next_frame_record(state, on_accessible_stack, NULL);
> +       return unwind_next_frame_record(state, NULL);
>  }
>
>  static void notrace unwind(struct unwind_state *state,
> @@ -144,7 +120,14 @@ static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
>   */
>  static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
>  {
> -       struct unwind_state state;
> +       struct stack_info stacks[] = {
> +               stackinfo_get_overflow(),
> +               stackinfo_get_hyp(),
> +       };
> +       struct unwind_state state = {
> +               .stacks = stacks,
> +               .nr_stacks = ARRAY_SIZE(stacks),
> +       };
>         int idx = 0;
>
>         kvm_nvhe_unwind_init(&state, fp, pc);
> diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c
> index 62ffa2b40da10..8295e132da2f0 100644
> --- a/arch/arm64/kvm/stacktrace.c
> +++ b/arch/arm64/kvm/stacktrace.c
> @@ -31,7 +31,6 @@ static struct stack_info stackinfo_get_overflow(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_OVERFLOW,
>         };
>  }
>
> @@ -45,7 +44,6 @@ static struct stack_info stackinfo_get_hyp(void)
>         return (struct stack_info) {
>                 .low = low,
>                 .high = high,
> -               .type = STACK_TYPE_HYP,
>         };
>  }
>
> @@ -94,32 +92,9 @@ static bool kvm_nvhe_stack_kern_va(unsigned long *addr)
>         return true;
>  }
>
> -static bool on_accessible_stack(const struct task_struct *tsk,
> -                               unsigned long sp, unsigned long size,
> -                               struct stack_info *info)
> -{
> -       struct stack_info tmp;
> -
> -       tmp = stackinfo_get_overflow();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       tmp = stackinfo_get_hyp();
> -       if (stackinfo_on_stack(&tmp, sp, size))
> -               goto found;
> -
> -       *info = stackinfo_get_unknown();
> -       return false;
> -
> -found:
> -       *info = tmp;
> -       return true;
> -}
> -
>  static int unwind_next(struct unwind_state *state)
>  {
> -       return unwind_next_frame_record(state, on_accessible_stack,
> -                                       kvm_nvhe_stack_kern_va);
> +       return unwind_next_frame_record(state, kvm_nvhe_stack_kern_va);
>  }
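
The hyp fp values here live in the nVHE address space, so the unwinder
only dereferences them after the translate_fp hook has rewritten them.
A hook of this shape satisfies the stack_trace_translate_fp_fn contract
from common.h (toy translation below -- made-up ranges and offset, not
the real kvm_nvhe_stack_kern_va):

    /* hypothetical lookup: toy linear offset between address spaces */
    static unsigned long toy_translate(unsigned long addr)
    {
            const unsigned long lo = 0x4000, hi = 0x5000, off = 0x10000;

            return (addr >= lo && addr < hi) ? addr + off : 0;
    }

    static bool example_translate_fp(unsigned long *fp)
    {
            unsigned long va = toy_translate(*fp);

            if (!va)
                    return false;   /* unwind fails with -EINVAL */

            *fp = va;               /* unwinder dereferences this instead */
            return true;
    }
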
>
>  static void unwind(struct unwind_state *state,
> @@ -177,7 +152,14 @@ static void kvm_nvhe_dump_backtrace_end(void)
>  static void hyp_dump_backtrace(unsigned long hyp_offset)
>  {
>         struct kvm_nvhe_stacktrace_info *stacktrace_info;
> -       struct unwind_state state;
> +       struct stack_info stacks[] = {
> +               stackinfo_get_overflow(),
> +               stackinfo_get_hyp(),
> +       };
> +       struct unwind_state state = {
> +               .stacks = stacks,
> +               .nr_stacks = ARRAY_SIZE(stacks),
> +       };
>
>         stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
>
> --
> 2.30.2
>


