[PATCH v4 8/9] arm64: stacktrace: track all stack boundaries explicitly

Mark Rutland mark.rutland at arm.com
Thu Sep 1 06:06:45 PDT 2022


Currently we call an on_accessible_stack() callback for each step of the
unwinder, requiring redundant work to be performed in the core of the
unwind loop (e.g. disabling preemption around accesses to per-cpu
variables containing stack boundaries). To prevent unwind loops which go
through a stack multiple times, we have to track the set of unwound
stacks, requiring a stack_type enum which needs to cater for all the
stacks of all possible callees. To prevent loops within a stack, we must
track the prior FP values.

This patch reworks the unwinder to minimize the work in the core of the
unwinder, and to remove the need for the stack_type enum. The set of
accessible stacks (and their boundaries) are determined at the start of
the unwind, and the current stack is tracked during the unwind, with
completed stacks removed from the set of accessible stacks. This makes
the boundary checks more accurate (e.g. detecting overlapped frame
records), and removes the need for separate tracking of the prior FP and
visited stacks.

Signed-off-by: Mark Rutland <mark.rutland at arm.com>
Reviewed-by: Kalesh Singh <kaleshsingh at google.com>
Reviewed-by: Madhavan T. Venkataraman <madvenka at linux.microsoft.com>
Reviewed-by: Mark Brown <broonie at kernel.org>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Fuad Tabba <tabba at google.com>
Cc: Marc Zyngier <maz at kernel.org>
Cc: Will Deacon <will at kernel.org>
---
 arch/arm64/include/asm/stacktrace.h        |   5 -
 arch/arm64/include/asm/stacktrace/common.h | 164 ++++++++++-----------
 arch/arm64/kernel/stacktrace.c             |  91 +++++-------
 arch/arm64/kvm/hyp/nvhe/stacktrace.c       |  35 ++---
 arch/arm64/kvm/stacktrace.c                |  36 ++---
 5 files changed, 132 insertions(+), 199 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index aad0c6258721d..5a0edb064ea47 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -30,7 +30,6 @@ static inline struct stack_info stackinfo_get_irq(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_IRQ,
 	};
 }
 
@@ -48,7 +47,6 @@ static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_TASK,
 	};
 }
 
@@ -70,7 +68,6 @@ static inline struct stack_info stackinfo_get_overflow(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_OVERFLOW,
 	};
 }
 #else
@@ -89,7 +86,6 @@ static inline struct stack_info stackinfo_get_sdei_normal(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_SDEI_NORMAL,
 	};
 }
 
@@ -101,7 +97,6 @@ static inline struct stack_info stackinfo_get_sdei_critical(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_SDEI_CRITICAL,
 	};
 }
 #else
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
index 2f972441f572e..638008f485972 100644
--- a/arch/arm64/include/asm/stacktrace/common.h
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -9,26 +9,12 @@
 #ifndef __ASM_STACKTRACE_COMMON_H
 #define __ASM_STACKTRACE_COMMON_H
 
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
 #include <linux/kprobes.h>
 #include <linux/types.h>
 
-enum stack_type {
-	STACK_TYPE_UNKNOWN,
-	STACK_TYPE_TASK,
-	STACK_TYPE_IRQ,
-	STACK_TYPE_OVERFLOW,
-	STACK_TYPE_SDEI_NORMAL,
-	STACK_TYPE_SDEI_CRITICAL,
-	STACK_TYPE_HYP,
-	__NR_STACK_TYPES
-};
-
 struct stack_info {
 	unsigned long low;
 	unsigned long high;
-	enum stack_type type;
 };
 
 /**
@@ -37,32 +23,27 @@ struct stack_info {
  * @fp:          The fp value in the frame record (or the real fp)
  * @pc:          The lr value in the frame record (or the real lr)
  *
- * @stacks_done: Stacks which have been entirely unwound, for which it is no
- *               longer valid to unwind to.
- *
- * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
- *               of 0. This is used to ensure that within a stack, each
- *               subsequent frame record is at an increasing address.
- * @prev_type:   The type of stack this frame record was on, or a synthetic
- *               value of STACK_TYPE_UNKNOWN. This is used to detect a
- *               transition from one stack to another.
- *
  * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
  *               associated with the most recently encountered replacement lr
  *               value.
  *
  * @task:        The task being unwound.
+ *
+ * @stack:       The stack currently being unwound.
+ * @stacks:      An array of stacks which can be unwound.
+ * @nr_stacks:   The number of stacks in @stacks.
  */
 struct unwind_state {
 	unsigned long fp;
 	unsigned long pc;
-	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
-	unsigned long prev_fp;
-	enum stack_type prev_type;
 #ifdef CONFIG_KRETPROBES
 	struct llist_node *kr_cur;
 #endif
 	struct task_struct *task;
+
+	struct stack_info stack;
+	struct stack_info *stacks;
+	int nr_stacks;
 };
 
 static inline struct stack_info stackinfo_get_unknown(void)
@@ -70,7 +51,6 @@ static inline struct stack_info stackinfo_get_unknown(void)
 	return (struct stack_info) {
 		.low = 0,
 		.high = 0,
-		.type = STACK_TYPE_UNKNOWN,
 	};
 }
 
@@ -94,18 +74,7 @@ static inline void unwind_init_common(struct unwind_state *state,
 	state->kr_cur = NULL;
 #endif
 
-	/*
-	 * Prime the first unwind.
-	 *
-	 * In unwind_next() we'll check that the FP points to a valid stack,
-	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
-	 * treated as a transition to whichever stack that happens to be. The
-	 * prev_fp value won't be used, but we set it to 0 such that it is
-	 * definitely not an accessible stack address.
-	 */
-	bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
-	state->prev_fp = 0;
-	state->prev_type = STACK_TYPE_UNKNOWN;
+	state->stack = stackinfo_get_unknown();
 }
 
 /**
@@ -120,51 +89,94 @@ static inline void unwind_init_common(struct unwind_state *state,
  */
 typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp);
 
+static struct stack_info *unwind_find_next_stack(const struct unwind_state *state,
+						 unsigned long sp,
+						 unsigned long size)
+{
+	for (int i = 0; i < state->nr_stacks; i++) {
+		struct stack_info *info = &state->stacks[i];
+
+		if (stackinfo_on_stack(info, sp, size))
+			return info;
+	}
+
+	return NULL;
+}
+
 /**
- * typedef on_accessible_stack_fn() - Check whether a stack range is on any of
- * the possible stacks.
- *
- * @tsk:  task whose stack is being unwound
- * @sp:   stack address being checked
- * @size: size of the stack range being checked
- * @info: stack unwinding context
- *
- * Return: true if the stack range is accessible, false otherwise.
+ * unwind_consume_stack() - Check if an object is on an accessible stack,
+ * updating stack boundaries so that future unwind steps cannot consume this
+ * object again.
  *
- * Upon success @info is updated with information for the relevant stack.
+ * @state: the current unwind state.
+ * @sp:    the base address of the object.
+ * @size:  the size of the object.
  *
- * Upon failure @info is updated with the UNKNOWN stack.
+ * Return: 0 upon success, an error code otherwise.
  */
-typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
-				       unsigned long sp, unsigned long size,
-				       struct stack_info *info);
+static inline int unwind_consume_stack(struct unwind_state *state,
+				       unsigned long sp,
+				       unsigned long size)
+{
+	struct stack_info *next;
+
+	if (stackinfo_on_stack(&state->stack, sp, size))
+		goto found;
+
+	next = unwind_find_next_stack(state, sp, size);
+	if (!next)
+		return -EINVAL;
+
+	/*
+	 * Stack transitions are strictly one-way, and once we've
+	 * transitioned from one stack to another, it's never valid to
+	 * unwind back to the old stack.
+	 *
+	 * Remove the current stack from the list of stacks so that it cannot
+	 * be found on a subsequent transition.
+	 *
+	 * Note that stacks can nest in several valid orders, e.g.
+	 *
+	 *   TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+	 *   TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+	 *   HYP -> OVERFLOW
+	 *
+	 * ... so we do not check the specific order of stack
+	 * transitions.
+	 */
+	state->stack = *next;
+	*next = stackinfo_get_unknown();
+
+found:
+	/*
+	 * Future unwind steps can only consume stack above this frame record.
+	 * Update the current stack to start immediately above it.
+	 */
+	state->stack.low = sp + size;
+	return 0;
+}
 
 /**
  * unwind_next_frame_record() - Unwind to the next frame record.
  *
  * @state:        the current unwind state.
- * @accessible:   determines whether the frame record is accessible
  * @translate_fp: translates the fp prior to access (may be NULL)
  *
  * Return: 0 upon success, an error code otherwise.
  */
 static inline int
 unwind_next_frame_record(struct unwind_state *state,
-			 on_accessible_stack_fn accessible,
 			 stack_trace_translate_fp_fn translate_fp)
 {
-	struct stack_info info;
 	unsigned long fp = state->fp, kern_fp = fp;
-	struct task_struct *tsk = state->task;
+	int err;
 
 	if (fp & 0x7)
 		return -EINVAL;
 
-	if (!accessible(tsk, fp, 16, &info))
-		return -EINVAL;
-
-	if (test_bit(info.type, state->stacks_done))
-		return -EINVAL;
+	err = unwind_consume_stack(state, fp, 16);
+	if (err)
+		return err;
 
 	/*
 	 * If fp is not from the current address space perform the necessary
@@ -174,34 +186,10 @@ unwind_next_frame_record(struct unwind_state *state,
 		return -EINVAL;
 
 	/*
-	 * As stacks grow downward, any valid record on the same stack must be
-	 * at a strictly higher address than the prior record.
-	 *
-	 * Stacks can nest in several valid orders, e.g.
-	 *
-	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
-	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
-	 * HYP -> OVERFLOW
-	 *
-	 * ... but the nesting itself is strict. Once we transition from one
-	 * stack to another, it's never valid to unwind back to that first
-	 * stack.
-	 */
-	if (info.type == state->prev_type) {
-		if (fp <= state->prev_fp)
-			return -EINVAL;
-	} else {
-		__set_bit(state->prev_type, state->stacks_done);
-	}
-
-	/*
-	 * Record this frame record's values and location. The prev_fp and
-	 * prev_type are only meaningful to the next unwind_next() invocation.
+	 * Record this frame record's values.
 	 */
 	state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
 	state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
-	state->prev_fp = fp;
-	state->prev_type = info.type;
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index ca56fd732c2a9..9c8820f242625 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -67,57 +67,6 @@ static inline void unwind_init_from_task(struct unwind_state *state,
 	state->pc = thread_saved_pc(task);
 }
 
-static bool on_accessible_stack(const struct task_struct *tsk,
-				unsigned long sp, unsigned long size,
-				struct stack_info *info)
-{
-	struct stack_info tmp;
-
-	tmp = stackinfo_get_task(tsk);
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	/*
-	 * We can only safely access per-cpu stacks when unwinding the current
-	 * task in a non-preemptible context.
-	 */
-	if (tsk != current || preemptible())
-		goto not_found;
-
-	tmp = stackinfo_get_irq();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	tmp = stackinfo_get_overflow();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	/*
-	 * We can only safely access SDEI stacks which unwinding the current
-	 * task in an NMI context.
-	 */
-	if (!IS_ENABLED(CONFIG_VMAP_STACK) ||
-	    !IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) ||
-	    !in_nmi())
-		goto not_found;
-
-	tmp = stackinfo_get_sdei_normal();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	tmp = stackinfo_get_sdei_critical();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-not_found:
-	*info = stackinfo_get_unknown();
-	return false;
-
-found:
-	*info = tmp;
-	return true;
-}
-
 /*
  * Unwind from one frame record (A) to the next frame record (B).
  *
@@ -135,7 +84,7 @@ static int notrace unwind_next(struct unwind_state *state)
 	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
 		return -ENOENT;
 
-	err = unwind_next_frame_record(state, on_accessible_stack, NULL);
+	err = unwind_next_frame_record(state, NULL);
 	if (err)
 		return err;
 
@@ -215,11 +164,47 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
 	barrier();
 }
 
+/*
+ * Per-cpu stacks are only accessible when unwinding the current task in a
+ * non-preemptible context.
+ */
+#define STACKINFO_CPU(name)					\
+	({							\
+		((task == current) && !preemptible())		\
+			? stackinfo_get_##name()		\
+			: stackinfo_get_unknown();		\
+	})
+
+/*
+ * SDEI stacks are only accessible when unwinding the current task in an NMI
+ * context.
+ */
+#define STACKINFO_SDEI(name)					\
+	({							\
+		((task == current) && in_nmi())			\
+			? stackinfo_get_sdei_##name()		\
+			: stackinfo_get_unknown();		\
+	})
+
 noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
 			      void *cookie, struct task_struct *task,
 			      struct pt_regs *regs)
 {
-	struct unwind_state state;
+	struct stack_info stacks[] = {
+		stackinfo_get_task(task),
+		STACKINFO_CPU(irq),
+#if defined(CONFIG_VMAP_STACK)
+		STACKINFO_CPU(overflow),
+#endif
+#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
+		STACKINFO_SDEI(normal),
+		STACKINFO_SDEI(critical),
+#endif
+	};
+	struct unwind_state state = {
+		.stacks = stacks,
+		.nr_stacks = ARRAY_SIZE(stacks),
+	};
 
 	if (regs) {
 		if (task != current)
diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
index 5da0d44f61b73..08e1325ead73f 100644
--- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c
+++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
@@ -47,7 +47,6 @@ static struct stack_info stackinfo_get_overflow(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_OVERFLOW,
 	};
 }
 
@@ -60,35 +59,12 @@ static struct stack_info stackinfo_get_hyp(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_HYP,
 	};
 }
 
-static bool on_accessible_stack(const struct task_struct *tsk,
-				unsigned long sp, unsigned long size,
-				struct stack_info *info)
-{
-	struct stack_info tmp;
-
-	tmp = stackinfo_get_overflow();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	tmp = stackinfo_get_hyp();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	*info = stackinfo_get_unknown();
-	return false;
-
-found:
-	*info = tmp;
-	return true;
-}
-
 static int unwind_next(struct unwind_state *state)
 {
-	return unwind_next_frame_record(state, on_accessible_stack, NULL);
+	return unwind_next_frame_record(state, NULL);
 }
 
 static void notrace unwind(struct unwind_state *state,
@@ -144,7 +120,14 @@ static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
  */
 static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
 {
-	struct unwind_state state;
+	struct stack_info stacks[] = {
+		stackinfo_get_overflow(),
+		stackinfo_get_hyp(),
+	};
+	struct unwind_state state = {
+		.stacks = stacks,
+		.nr_stacks = ARRAY_SIZE(stacks),
+	};
 	int idx = 0;
 
 	kvm_nvhe_unwind_init(&state, fp, pc);
diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c
index 7658c5db47c1d..0b4703945780f 100644
--- a/arch/arm64/kvm/stacktrace.c
+++ b/arch/arm64/kvm/stacktrace.c
@@ -31,7 +31,6 @@ static struct stack_info stackinfo_get_overflow(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_OVERFLOW,
 	};
 }
 
@@ -45,7 +44,6 @@ static struct stack_info stackinfo_get_hyp(void)
 	return (struct stack_info) {
 		.low = low,
 		.high = high,
-		.type = STACK_TYPE_HYP,
 	};
 }
 
@@ -102,32 +100,9 @@ static bool kvm_nvhe_stack_kern_record_va(unsigned long *addr)
 	return kvm_nvhe_stack_kern_va(addr, 16);
 }
 
-static bool on_accessible_stack(const struct task_struct *tsk,
-				unsigned long sp, unsigned long size,
-				struct stack_info *info)
-{
-	struct stack_info tmp;
-
-	tmp = stackinfo_get_overflow();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	tmp = stackinfo_get_hyp();
-	if (stackinfo_on_stack(&tmp, sp, size))
-		goto found;
-
-	*info = stackinfo_get_unknown();
-	return false;
-
-found:
-	*info = tmp;
-	return true;
-}
-
 static int unwind_next(struct unwind_state *state)
 {
-	return unwind_next_frame_record(state, on_accessible_stack,
-					kvm_nvhe_stack_kern_record_va);
+	return unwind_next_frame_record(state, kvm_nvhe_stack_kern_record_va);
 }
 
 static void unwind(struct unwind_state *state,
@@ -185,7 +160,14 @@ static void kvm_nvhe_dump_backtrace_end(void)
 static void hyp_dump_backtrace(unsigned long hyp_offset)
 {
 	struct kvm_nvhe_stacktrace_info *stacktrace_info;
-	struct unwind_state state;
+	struct stack_info stacks[] = {
+		stackinfo_get_overflow(),
+		stackinfo_get_hyp(),
+	};
+	struct unwind_state state = {
+		.stacks = stacks,
+		.nr_stacks = ARRAY_SIZE(stacks),
+	};
 
 	stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
 
-- 
2.30.2




More information about the linux-arm-kernel mailing list