[PATCH v10 10/11] arm64: Introduce stack trace reliability checks in the unwinder

nobuta.keiya at fujitsu.com nobuta.keiya at fujitsu.com
Thu Nov 4 05:39:49 PDT 2021


Hi Madhavan,

> -----Original Message-----
> From: madvenka at linux.microsoft.com <madvenka at linux.microsoft.com>
> Sent: Friday, October 15, 2021 11:59 AM
> To: mark.rutland at arm.com; broonie at kernel.org; jpoimboe at redhat.com; ardb at kernel.org; Nobuta, Keiya/信田 圭哉
> <nobuta.keiya at fujitsu.com>; sjitindarsingh at gmail.com; catalin.marinas at arm.com; will at kernel.org; jmorris at namei.org;
> linux-arm-kernel at lists.infradead.org; live-patching at vger.kernel.org; linux-kernel at vger.kernel.org;
> madvenka at linux.microsoft.com
> Subject: [PATCH v10 10/11] arm64: Introduce stack trace reliability checks in the unwinder
> 
> From: "Madhavan T. Venkataraman" <madvenka at linux.microsoft.com>
> 
> There are some kernel features and conditions that make a stack trace unreliable. Callers may require the unwinder to detect
> these cases.
> E.g., livepatch.
> 
> Introduce a new function called unwind_check_reliability() that will detect these cases and set a flag in the stack frame. Call
> unwind_check_reliability() for every frame, that is, in unwind_start() and unwind_next().
> 
> Introduce the first reliability check in unwind_check_reliability() - If a return PC is not a valid kernel text address, consider the
> stack trace unreliable. It could be some generated code. Other reliability checks will be added in the future.
> 
> Let unwind() return a boolean to indicate if the stack trace is reliable.
> 
> Introduce arch_stack_walk_reliable() for ARM64. This works like
> arch_stack_walk() except that it returns -EINVAL if the stack trace is not reliable.
> 
> Until all the reliability checks are in place, arch_stack_walk_reliable() may not be used by livepatch. But it may be used by
> debug and test code.
> 
> Signed-off-by: Madhavan T. Venkataraman <madvenka at linux.microsoft.com>
> ---
>  arch/arm64/include/asm/stacktrace.h |  3 ++
>  arch/arm64/kernel/stacktrace.c      | 48 ++++++++++++++++++++++++++++-
>  2 files changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> index ba2180c7d5cd..ce0710fa3037 100644
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -51,6 +51,8 @@ struct stack_info {
>   *               replacement lr value in the ftrace graph stack.
>   *
>   * @failed:      Unwind failed.
> + *
> + * @reliable:    Stack trace is reliable.
>   */
>  struct stackframe {
>  	unsigned long fp;
> @@ -62,6 +64,7 @@ struct stackframe {
>  	int graph;
>  #endif
>  	bool failed;
> +	bool reliable;
>  };
> 
>  extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index 8e9e6f38c975..142f08ae515f 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -18,6 +18,22 @@
>  #include <asm/stack_pointer.h>
>  #include <asm/stacktrace.h>
> 
> +/*
> + * Check the stack frame for conditions that make further unwinding unreliable.
> + */
> +static void notrace unwind_check_reliability(struct stackframe *frame)
> +{
> +	/*
> +	 * If the PC is not a known kernel text address, then we cannot
> +	 * be sure that a subsequent unwind will be reliable, as we
> +	 * don't know that the code follows our unwind requirements.
> +	 */
> +	if (!__kernel_text_address(frame->pc))
> +		frame->reliable = false;
> +}
> +
> +NOKPROBE_SYMBOL(unwind_check_reliability);
> +
>  /*
>   * AArch64 PCS assigns the frame pointer to x29.
>   *
> @@ -55,6 +71,8 @@ static void notrace unwind_start(struct stackframe *frame, unsigned long fp,
>  	frame->prev_fp = 0;
>  	frame->prev_type = STACK_TYPE_UNKNOWN;
>  	frame->failed = false;
> +	frame->reliable = true;
> +	unwind_check_reliability(frame);
>  }
> 
>  NOKPROBE_SYMBOL(unwind_start);
> @@ -138,6 +156,7 @@ static void notrace unwind_next(struct task_struct *tsk,
>  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> 
>  	frame->pc = ptrauth_strip_insn_pac(frame->pc);
> +	unwind_check_reliability(frame);
>  }

Isn't it necessary to check for the "final frame" before calling
unwind_check_reliability()? The frame at this point has already been
unwound, so it may be the final frame.

Alternatively, if unwind_check_reliability() is moved into unwind(), I think
unwind() could be split into two variants, as below:

~~~~~~~~
unwind(...) {
	<...>
	for (unwind_start(...); unwind_continue(...); unwind_next(...))
		unwind_check_reliability(&frame);
}

unwind_reliable(...) {
	<...>
	for (unwind_start(...); unwind_continue(...); unwind_next(...)) {
		unwind_check_reliability(&frame);
		if (!frame.reliable)
			break;
	}

	return (frame.reliable && !frame.failed);
}
~~~~~~~~



Thanks,
Keiya


> 
>  NOKPROBE_SYMBOL(unwind_next);
> @@ -167,7 +186,7 @@ static bool notrace unwind_continue(struct task_struct *task,
> 
>  NOKPROBE_SYMBOL(unwind_continue);
> 
> -static void notrace unwind(struct task_struct *tsk,
> +static bool notrace unwind(struct task_struct *tsk,
>  			   unsigned long fp, unsigned long pc,
>  			   bool (*fn)(void *, unsigned long),
>  			   void *data)
> @@ -177,6 +196,7 @@ static void notrace unwind(struct task_struct *tsk,
>  	unwind_start(&frame, fp, pc);
>  	while (unwind_continue(tsk, &frame, fn, data))
>  		unwind_next(tsk, &frame);
> +	return frame.reliable;
>  }
> 
>  NOKPROBE_SYMBOL(unwind);
> @@ -238,4 +258,30 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> 
>  }
> 
> +/*
> + * arch_stack_walk_reliable() may not be used for livepatch until all of
> + * the reliability checks are in place in unwind_consume(). However,
> + * debug and test code can choose to use it even if all the checks are not
> + * in place.
> + */
> +noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
> +					      void *cookie,
> +					      struct task_struct *task)
> +{
> +	unsigned long fp, pc;
> +
> +	if (task == current) {
> +		/* Skip arch_stack_walk_reliable() in the stack trace. */
> +		fp = (unsigned long)__builtin_frame_address(1);
> +		pc = (unsigned long)__builtin_return_address(0);
> +	} else {
> +		/* Caller guarantees that the task is not running. */
> +		fp = thread_saved_fp(task);
> +		pc = thread_saved_pc(task);
> +	}
> +	if (unwind(task, fp, pc, consume_fn, cookie))
> +		return 0;
> +	return -EINVAL;
> +}
> +
>  #endif
> --
> 2.25.1




More information about the linux-arm-kernel mailing list