[PATCH] ARM: unwind: only permit stack switch when unwinding call_with_stack()

Ard Biesheuvel ardb at kernel.org
Thu Mar 10 23:45:29 PST 2022


Commit b6506981f880 ("ARM: unwind: support unwinding across multiple
stacks") updated the logic in the ARM unwinder to widen the bounds
within which SP is assumed to be valid, in order to allow the unwind to
traverse from the IRQ stack to the task stack. This is necessary, as
otherwise, unwinds started from the IRQ stack would terminate in the IRQ
exception handler, making stacktraces substantially less useful.

This turns out to be a mistake, as it breaks asynchronous unwinding
across exceptions, when the exception is taken before the stack frame is
consistent with the unwind info. For instance, in the following
backtrace:

  ...
   generic_handle_arch_irq from call_with_stack+0x18/0x20
   call_with_stack from __irq_svc+0x80/0x98
  Exception stack(0xc7093e20 to 0xc7093e68)
  3e20: b6a94a88 c7093ea0 00000008 00000000 c7093ea0 b7e127d0 00000051 c9220000
  3e40: b6a94a88 b6a94a88 00000004 0002b000 0036b570 c7093e70 c040ca2c c0994a90
  3e60: 20070013 ffffffff
   __irq_svc from __copy_to_user_std+0x20/0x378
  ...

we need to apply the following unwind directives:

  0xc099720c <__copy_to_user_std+0x1c>: @0xc295d1d4
    Compact model index: 1
    0x9b      vsp = r11
    0xb1 0x0d pop {r0, r2, r3}
    0x84 0x81 pop {r4, r11, r14}
    0xb0      finish

which tell us to switch to the frame pointer register R11 and proceed
with the unwind from that. However, having been interrupted 0x20 bytes
into the function:

  c09971f0 <__copy_to_user_std>:
  c09971f0:       e59f3350        ldr     r3, [pc, #848]
  c09971f4:       e243c001        sub     ip, r3, #1
  c09971f8:       e05cc000        subs    ip, ip, r0
  c09971fc:       228cc001        addcs   ip, ip, #1
  c0997200:       205cc002        subscs  ip, ip, r2
  c0997204:       33a00000        movcc   r0, #0
  c0997208:       e320f014        csdb
  c099720c:       e3a03000        mov     r3, #0
  c0997210:       e92d481d        push    {r0, r2, r3, r4, fp, lr}
  c0997214:       e1a0b00d        mov     fp, sp
  c0997218:       e2522004        subs    r2, r2, #4

the value for R11 recovered from the previous frame (__irq_svc) will be
a snapshot of its value before the exception was taken (0x0002b000),
which occurred at address __copy_to_user_std+0x20 (0xc0997210), when R11
had not been assigned its value yet.

This means we can never assume that the SP values recovered from the
stack or from the frame pointer are ever safe to use, given the need to
do asynchronous unwinding, and the only robust approach is to revert to
the previous approach, which is to derive bounds for SP based on the
initial value, and never update them.

We can make an exception, though: now that the IRQ stack switch is
guaranteed to occur in call_with_stack(), we can implement a special
case for this function, and use a different set of bounds based on the
knowledge that it will always unwind from R11 rather than SP. As
call_with_stack() is a hand-rolled assembly routine, this is guaranteed
to remain that way.

So let's do a partial revert of b6506981f880, and drop all manipulations
for sp_low and sp_high based on the information collected during the
unwind itself. To support call_with_stack(), set sp_low and sp_high
explicitly to values derived from R11 when we unwind that function.

The only downside is that, while unwinding an overflow of the vmap'ed
stack will work fine as before, we will no longer be able to produce a
backtrace that unwinds the overflow stack itself across the exception
that was raised due to the faulting access to the guard region. However,
this only affects exceptions caused by problems in the stack overflow
handling code itself, in which case the remaining backtrace is not that
relevant.

Fixes: b6506981f880 ("ARM: unwind: support unwinding across multiple stacks")
Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
---
 arch/arm/kernel/unwind.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index e619ec7856b7..a37ea6c772cd 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -33,6 +33,8 @@
 #include <asm/traps.h>
 #include <asm/unwind.h>
 
+#include "reboot.h"
+
 /* Dummy functions to avoid linker complaints */
 void __aeabi_unwind_cpp_pr0(void)
 {
@@ -52,7 +54,6 @@ EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2);
 struct unwind_ctrl_block {
 	unsigned long vrs[16];		/* virtual register set */
 	const unsigned long *insn;	/* pointer to the current instructions word */
-	unsigned long sp_low;		/* lowest value of sp allowed */
 	unsigned long sp_high;		/* highest value of sp allowed */
 	unsigned long *lr_addr;		/* address of LR value on the stack */
 	/*
@@ -262,9 +263,6 @@ static int unwind_exec_pop_subset_r4_to_r13(struct unwind_ctrl_block *ctrl,
 	}
 	if (!load_sp) {
 		ctrl->vrs[SP] = (unsigned long)vsp;
-	} else {
-		ctrl->sp_low = ctrl->vrs[SP];
-		ctrl->sp_high = ALIGN(ctrl->sp_low, THREAD_SIZE);
 	}
 
 	return URC_OK;
@@ -323,7 +321,6 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 		ctrl->vrs[SP] += ((insn & 0x3f) << 2) + 4;
 	else if ((insn & 0xc0) == 0x40) {
 		ctrl->vrs[SP] -= ((insn & 0x3f) << 2) + 4;
-		ctrl->sp_low = ctrl->vrs[SP];
 	} else if ((insn & 0xf0) == 0x80) {
 		unsigned long mask;
 
@@ -341,8 +338,6 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
 	} else if ((insn & 0xf0) == 0x90 &&
 		   (insn & 0x0d) != 0x0d) {
 		ctrl->vrs[SP] = ctrl->vrs[insn & 0x0f];
-		ctrl->sp_low = ctrl->vrs[SP];
-		ctrl->sp_high = ALIGN(ctrl->sp_low, THREAD_SIZE);
 	} else if ((insn & 0xf0) == 0xa0) {
 		ret = unwind_exec_pop_r4_to_rN(ctrl, insn);
 		if (ret)
@@ -388,10 +383,11 @@ int unwind_frame(struct stackframe *frame)
 {
 	const struct unwind_idx *idx;
 	struct unwind_ctrl_block ctrl;
+	unsigned long sp_low;
 
 	/* store the highest address on the stack to avoid crossing it*/
-	ctrl.sp_low = frame->sp;
-	ctrl.sp_high = ALIGN(ctrl.sp_low - THREAD_SIZE, THREAD_ALIGN)
+	sp_low = frame->sp;
+	ctrl.sp_high = ALIGN(sp_low - THREAD_SIZE, THREAD_ALIGN)
 		       + THREAD_SIZE;
 
 	pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__,
@@ -452,6 +448,16 @@ int unwind_frame(struct stackframe *frame)
 
 	ctrl.check_each_pop = 0;
 
+	if (prel31_to_addr(&idx->addr_offset) == (u32)&call_with_stack) {
+		/*
+		 * call_with_stack() is the only place where we permit SP to
+		 * jump from one stack to another, and since we know it is
+		 * guaranteed to happen, set up the SP bounds accordingly.
+		 */
+		sp_low = frame->fp;
+		ctrl.sp_high = ALIGN(frame->fp, THREAD_SIZE);
+	}
+
 	while (ctrl.entries > 0) {
 		int urc;
 		if ((ctrl.sp_high - ctrl.vrs[SP]) < sizeof(ctrl.vrs))
@@ -459,7 +465,7 @@ int unwind_frame(struct stackframe *frame)
 		urc = unwind_exec_insn(&ctrl);
 		if (urc < 0)
 			return urc;
-		if (ctrl.vrs[SP] < ctrl.sp_low || ctrl.vrs[SP] > ctrl.sp_high)
+		if (ctrl.vrs[SP] < sp_low || ctrl.vrs[SP] > ctrl.sp_high)
 			return -URC_FAILURE;
 	}
 
-- 
2.30.2




More information about the linux-arm-kernel mailing list