[PATCH 3/4] arm64: unwind: reference pt_regs via embedded stack frame
Mark Rutland
mark.rutland at arm.com
Mon Jul 24 10:54:46 PDT 2017
Hi Ard,
This series looks good. I've tested the whole thing with perf top -g and
sysrq-L.
In the process of reviewing this, I spotted some other (existing) bugs
in this area.
On Mon, Jul 24, 2017 at 12:26:22PM +0100, Ard Biesheuvel wrote:
> @@ -203,20 +193,13 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
> ret = unwind_frame(tsk, &frame);
> if (ret < 0)
> break;
> - stack = frame.sp;
> if (in_exception_text(where)) {
> - /*
> - * If we switched to the irq_stack before calling this
> - * exception handler, then the pt_regs will be on the
> - * task stack. The easiest way to tell is if the large
> - * pt_regs would overlap with the end of the irq_stack.
> - */
> - if (stack < irq_stack_ptr &&
> - (stack + sizeof(struct pt_regs)) > irq_stack_ptr)
> - stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
> + stack = frame.fp - offsetof(struct pt_regs, stackframe);
>
> - dump_mem("", "Exception stack", stack,
> - stack + sizeof(struct pt_regs));
> + if (on_task_stack(tsk, stack) ||
> + (tsk == current && !preemptible() && on_irq_stack(stack)))
> + dump_mem("", "Exception stack", stack,
> + stack + sizeof(struct pt_regs));
So here, we're using information from three records (where the last
might have been faked from the current FP + LR). We look at the previous
frame's LR value to determine whether the current frame's fp points to a
next frame that's associated with some regs:
+----+----------------+
| fp | interrupted PC | /* embedded in pt_regs */
+----+----------------+
/\
||
+----+----------------+
where => | fp | < entry PC > |
+----+----------------+
/\
||
+----+----------------+
| fp | where | /* "where" is in exception text */
+----+----------------+
Which (IIUC) has three problems, inherited from the existing approach.
1) Several C functions called from exception entry assembly are missing
__exception annotations. e.g. bad_mode.
So we can miss exception regs in some cases.
2) Several functions may be called either from the exception entry
assembly, or from other C code. e.g. do_undefinstr, and any cascaded
irqchip handlers.
So we can spuriously decide part of the stack is a pt_regs.
3) The entry assembly can enable exceptions before calling C code, so if
an exception is taken at the right time, these will be in the
backtrace without a surrounding frame in the exception text.
So we can miss exception regs in some cases (distinct from (1)).
... given that, I don't think we can rely on in_exception_text(). I
think we have to look at whether the current frame's PC/LR is in
.entry.text.
Fixup to that effect below, tested with sysrq-L and perf top -g. Note
that cpu_switch_to and ret_from_fork have to be moved into .text to
avoid false positives. I also killed 'where' since it's no longer
necessary to remember the previous frame.
There's another bug -- we always dump the exception regs, even when
skipping frames. The sysrq-L handler tries to skip frames, so you get
the regs for irq_entry, but without an irq_entry backtrace, which is
confusing. That's simple enough to fix up separately, I guess.
Thanks,
Mark.
---->8----
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 02e9035..4136168 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -60,4 +60,9 @@ static inline int in_exception_text(unsigned long ptr)
return in ? : __in_irqentry_text(ptr);
}
+static inline int in_entry_text(unsigned long ptr)
+{
+ return ptr >= (unsigned long)&__entry_text_start &&
+ ptr < (unsigned long)&__entry_text_end;
+}
#endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 32f3b24..3d25ec5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -707,38 +707,6 @@ el0_irq_naked:
ENDPROC(el0_irq)
/*
- * Register switch for AArch64. The callee-saved registers need to be saved
- * and restored. On entry:
- * x0 = previous task_struct (must be preserved across the switch)
- * x1 = next task_struct
- * Previous and next are guaranteed not to be the same.
- *
- */
-ENTRY(cpu_switch_to)
- mov x10, #THREAD_CPU_CONTEXT
- add x8, x0, x10
- mov x9, sp
- stp x19, x20, [x8], #16 // store callee-saved registers
- stp x21, x22, [x8], #16
- stp x23, x24, [x8], #16
- stp x25, x26, [x8], #16
- stp x27, x28, [x8], #16
- stp x29, x9, [x8], #16
- str lr, [x8]
- add x8, x1, x10
- ldp x19, x20, [x8], #16 // restore callee-saved registers
- ldp x21, x22, [x8], #16
- ldp x23, x24, [x8], #16
- ldp x25, x26, [x8], #16
- ldp x27, x28, [x8], #16
- ldp x29, x9, [x8], #16
- ldr lr, [x8]
- mov sp, x9
- msr sp_el0, x1
- ret
-ENDPROC(cpu_switch_to)
-
-/*
* This is the fast syscall return path. We do as little as possible here,
* and this includes saving x0 back into the kernel stack.
*/
@@ -781,18 +749,6 @@ finish_ret_to_user:
ENDPROC(ret_to_user)
/*
- * This is how we return from a fork.
- */
-ENTRY(ret_from_fork)
- bl schedule_tail
- cbz x19, 1f // not a kernel thread
- mov x0, x20
- blr x19
-1: get_thread_info tsk
- b ret_to_user
-ENDPROC(ret_from_fork)
-
-/*
* SVC handler.
*/
.align 6
@@ -865,3 +821,47 @@ ENTRY(sys_rt_sigreturn_wrapper)
mov x0, sp
b sys_rt_sigreturn
ENDPROC(sys_rt_sigreturn_wrapper)
+
+/*
+ * Register switch for AArch64. The callee-saved registers need to be saved
+ * and restored. On entry:
+ * x0 = previous task_struct (must be preserved across the switch)
+ * x1 = next task_struct
+ * Previous and next are guaranteed not to be the same.
+ *
+ */
+ENTRY(cpu_switch_to)
+ mov x10, #THREAD_CPU_CONTEXT
+ add x8, x0, x10
+ mov x9, sp
+ stp x19, x20, [x8], #16 // store callee-saved registers
+ stp x21, x22, [x8], #16
+ stp x23, x24, [x8], #16
+ stp x25, x26, [x8], #16
+ stp x27, x28, [x8], #16
+ stp x29, x9, [x8], #16
+ str lr, [x8]
+ add x8, x1, x10
+ ldp x19, x20, [x8], #16 // restore callee-saved registers
+ ldp x21, x22, [x8], #16
+ ldp x23, x24, [x8], #16
+ ldp x25, x26, [x8], #16
+ ldp x27, x28, [x8], #16
+ ldp x29, x9, [x8], #16
+ ldr lr, [x8]
+ mov sp, x9
+ msr sp_el0, x1
+ ret
+ENDPROC(cpu_switch_to)
+
+/*
+ * This is how we return from a fork.
+ */
+ENTRY(ret_from_fork)
+ bl schedule_tail
+ cbz x19, 1f // not a kernel thread
+ mov x0, x20
+ blr x19
+1: get_thread_info tsk
+ b ret_to_user
+ENDPROC(ret_from_fork)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9ba060b..63cbfb0 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -170,13 +170,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
skip = !!regs;
printk("Call trace:\n");
while (1) {
- unsigned long where = frame.pc;
unsigned long stack;
int ret;
/* skip until specified stack frame */
if (!skip) {
- dump_backtrace_entry(where);
+ dump_backtrace_entry(frame.pc);
} else if (frame.fp == regs->regs[29]) {
skip = 0;
/*
@@ -191,7 +190,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
ret = unwind_frame(tsk, &frame);
if (ret < 0)
break;
- if (in_exception_text(where)) {
+ if (in_entry_text(frame.pc)) {
stack = frame.fp - offsetof(struct pt_regs, stackframe);
if (on_task_stack(tsk, stack) ||
More information about the linux-arm-kernel
mailing list