[PATCH] riscv/fault: Dump user opcode bytes on fatal faults
Yunhui Cui
cuiyunhui at bytedance.com
Mon Mar 27 04:56:42 PDT 2023
We encountered such a problem(logs are below). We are more curious about
which instruction execution caused the exception. After dumping it
through show_opcodes(), we found that it was caused by a floating-point
instruction.
In this way, we found the problem: in the system bringup , it is
precisely that we have not enabled the floating point function.
So when an exception occurs, it is necessary to dump the instruction
that caused the exception, like x86/fault (ba54d856a9d8).
Logs:
[ 0.822481] Run /init as init process
[ 0.837569] init[1]: unhandled signal 4 code 0x1 at 0x000000000005e028 in bb[10000+5fe000]
[ 0.932292] CPU: 0 PID: 1 Comm: init Not tainted 5.14.0-rc4-00048-g4a843c9043e8-dirty #138
[ 0.932949] Hardware name: , BIOS
[ 0.933399] epc : 000000000005e028 ra : 000000000007c7c4 sp : 0000003fffd45da0
[ 0.933855] gp : ffffffff816ea2d8 tp : 0000000000000000 t0 : 0000000000000000
[ 0.934303] t1 : 0000003fffd35df0 t2 : 0000000000000000 s0 : 0000000000000000
[ 0.934734] s1 : 0000000000000000 a0 : 0000003fffd36190 a1 : 0000003fffd45e18
[ 0.935200] a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
[ 0.935622] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000
[ 0.936073] s2 : 0000000000000000 s3 : 0000000000000000 s4 : 0000000000000000
[ 0.936495] s5 : 0000000000000000 s6 : 0000000000000000 s7 : 0000000000000000
[ 0.936947] s8 : 0000000000000000 s9 : 0000000000000000 s10: 0000000000000000
[ 0.937487] s11: 0000000000d14980 t3 : 0000000000000000 t4 : 0000000000000000
[ 0.937954] t5 : 0000000000000000 t6 : 0000000000000000
[ 0.938510] status: 0000000200000020 badaddr: 00000000f0028053 cause: 0000000000000002
Signed-off-by: Yunhui Cui <cuiyunhui at bytedance.com>
---
arch/riscv/include/asm/bug.h | 1 +
arch/riscv/kernel/process.c | 30 ++++++++++++++++++++++++++++++
arch/riscv/kernel/traps.c | 1 +
3 files changed, 32 insertions(+)
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index d3804a2f9aad..77655dd10a2c 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -86,6 +86,7 @@ struct pt_regs;
struct task_struct;
void __show_regs(struct pt_regs *regs);
+void show_opcodes(struct pt_regs *regs);
void die(struct pt_regs *regs, const char *str);
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 03ac3aa611f5..759dc74fe160 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -83,6 +83,36 @@ void show_regs(struct pt_regs *regs)
dump_backtrace(regs, NULL, KERN_DEFAULT);
}
+static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
+ unsigned int nbytes)
+{
+ if (!user_mode(regs))
+ return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
+
+ /* The user space code from other tasks cannot be accessed. */
+ if (regs != task_pt_regs(current))
+ return -EPERM;
+
+ return copy_from_user_nofault(buf, (void __user *)src, nbytes);
+}
+
+void show_opcodes(struct pt_regs *regs)
+{
+ u8 opcodes[4];
+
+ switch (copy_code(regs, opcodes, regs->epc, sizeof(opcodes))) {
+ case 0:
+ pr_info("Opcode: %4ph", opcodes);
+ break;
+ case -EPERM:
+ pr_err("Unable to access userspace of other tasks");
+ break;
+ default:
+ pr_err("Failed to access opcode");
+ break;
+ }
+}
+
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 0a98fd0ddfe9..a6f6851e4e87 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -68,6 +68,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
__show_regs(regs);
+ show_opcodes(regs);
}
force_sig_fault(signo, code, (void __user *)addr);
--
2.20.1
More information about the linux-riscv
mailing list