[RFC Patch 4/6] PANIC_MCE: Introduce a new panic flag for fatal MCE, capture related information
K.Prasad
prasad at linux.vnet.ibm.com
Thu May 26 13:15:21 EDT 2011
PANIC_MCE: Introduce a new panic flag for fatal MCE, capture related information
Fatal machine check exceptions (caused by hardware memory errors) will now
result in a 'slim' coredump that captures vital information about the MCE. This
patch introduces a new panic flag and adds new parameters to the *panic()
functions so that they can capture more information about the cause of the crash.
Enable a new ELF notes section to store additional information about the crash.
For MCE, enable a new notes section that captures the relevant register status
(struct mce), to be read later during coredump analysis.
Signed-off-by: K.Prasad <prasad at linux.vnet.ibm.com>
---
arch/arm/kernel/traps.c | 2 +-
arch/powerpc/kernel/traps.c | 2 +-
arch/sh/kernel/traps_32.c | 2 +-
arch/x86/kernel/cpu/mcheck/mce.c | 7 +++----
arch/x86/kernel/dumpstack.c | 2 +-
include/linux/elf.h | 5 +++++
include/linux/kernel.h | 9 +++++----
include/linux/kexec.h | 9 ++++++---
kernel/kexec.c | 17 +++++++++++------
kernel/panic.c | 16 ++++++++--------
10 files changed, 42 insertions(+), 29 deletions(-)
Index: linux-2.6.slim_kdump/include/linux/kernel.h
===================================================================
--- linux-2.6.slim_kdump.orig/include/linux/kernel.h
+++ linux-2.6.slim_kdump/include/linux/kernel.h
@@ -178,17 +178,18 @@ static inline void might_fault(void)
enum panic_flags {
PANIC_NO_KEXEC = (1 << 0),
PANIC_NO_BACKTRACE = (1 << 1),
+ PANIC_MCE = (1 << 2),
};
extern struct atomic_notifier_head panic_notifier_list;
extern long (*panic_blink)(int state);
NORET_TYPE void panic(const char * fmt, ...)
__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
-NORET_TYPE void xpanic(enum panic_flags flags, int timeout,
- const char *fmt, ...)
- __attribute__ ((NORET_AND format (printf, 3, 4))) __cold;
+NORET_TYPE void xpanic(enum panic_flags flags, int timeout, void *arch_info,
+ size_t arch_info_size, const char *fmt, ...)
+ __attribute__ ((NORET_AND format (printf, 5, 6))) __cold;
NORET_TYPE void vpanic(enum panic_flags flags, int timeout,
- const char *fmt,
+ void *arch_info, size_t arch_info_size, const char *fmt,
va_list ap) __noreturn __cold;
extern void oops_enter(void);
extern void oops_exit(void);
Index: linux-2.6.slim_kdump/kernel/panic.c
===================================================================
--- linux-2.6.slim_kdump.orig/kernel/panic.c
+++ linux-2.6.slim_kdump/kernel/panic.c
@@ -61,21 +61,21 @@ NORET_TYPE void panic(const char *fmt, .
{
va_list ap;
va_start(ap, fmt);
- vpanic(0, 0, fmt, ap);
+ vpanic(0, 0, NULL, 0, fmt, ap);
}
EXPORT_SYMBOL(panic);
-NORET_TYPE void xpanic(enum panic_flags flags, int timeout,
- const char *fmt, ...)
+NORET_TYPE void xpanic(enum panic_flags flags, int timeout, void *arch_info,
+ size_t arch_info_size, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- vpanic(flags, timeout, fmt, ap);
+ vpanic(flags, timeout, arch_info, arch_info_size, fmt, ap);
}
EXPORT_SYMBOL(xpanic);
-NORET_TYPE void vpanic(enum panic_flags flags, int timeout,
- const char * fmt, va_list args)
+NORET_TYPE void vpanic(enum panic_flags flags, int timeout, void *arch_info,
+ size_t arch_info_size, const char * fmt, va_list args)
{
static char buf[1024];
long i, i_next = 0;
@@ -99,7 +99,7 @@ NORET_TYPE void vpanic(enum panic_flags
vsnprintf(buf, sizeof(buf), fmt, args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
- if (!(flags & PANIC_NO_BACKTRACE))
+ if (!(flags & (PANIC_NO_BACKTRACE | PANIC_MCE)))
dump_stack();
#endif
@@ -109,7 +109,7 @@ NORET_TYPE void vpanic(enum panic_flags
* Do we want to call this before we try to display a message?
*/
if (!(flags & PANIC_NO_KEXEC))
- crash_kexec(NULL);
+ crash_kexec(NULL, arch_info, arch_info_size, flags);
kmsg_dump(KMSG_DUMP_PANIC);
Index: linux-2.6.slim_kdump/arch/x86/kernel/cpu/mcheck/mce.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/x86/kernel/cpu/mcheck/mce.c
+++ linux-2.6.slim_kdump/arch/x86/kernel/cpu/mcheck/mce.c
@@ -258,8 +258,7 @@ static void wait_for_panic(void)
local_irq_enable();
while (timeout-- > 0)
udelay(1);
- xpanic(PANIC_NO_KEXEC|PANIC_NO_BACKTRACE, 0,
- "Panicing machine check CPU died");
+ xpanic(PANIC_MCE, 0, NULL, 0, "Panicing machine check CPU died");
}
static void mce_panic(char *msg, struct mce *final, char *exp)
@@ -315,8 +314,8 @@ static void mce_panic(char *msg, struct
if (exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
if (!fake_panic) {
- xpanic(PANIC_NO_KEXEC|PANIC_NO_BACKTRACE, mce_panic_timeout,
- msg);
+ xpanic(PANIC_MCE, mce_panic_timeout, final,
+ sizeof(struct mce), msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
}
Index: linux-2.6.slim_kdump/arch/arm/kernel/traps.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/arm/kernel/traps.c
+++ linux-2.6.slim_kdump/arch/arm/kernel/traps.c
@@ -274,7 +274,7 @@ void die(const char *str, struct pt_regs
ret = __die(str, err, thread, regs);
if (regs && kexec_should_crash(thread->task))
- crash_kexec(regs);
+ crash_kexec(regs, NULL, 0, 0);
bust_spinlocks(0);
add_taint(TAINT_DIE);
Index: linux-2.6.slim_kdump/arch/powerpc/kernel/traps.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/powerpc/kernel/traps.c
+++ linux-2.6.slim_kdump/arch/powerpc/kernel/traps.c
@@ -161,7 +161,7 @@ int die(const char *str, struct pt_regs
if (kexec_should_crash(current) ||
kexec_sr_activated(smp_processor_id()))
- crash_kexec(regs);
+ crash_kexec(regs, NULL, 0, 0);
crash_kexec_secondary(regs);
if (in_interrupt())
Index: linux-2.6.slim_kdump/arch/sh/kernel/traps_32.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/sh/kernel/traps_32.c
+++ linux-2.6.slim_kdump/arch/sh/kernel/traps_32.c
@@ -106,7 +106,7 @@ void die(const char * str, struct pt_reg
oops_exit();
if (kexec_should_crash(current))
- crash_kexec(regs);
+ crash_kexec(regs, NULL, 0, 0);
if (in_interrupt())
panic("Fatal exception in interrupt");
Index: linux-2.6.slim_kdump/arch/x86/kernel/dumpstack.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/x86/kernel/dumpstack.c
+++ linux-2.6.slim_kdump/arch/x86/kernel/dumpstack.c
@@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(oops_begin);
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
if (regs && kexec_should_crash(current))
- crash_kexec(regs);
+ crash_kexec(regs, NULL, 0, 0);
bust_spinlocks(0);
die_owner = -1;
Index: linux-2.6.slim_kdump/include/linux/elf.h
===================================================================
--- linux-2.6.slim_kdump.orig/include/linux/elf.h
+++ linux-2.6.slim_kdump/include/linux/elf.h
@@ -381,6 +381,11 @@ typedef struct elf64_shdr {
#define NT_PRPSINFO 3
#define NT_TASKSTRUCT 4
#define NT_AUXV 6
+/*
+ * Although numbers 1 - 6 have been defined here, the user-space include files
+ * have numbers 1 - 20 taken up. Hence defining NT_MCE as 21.
+ */
+#define NT_MCE 21 /* Machine Check Exception related data */
#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
Index: linux-2.6.slim_kdump/include/linux/kexec.h
===================================================================
--- linux-2.6.slim_kdump.orig/include/linux/kexec.h
+++ linux-2.6.slim_kdump/include/linux/kexec.h
@@ -125,10 +125,12 @@ extern asmlinkage long compat_sys_kexec_
#endif
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
-extern void crash_kexec(struct pt_regs *);
+extern void crash_kexec(struct pt_regs *, void *arch_info,
+ size_t arch_info_size, enum panic_flags);
int kexec_should_crash(struct task_struct *);
void crash_save_cpu(struct pt_regs *regs, int cpu);
-void crash_save_vmcoreinfo(void);
+void crash_save_vmcoreinfo(void *arch_info, size_t arch_info_size,
+ enum panic_flags);
void arch_crash_save_vmcoreinfo(void);
void vmcoreinfo_append_str(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
@@ -213,7 +215,8 @@ void crash_free_reserved_phys_range(unsi
#else /* !CONFIG_KEXEC */
struct pt_regs;
struct task_struct;
-static inline void crash_kexec(struct pt_regs *regs) { }
+static inline void crash_kexec(struct pt_regs *regs, void *arch_info,
+ size_t arch_info_size, enum panic_flags flags) { } /* !CONFIG_KEXEC: no-op */
static inline int kexec_should_crash(struct task_struct *p) { return 0; }
#endif /* CONFIG_KEXEC */
#endif /* LINUX_KEXEC_H */
Index: linux-2.6.slim_kdump/kernel/kexec.c
===================================================================
--- linux-2.6.slim_kdump.orig/kernel/kexec.c
+++ linux-2.6.slim_kdump/kernel/kexec.c
@@ -1065,7 +1065,8 @@ asmlinkage long compat_sys_kexec_load(un
}
#endif
-void crash_kexec(struct pt_regs *regs)
+void crash_kexec(struct pt_regs *regs, void *arch_info, size_t arch_info_size,
+ enum panic_flags flags)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
@@ -1082,7 +1083,7 @@ void crash_kexec(struct pt_regs *regs)
kmsg_dump(KMSG_DUMP_KEXEC);
crash_setup_regs(&fixed_regs, regs);
- crash_save_vmcoreinfo();
+ crash_save_vmcoreinfo(arch_info, arch_info_size, flags);
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
@@ -1381,7 +1382,8 @@ int __init parse_crashkernel(char *cm
-void crash_save_vmcoreinfo(void)
+void crash_save_vmcoreinfo(void *arch_info, size_t arch_info_size,
+ enum panic_flags flags)
{
u32 *buf;
@@ -1392,9 +1394,12 @@ void crash_save_vmcoreinfo(void)
buf = (u32 *)vmcoreinfo_note;
- buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
- vmcoreinfo_size);
-
+ if (flags & PANIC_MCE)
+ buf = append_elf_note(buf, "PANIC_MCE", NT_MCE, arch_info,
+ arch_info_size);
+ else
+ buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0,
+ vmcoreinfo_data, vmcoreinfo_size);
final_note(buf);
}
More information about the kexec
mailing list