/sys crash_notes provides bad value to kexec
John Blackwood
john.blackwood at ccur.com
Mon Oct 26 12:19:55 EDT 2009
Hello,
When attempting to generate a crash file on a newer (2.6.31.x) NUMA
x86_64 kernel, the kdump kernel was unable to initialize the /proc/vmcore
file due to a bad physical address specified in the elf header for a
per-cpu crash notes area.
It turns out that the physical address that kexec reads from the output
of the:
/sys/devices/system/cpu/cpu1/crash_notes
sysfs file is not correct for NUMA x86_64 architecture systems, and this
physical address is used in the elfheader that the kdump kernel attempts
to use to initialize /proc/vmcore.
I believe that this has to do with the new percpu_alloc=lpage and
percpu_alloc=4k per-cpu setups that are now used.
In those cases, __pa(per_cpu_ptr(crash_notes, cpunum)) does not
return the correct physical address.
I did a rough stab at getting the correct physical address for the
'lpage' case (which I believe tends to be the default method used),
but I was unable to figure out how to get the correct physical address
for the '4k' page case.
For whatever it's worth, here's a patch of my attempt at the lpage version;
it might or might not be useful.
( This patch really assumes only x86 or x86_64 builds, since
the asm/percpu.h header file is only for x86 arch. )
Thank you.
diff -rup a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
--- a/arch/x86/include/asm/percpu.h 2009-10-26 09:33:37.000000000 -0500
+++ b/arch/x86/include/asm/percpu.h 2009-10-26 09:33:53.000000000 -0500
@@ -165,6 +165,15 @@ static inline void *pcpu_lpage_remapped(
}
#endif
+#if defined(CONFIG_NEED_MULTIPLE_NODES) && defined(CONFIG_X86_64)
+unsigned long long pcpul_get_paddr(int cpunum, void *item);
+#else
+static inline unsigned long long pcpul_get_paddr(int cpunum, void *item)
+{
+ return (unsigned long long)NULL;
+}
+#endif
+
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_SMP
diff -rup a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
--- a/arch/x86/kernel/setup_percpu.c 2009-10-26 09:33:37.000000000 -0500
+++ b/arch/x86/kernel/setup_percpu.c 2009-10-26 09:33:53.000000000 -0500
@@ -314,6 +314,35 @@ void *pcpu_lpage_remapped(void *kaddr)
return NULL;
}
+
+#ifdef CONFIG_X86_64
+/*
+ * Return the physical address of the percpu data item for the
+ * specified cpu.
+ *
+ * Returns a physical address or NULL if pcpul_map is not being used.
+ * Currently only called by show_crash_notes().
+ */
+unsigned long long pcpul_get_paddr(int cpunum, void *item)
+{
+ struct pcpul_ent *pmp;
+ void *vaddr, *offset;
+ unsigned long long paddr = (unsigned long long)NULL;
+
+ if (!pcpul_map)
+ return paddr;
+ for (pmp = pcpul_map; pmp->ptr; pmp++) {
+ if ((int)pmp->cpu != cpunum)
+ continue;
+ offset = per_cpu_ptr(item, cpunum) - __per_cpu_offset[cpunum];
+ vaddr = pmp->ptr + (long unsigned int)offset;
+ paddr = __pa(vaddr);
+ return paddr;
+ }
+ return paddr;
+}
+#endif
+
#else
static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
diff -rup a/drivers/base/cpu.c b/drivers/base/cpu.c
--- a/drivers/base/cpu.c 2009-10-26 09:33:37.000000000 -0500
+++ b/drivers/base/cpu.c 2009-10-26 09:33:53.000000000 -0500
@@ -97,6 +97,12 @@ static ssize_t show_crash_notes(struct s
* boot up and this data does not change there after. Hence this
* operation should be safe. No locking required.
*/
+ addr = pcpul_get_paddr(cpunum, crash_notes);
+ if (addr) {
+ rc = sprintf(buf, "%Lx\n", addr);
+ return rc;
+ }
+
addr = __pa(per_cpu_ptr(crash_notes, cpunum));
rc = sprintf(buf, "%Lx\n", addr);
return rc;
More information about the kexec
mailing list