x86 remap allocator in kernel 3.0

Petr Tesarik ptesarik at suse.cz
Fri Jan 13 09:39:03 EST 2012


Dne Pa 13. ledna 2012 04:22:50 tachibana at mxm.nes.nec.co.jp napsal(a):
> Hi Petr,
> 
> On 2012/01/12 11:40:08 +0100, Petr Tesarik <ptesarik at suse.cz> wrote:
> > Dne Čt 12. ledna 2012 09:16:06 Atsushi Kumagai napsal(a):
> > > Hello Petr,
> > > 
> > > On Tue, 10 Jan 2012 19:14:32 +0100
> > > 
> > > Petr Tesarik <ptesarik at suse.cz> wrote:
> > > > Ken'ichi Ohmichi, please note that makedumpfile is also affected by
> > > > this deficiency. On my test system, it will fail to produce any
> > > > output if I set dump level to anything greater than zero:
> > > > 
> > > > makedumpfile -c -d 31 -x vmlinux-3.0.13-0.5-pae.debug vmcore kdump.31
> > > > readmem: Can't convert a physical address(34a012b4) to offset.
> > > > readmem: type_addr: 0, addr:f4a012b4, size:4
> > > > get_mm_discontigmem: Can't get node_start_pfn.
> > > > 
> > > > makedumpfile Failed.
> > > > 
> > > > However, fixing this for makedumpfile is harder, and it will most
> > > > likely require a few more lines in VMCOREINFO, because debug symbols
> > > > may not be available at dump time, and I can't see any alternative
> > > > method to locate the remapped regions.
> > > 
> > > Thank you for your indication.
> > > 
> > > Could you send me your kernel configuration so that I can reproduce the
> > > issue ?
> > 
> > Attached. The most important settings are:
> > 
> > CONFIG_X86_32=y
> > CONFIG_DISCONTIGMEM_MANUAL=y
> > 
> > This also depends on CONFIG_NUMA=y
> > 
> > FYI my test system runs 3.0.15 (because that's used for SLES11 SP2), but
> > the same issue also exists in any later version.
> > 
> > Petr Tesarik
> > SUSE Linux
> 
> Thank you for the config. I will fix makedumpfile. However it will take
> time to solve various issues because I've never run makedumpfile on 3.0.X.

This patch works for me, at least when using the "-x" command-line switch. To 
make it perfect, we'll also need an accompanying patch to the kernel to store 
the appropriate values in VMCOREINFO.

Signed-off-by: Petr Tesarik <ptesarik at suse.cz>

---
 arch/x86.c     |   65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 makedumpfile.c |   16 ++++++++++++++
 makedumpfile.h |    4 +++
 3 files changed, 85 insertions(+)

--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -831,6 +831,9 @@ get_symbol_info(void)
 	if (SYMBOL(cpu_online_mask) == NOT_FOUND_SYMBOL)
 		SYMBOL_INIT(cpu_online_mask, "cpu_online_map");
 	SYMBOL_INIT(kexec_crash_image, "kexec_crash_image");
+	SYMBOL_INIT(node_remap_start_vaddr, "node_remap_start_vaddr");
+	SYMBOL_INIT(node_remap_end_vaddr, "node_remap_end_vaddr");
+	SYMBOL_INIT(node_remap_start_pfn, "node_remap_start_pfn");
 
 	if (SYMBOL(node_data) != NOT_FOUND_SYMBOL)
 		SYMBOL_ARRAY_TYPE_INIT(node_data, "node_data");
@@ -842,6 +845,9 @@ get_symbol_info(void)
 		SYMBOL_ARRAY_LENGTH_INIT(node_memblk, "node_memblk");
 	if (SYMBOL(__per_cpu_offset) != NOT_FOUND_SYMBOL)
 		SYMBOL_ARRAY_LENGTH_INIT(__per_cpu_offset, "__per_cpu_offset");
+	if (SYMBOL(node_remap_start_pfn) != NOT_FOUND_SYMBOL)
+		SYMBOL_ARRAY_LENGTH_INIT(node_remap_start_pfn,
+					"node_remap_start_pfn");
 
 	return TRUE;
 }
@@ -1290,6 +1296,9 @@ write_vmcoreinfo_data(void)
 	WRITE_SYMBOL("log_end", log_end);
 	WRITE_SYMBOL("max_pfn", max_pfn);
 	WRITE_SYMBOL("high_memory", high_memory);
+	WRITE_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr);
+	WRITE_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr);
+	WRITE_SYMBOL("node_remap_start_pfn", node_remap_start_pfn);
 
 	/*
 	 * write the structure size of 1st kernel
@@ -1341,6 +1350,9 @@ write_vmcoreinfo_data(void)
 		WRITE_ARRAY_LENGTH("mem_section", mem_section);
 	if (SYMBOL(node_memblk) != NOT_FOUND_SYMBOL)
 		WRITE_ARRAY_LENGTH("node_memblk", node_memblk);
+	if (SYMBOL(node_remap_start_pfn) != NOT_FOUND_SYMBOL)
+		WRITE_ARRAY_LENGTH("node_remap_start_pfn",
+				   node_remap_start_pfn);
 
 	WRITE_ARRAY_LENGTH("zone.free_area", zone.free_area);
 	WRITE_ARRAY_LENGTH("free_area.free_list", free_area.free_list);
@@ -1586,6 +1598,9 @@ read_vmcoreinfo(void)
 	READ_SYMBOL("log_end", log_end);
 	READ_SYMBOL("max_pfn", max_pfn);
 	READ_SYMBOL("high_memory", high_memory);
+	READ_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr);
+	READ_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr);
+	READ_SYMBOL("node_remap_start_pfn", node_remap_start_pfn);
 
 	READ_STRUCTURE_SIZE("page", page);
 	READ_STRUCTURE_SIZE("mem_section", mem_section);
@@ -1628,6 +1643,7 @@ read_vmcoreinfo(void)
 	READ_ARRAY_LENGTH("node_memblk", node_memblk);
 	READ_ARRAY_LENGTH("zone.free_area", zone.free_area);
 	READ_ARRAY_LENGTH("free_area.free_list", free_area.free_list);
+	READ_ARRAY_LENGTH("node_remap_start_pfn", node_remap_start_pfn);
 
 	READ_NUMBER("NR_FREE_PAGES", NR_FREE_PAGES);
 	READ_NUMBER("N_ONLINE", N_ONLINE);
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -961,6 +961,9 @@ struct symbol_table {
 	unsigned long long	log_buf_len;
 	unsigned long long	log_end;
 	unsigned long long	max_pfn;
+	unsigned long long	node_remap_start_vaddr;
+	unsigned long long	node_remap_end_vaddr;
+	unsigned long long	node_remap_start_pfn;
 
 	/*
 	 * for Xen extraction
@@ -1179,6 +1182,7 @@ struct array_table {
 	long	mem_section;
 	long	node_memblk;
 	long	__per_cpu_offset;
+	long	node_remap_start_pfn;
 
 	/*
 	 * Structure
--- a/arch/x86.c
+++ b/arch/x86.c
@@ -19,6 +19,53 @@
 #include "../elf_info.h"
 #include "../makedumpfile.h"
 
+static int max_numnodes;	/* entries in the tables below; 0 until remap_init() */
+static unsigned long *remap_start_vaddr;
+static unsigned long *remap_end_vaddr;
+static unsigned long *remap_start_pfn;
+
+/*
+ * Read the node_remap_* arrays with readmem() into one calloc'ed buffer.
+ * Returns FALSE on error; TRUE on success or if the symbols are not found.
+ */
+static int
+remap_init(void)
+{
+	int n;
+
+	if (SYMBOL(node_remap_start_vaddr) == NOT_FOUND_SYMBOL ||
+	    SYMBOL(node_remap_end_vaddr) == NOT_FOUND_SYMBOL ||
+	    SYMBOL(node_remap_start_pfn) == NOT_FOUND_SYMBOL ||
+	    ARRAY_LENGTH(node_remap_start_pfn) == NOT_FOUND_STRUCTURE)
+		return TRUE;
+
+	n = ARRAY_LENGTH(node_remap_start_pfn);
+	remap_start_vaddr = calloc(3 * n, sizeof(unsigned long));
+	if (!remap_start_vaddr) {
+		ERRMSG("Can't allocate remap allocator info.\n");
+		return FALSE;
+	}
+	remap_end_vaddr = remap_start_vaddr + n;
+	remap_start_pfn = remap_end_vaddr + n;
+	if (!readmem(VADDR, SYMBOL(node_remap_start_vaddr), remap_start_vaddr,
+		     n * sizeof(unsigned long))) {
+		ERRMSG("Can't get node_remap_start_vaddr.\n");
+		return FALSE;
+	}
+	if (!readmem(VADDR, SYMBOL(node_remap_end_vaddr), remap_end_vaddr,
+		     n * sizeof(unsigned long))) {
+		ERRMSG("Can't get node_remap_end_vaddr.\n");
+		return FALSE;
+	}
+	if (!readmem(VADDR, SYMBOL(node_remap_start_pfn), remap_start_pfn,
+		     n * sizeof(unsigned long))) {
+		ERRMSG("Can't get node_remap_start_pfn.\n");
+		return FALSE;
+	}
+	max_numnodes = n;
+	return TRUE;
+}
+
 int
 get_machdep_info_x86(void)
 {
@@ -48,6 +95,9 @@ get_machdep_info_x86(void)
 	info->kernel_start = SYMBOL(_stext) & ~KVBASE_MASK;
 	DEBUG_MSG("kernel_start : %lx\n", info->kernel_start);
 
+	if (!remap_init())
+		return FALSE;
+
 	/*
 	 * For the compatibility, makedumpfile should run without the symbol
 	 * vmlist and the offset of vm_struct.addr if they are not necessary.
@@ -90,6 +140,18 @@ get_versiondep_info_x86(void)
 }
 
 unsigned long long
+vtop_x86_remap(unsigned long vaddr)
+{
+	int node;	/* index into the tables filled by remap_init() */
+	for (node = 0; node < max_numnodes; ++node) {
+		unsigned long start = remap_start_vaddr[node];
+		if (start <= vaddr && vaddr < remap_end_vaddr[node])
+			return pfn_to_paddr(remap_start_pfn[node]) + vaddr - start;
+	}
+	return NOT_PADDR;
+}
+
+unsigned long long
 vtop_x86_PAE(unsigned long vaddr)
 {
 	unsigned long long page_dir, pgd_pte, pmd_paddr, pmd_pte;
@@ -152,6 +214,9 @@ vaddr_to_paddr_x86(unsigned long vaddr)
 {
 	unsigned long long paddr;
 
+	if ((paddr = vtop_x86_remap(vaddr)) != NOT_PADDR)
+		return paddr;
+
 	if ((paddr = vaddr_to_paddr_general(vaddr)) != NOT_PADDR)
 		return paddr;
 



More information about the kexec mailing list