[PATCH] [makedumpfile] Implement memory regions on IA64

Bernhard Walle bwalle at suse.de
Tue May 15 18:08:45 EDT 2007


Hello,

* Bernhard Walle <bwalle at suse.de> [2007-05-14 23:49]:
> 
> I'm working on support for the 4 layer pagetable.

Here's my first attempt. It works with SLES 10 SP1 on a Tiger4
machine with 16 GiB of memory. (I still had problems with a big
SGI machine when creating the bitmap. I'm investigating this, too.)

The hard part is not implementing the 4-layer page table, but
*detecting* it. crash uses the built-in configuration data in the kernel
image, and that's what I used in my patch. If you have a better and
still reliable method, I'm open to suggestions. :-)

Also, I only use the page table translation when the memory is
vmalloc'd. That's the same way crash does it and that should be
well-tested.

The patch requires your two patches. Please give me your feedback!
And I hope I didn't accidentally break the 3 layer page table.


Thanks,
   Bernhard



---
 ia64.c         |  148 +++++++++++++++++++++++++++++++++++++++++++++---
 makedumpfile.c |  174 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 makedumpfile.h |   43 +++++++++++++-
 3 files changed, 354 insertions(+), 11 deletions(-)

--- a/ia64.c
+++ b/ia64.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 2006  NEC Corporation
  *
+ * Some parts are taken and adapted from the crash-utility,
+ * (c) by RedHat Inc.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -17,6 +20,18 @@
 
 #include "makedumpfile.h"
 
+
+/*
+ *  Tell whether vaddr lies in [info->vmalloc_start, KERNEL_UNCACHED_BASE).
+ *  The vmalloc() starting address is either the traditional
+ *  0xa000000000000000 or bumped up in 2.6 to 0xa000000200000000.
+ */
+int
+is_vmalloc_addr_ia64(struct DumpInfo *info, unsigned long vaddr)
+{
+	return info->vmalloc_start <= vaddr && vaddr < (ulong)KERNEL_UNCACHED_BASE;
+}
+
 int
 get_phys_base_ia64(struct DumpInfo *info)
 {
@@ -51,18 +66,80 @@ get_machdep_info_ia64(struct DumpInfo *i
 }
 
 unsigned long
-ia64_vtop(struct DumpInfo *info, unsigned long long vaddr)
+ia64_vtop3(struct DumpInfo *info, unsigned long long vaddr)
 {
 	unsigned long paddr = 0, temp, page_dir, pgd_pte, page_middle, pmd_pte;
 	unsigned long page_table, pte;
 
-	if (VADDR_REGION(vaddr) != KERNEL_VMALLOC_REGION) {
-		ERRMSG("vaddr(%llx) is not KERNEL_VMALLOC_REGION.\n", vaddr);
+
+	/*
+	 * Translate a virtual address to a physical address
+	 * by using Layer 3 paging.
+	 */
+	if (SYMBOL(swapper_pg_dir) == NOT_FOUND_SYMBOL) {
+		ERRMSG("Can't get the symbol of swapper_pg_dir.\n");
 		return paddr;
 	}
-	paddr = vaddr_to_paddr(info, vaddr);
-	if (paddr)
+
+	/*
+	 * Get PGD
+	 */
+	temp = vaddr & MASK_PGD_3L;
+	temp = temp >> (PGDIR_SHIFT_3L - 3);
+	page_dir = SYMBOL(swapper_pg_dir) + temp;
+	if (!readmem(info, page_dir, &pgd_pte, sizeof pgd_pte)) {
+		ERRMSG("Can't get pgd_pte (page_dir:%lx).\n", page_dir);
 		return paddr;
+	}
+
+	/*
+	 * Get PMD
+	 */
+	temp = vaddr & MASK_PMD;
+	temp = temp >> (PMD_SHIFT - 3);
+	page_middle = pgd_pte + temp;
+	/*
+	 * Convert physical address to virtual address
+	 */
+	page_middle = paddr_to_vaddr(info, page_middle);
+	if (!readmem(info, page_middle, &pmd_pte, sizeof pmd_pte)) {
+		ERRMSG("Can't get pmd_pte (page_middle:%lx).\n", page_middle);
+		return paddr;
+	}
+
+	/*
+	 * Get PTE
+	 */
+	temp = vaddr & MASK_PTE;
+	temp = temp >> (PAGE_SHIFT - 3);
+	page_table = pmd_pte + temp;
+	/*
+	 * Convert physical address to virtual address
+	 */
+	page_table = paddr_to_vaddr(info, page_table);
+	if (!readmem(info, page_table, &pte, sizeof pte)) {
+		ERRMSG("Can't get pte (page_table:%lx).\n", page_table);
+		return paddr;
+	}
+
+	/*
+	 * Get physical address
+	 */
+	temp = vaddr & MASK_POFFSET;
+	paddr = (pte & _PAGE_PPN_MASK) + temp;
+	if (info->flag_debug) {
+		MSG("vaddr:%llx -> paddr:%lx\n", vaddr, paddr);
+	}
+
+	return paddr;
+}
+
+unsigned long
+ia64_vtop4(struct DumpInfo *info, unsigned long long vaddr)
+{
+	unsigned long paddr = 0, temp, page_dir, pgd_pte, page_upper,
+		      pud_pte, page_middle, pmd_pte;
+	unsigned long page_table, pte;
 
 	/*
 	 * Translate a virtual address to a physical address
@@ -76,8 +153,8 @@ ia64_vtop(struct DumpInfo *info, unsigne
 	/*
 	 * Get PGD
 	 */
-	temp = vaddr & MASK_PGD;
-	temp = temp >> (PGDIR_SHIFT - 3);
+	temp = vaddr & MASK_PGD_4L;
+	temp = temp >> (PGDIR_SHIFT_4L - 3);
 	page_dir = SYMBOL(swapper_pg_dir) + temp;
 	if (!readmem(info, page_dir, &pgd_pte, sizeof pgd_pte)) {
 		ERRMSG("Can't get pgd_pte (page_dir:%lx).\n", page_dir);
@@ -85,11 +162,26 @@ ia64_vtop(struct DumpInfo *info, unsigne
 	}
 
 	/*
+	 * Get PUD
+	 */
+	temp = vaddr & MASK_PUD;
+	temp = temp >> (PUD_SHIFT - 3);
+	page_upper = pgd_pte + temp;
+	/*
+	 * Convert physical address to virtual address
+	 */
+	page_upper = paddr_to_vaddr(info, page_upper);
+	if (!readmem(info, page_upper, &pud_pte, sizeof pud_pte)) {
+		ERRMSG("Can't get pud_pte (page_upper:%lx).\n", page_upper);
+		return paddr;
+	}
+
+	/*
 	 * Get PMD
 	 */
 	temp = vaddr & MASK_PMD;
 	temp = temp >> (PMD_SHIFT - 3);
-	page_middle = pgd_pte + temp;
+	page_middle = pud_pte + temp;
 	/*
 	 * Convert physical address to virtual address
 	 */
@@ -126,6 +218,30 @@ ia64_vtop(struct DumpInfo *info, unsigne
 	return paddr;
 }
 
+unsigned long
+ia64_vtop(struct DumpInfo *info, unsigned long long vaddr)
+{
+	unsigned long phys;
+
+	/* Only addresses in KERNEL_VMALLOC_REGION are translated here. */
+	if (VADDR_REGION(vaddr) != KERNEL_VMALLOC_REGION) {
+		ERRMSG("vaddr(%llx) is not KERNEL_VMALLOC_REGION.\n", vaddr);
+		return 0;
+	}
+	/* Prefer vaddr_to_paddr(); continue only when it yields 0. */
+	if ((phys = vaddr_to_paddr(info, vaddr)) != 0)
+		return phys;
+
+	/* Not a vmalloc address: linear offset from kernel_start. */
+	if (!is_vmalloc_addr_ia64(info, vaddr))
+		return vaddr - info->kernel_start +
+			(info->phys_base & KERNEL_TR_PAGE_MASK);
+
+	/* vmalloc address: walk the page table with the detected depth. */
+	if (info->mem_flags & MEMORY_4LAYER_PAGETABLE)
+		return ia64_vtop4(info, vaddr);
+	return ia64_vtop3(info, vaddr);
+}
 
 /*
  * Convert Virtual Address to File Offest.
@@ -170,5 +286,21 @@ vaddr_to_offset_ia64(struct DumpInfo *in
 	return offset;
 }
 
+int
+get_machdep_kernel_start_ia64(struct DumpInfo *info)
+{
+	/* Record _stext as kernel_start; without the symbol nothing is set. */
+	if (SYMBOL(_stext) == NOT_FOUND_SYMBOL)
+		return FALSE;
+	info->kernel_start = SYMBOL(_stext);
+
+	/* Kernel inside the vmalloc region: vmalloc begins 4 GiB above it. */
+	info->vmalloc_start =
+		(VADDR_REGION(info->kernel_start) == KERNEL_VMALLOC_REGION)
+		? info->kernel_start + 4*1024UL*1024UL*1024UL
+		: KERNEL_VMALLOC_BASE;
+	return TRUE;
+}
+
 #endif /* ia64 */
 
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1420,6 +1420,174 @@ get_symbol_info(struct DumpInfo *info)
 	return TRUE;
 }
 
+
+/*
+ * Detect the page table depth from the IKCONFIG blob embedded in the kernel:
+ * read the memory at symbol kernel_config_data, inflate the data between
+ * MAGIC_START and MAGIC_END, and set MEMORY_4LAYER_PAGETABLE in
+ * info->mem_flags if a line with the key CONFIG_PGTABLE_4 is present.
+ *
+ * Returns TRUE on success, FALSE if the symbol, the markers or the
+ * compressed data cannot be read or inflated.
+ */
+int
+read_kernel_config(struct DumpInfo *info)
+{
+	int ii, ret, end, found=0;
+	unsigned long size, bufsz;
+	char *pos, *ln, *buf, *head, *tail, *uncomp;
+	char line[512];
+	z_stream stream;
+	unsigned long kernel_config_data;
+
+	kernel_config_data = get_symbol_addr(info, "kernel_config_data");
+	if (kernel_config_data == 0) {
+		ERRMSG("Can't read kernel configuration from kernel binary\n");
+		return FALSE;
+	}
+
+	/* We don't know how large IKCONFIG is, so we start with
+	 * 32k, if we can't find MAGIC_END assume we didn't read
+	 * enough, double it and try again.
+	 */
+	ii = 32;
+
+again:
+	size = ii * 1024;
+
+	/* One extra byte so the string searches below always hit a NUL. */
+	if ((buf = (char *)malloc(size + 1)) == NULL) {
+		MSG("cannot malloc IKCONFIG input buffer\n");
+		return FALSE;
+	}
+	buf[size] = '\0';
+
+	if (!readmem(info, kernel_config_data, buf, size)) {
+		MSG("cannot read kernel_config_data\n");
+		goto out2;
+	}
+
+	/* Find the start */
+	if (strstr(buf, MAGIC_START))
+		head = buf + MAGIC_SIZE + 10; /* skip past MAGIC_START and gzip header */
+	else {
+		MSG("could not find MAGIC_START!\n");
+		goto out2;
+	}
+
+	tail = head;
+
+	end = strlen(MAGIC_END);
+
+	/* Find the end*/
+	while (tail < (buf + (size - 1))) {
+		if (strncmp(tail, MAGIC_END, end)==0) {
+			found = 1;
+			break;
+		}
+		tail++;
+	}
+
+	if (found) {
+		bufsz = tail - head;
+		size = 10 * bufsz;
+		/* One extra byte for the NUL that sscanf() needs. */
+		if ((uncomp = (char *)malloc(size + 1)) == NULL) {
+			MSG("cannot malloc IKCONFIG output buffer\n");
+			goto out2;
+		}
+	} else {
+		if (ii > 512) {
+			MSG("could not find MAGIC_END!\n");
+			goto out2;
+		} else {
+			free(buf);
+			ii *= 2;
+			goto again;
+		}
+	}
+
+	/* initialize zlib */
+	stream.next_in = (Bytef *)head;
+	stream.avail_in = (uInt)bufsz;
+
+	stream.next_out = (Bytef *)uncomp;
+	stream.avail_out = (uInt)size;
+
+	stream.zalloc = NULL;
+	stream.zfree = NULL;
+	stream.opaque = NULL;
+
+	ret = inflateInit2(&stream, -MAX_WBITS);
+	if (ret != Z_OK) {
+		ERRMSG("error while reading kernel config, inflateInit2 "
+				"returned %d\n", ret);
+		goto out1;
+	}
+
+	ret = inflate(&stream, Z_FINISH);
+
+	if (ret != Z_STREAM_END) {
+		inflateEnd(&stream);
+		if (ret == Z_NEED_DICT ||
+		   (ret == Z_BUF_ERROR && stream.avail_in == 0)) {
+			ERRMSG("error while reading kernel config, stream.avail_in = 0, "
+					"inflate returned %d\n", ret);
+			goto out1;
+		}
+		ERRMSG("error while reading kernel config, inflate returned "
+				"with %d\n", ret);
+		goto out1;
+	}
+	/* inflate() does not NUL-terminate; sscanf() below needs a string. */
+	uncomp[stream.total_out] = '\0';
+
+	ret = inflateEnd(&stream);
+
+	pos = uncomp;
+
+	do {
+		ret = sscanf(pos, "%511[^\n]\n%n", line, &ii);
+		if (ret > 0) {
+			pos += ii;
+
+			ln = line;
+
+			/* skip leading whitespace */
+			while (is_blank(*ln))
+				ln++;
+
+			/* skip comments */
+			if (*ln == '#')
+				continue;
+
+			/* Find '=' */
+			if ((head = strchr(ln, '=')) != NULL) {
+				*head = '\0';
+
+				/* strip trailing whitespace, never stepping before ln */
+				while (head > ln && is_blank(head[-1]))
+					*--head = '\0';
+			} else /* Bad line, skip it */
+				continue;
+
+			if (strcmp(ln, "CONFIG_PGTABLE_4") == 0)
+				info->mem_flags |= MEMORY_4LAYER_PAGETABLE;
+		}
+	} while (ret > 0);
+
+	free(uncomp);
+	free(buf);
+	return TRUE;
+
+out1:
+	free(uncomp);
+out2:
+	free(buf);
+
+	return FALSE;
+}
+
 int
 get_structure_info(struct DumpInfo *info)
 {
@@ -2289,9 +2457,15 @@ initial(struct DumpInfo *info)
 			return FALSE;
 	}
 
+	if (!get_machdep_kernel_start(info))
+		return FALSE;
+
 	if (!check_release(info))
 		return FALSE;
 
+	if (!read_kernel_config(info))
+		return FALSE;
+
 	if (!get_machdep_info(info))
 		return FALSE;
 
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -29,6 +29,7 @@
 #include <libelf.h>
 #include <dwarf.h>
 #include <byteswap.h>
+#include <ctype.h>
 #include "diskdump_mod.h"
 
 /*
@@ -64,6 +65,19 @@ enum {
 #define LSEEKED_PDESC	(2)
 #define LSEEKED_PDATA	(3)
 
+/*
+ * Flags
+ */
+#define MEMORY_4LAYER_PAGETABLE	(1 << 0)
+
+/*
+ * For kernel configuration
+ */
+#define MAGIC_START  "IKCFG_ST"
+#define MAGIC_END    "IKCFG_ED"
+#define MAGIC_SIZE   (sizeof(MAGIC_START) - 1)
+
+
 static inline int
 test_bit(int nr, unsigned long addr)
 {
@@ -73,6 +87,12 @@ test_bit(int nr, unsigned long addr)
 	return ((mask & addr) != 0);
 }
 
+static inline int
+is_blank(int c)
+{
+	return (c == '\t' || c == ' ') ? 1 : 0;	/* space or tab only */
+}
+
 #define isLRU(flags)		test_bit(PG_lru, flags)
 #define isPrivate(flags)	test_bit(PG_private, flags)
 #define isSwapCache(flags)	test_bit(PG_swapcache, flags)
@@ -376,12 +396,21 @@ do { \
 #define PTRS_PER_PTD_SHIFT	(PAGE_SHIFT - 3)
 
 #define PMD_SHIFT		(PAGE_SHIFT + PTRS_PER_PTD_SHIFT)
-#define PGDIR_SHIFT		(PMD_SHIFT  + PTRS_PER_PTD_SHIFT)
+#define PGDIR_SHIFT_3L		(PMD_SHIFT  + PTRS_PER_PTD_SHIFT)
 
 #define MASK_POFFSET	((1UL << PAGE_SHIFT) - 1)
 #define MASK_PTE	((1UL << PMD_SHIFT) - 1) &~((1UL << PAGE_SHIFT) - 1)
-#define MASK_PMD	((1UL << PGDIR_SHIFT) - 1) &~((1UL << PMD_SHIFT) - 1)
-#define MASK_PGD	((1UL << REGION_SHIFT) - 1) & (~((1UL << PGDIR_SHIFT) - 1))
+#define MASK_PMD	((1UL << PGDIR_SHIFT_3L) - 1) &~((1UL << PMD_SHIFT) - 1)
+#define MASK_PGD_3L	((1UL << REGION_SHIFT) - 1) & (~((1UL << PGDIR_SHIFT_3L) - 1))
+
+/*
+ * Layer 4 paging: MASK_PUD keeps only the PUD index bits (below PGDIR_SHIFT_4L).
+ */
+#define PUD_SHIFT		(PMD_SHIFT + PTRS_PER_PTD_SHIFT)
+#define PGDIR_SHIFT_4L		(PUD_SHIFT + PTRS_PER_PTD_SHIFT)
+
+#define MASK_PUD	(((1UL << PGDIR_SHIFT_4L) - 1) & ~((1UL << PUD_SHIFT) - 1))
+#define MASK_PGD_4L	(((1UL << REGION_SHIFT) - 1) & ~((1UL << PGDIR_SHIFT_4L) - 1))
 
 #endif          /* ia64 */
 
@@ -393,6 +422,7 @@ int get_machdep_info_x86();
 #define get_phys_base(X)	TRUE
 #define get_machdep_info(X)	get_machdep_info_x86(X)
 #define vaddr_to_offset(X, Y)	vaddr_to_offset_general(X,Y)
+#define get_machdep_kernel_start(X)	TRUE
 #endif /* x86 */
 
 #ifdef __x86_64__
@@ -402,6 +432,7 @@ off_t vaddr_to_offset_x86_64();
 #define get_phys_base(X)	get_phys_base_x86_64(X)
 #define get_machdep_info(X)	get_machdep_info_x86_64(X)
 #define vaddr_to_offset(X, Y)	vaddr_to_offset_x86_64(X, Y)
+#define get_machdep_kernel_start(X)	TRUE
 #endif /* x86_64 */
 
 #ifdef __powerpc__ /* powerpc */
@@ -409,16 +440,19 @@ int get_machdep_info_ppc64();
 #define get_machdep_info(X)	get_machdep_info_ppc64(X)
 #define get_phys_base(X)	TRUE
 #define vaddr_to_offset(X, Y)	vaddr_to_offset_general(X, Y)
+#define get_machdep_kernel_start(X)	TRUE
 #endif          /* powerpc */
 
 #ifdef __ia64__ /* ia64 */
 int get_phys_base_ia64();
 int get_machdep_info_ia64();
+int get_machdep_kernel_start_ia64();
 off_t vaddr_to_offset_ia64();
 #define get_machdep_info(X)	get_machdep_info_ia64(X)
 #define get_phys_base(X)	get_phys_base_ia64(X)
 #define vaddr_to_offset(X, Y)	vaddr_to_offset_ia64(X, Y)
 #define VADDR_REGION(X)		(((unsigned long)(X)) >> REGION_SHIFT)
+#define get_machdep_kernel_start(X)	get_machdep_kernel_start_ia64(X)
 #endif          /* ia64 */
 
 #define MSG(x...)	fprintf(stderr, x)
@@ -509,6 +543,8 @@ struct DumpInfo {
 	unsigned long   max_physmem_bits;
 	unsigned long   sections_per_root;
 	unsigned long	phys_base;
+	unsigned long   kernel_start;
+	unsigned long   vmalloc_start;
 
 	/*
 	 * diskdimp info:
@@ -532,6 +568,7 @@ struct DumpInfo {
 	 */
 	unsigned int		num_mem_map;
 	struct mem_map_data	*mem_map_data;
+	unsigned int		mem_flags;
 
 	/*
 	 * Dump memory image info:



More information about the kexec mailing list