[PATCH v2 12/14] Read kexec backup region

HATAYAMA Daisuke d.hatayama at jp.fujitsu.com
Fri Oct 28 05:49:09 EDT 2011


Deal with the first 640kB memory backed up by kdump by interpreting
the read request to the region as the request to the backup region
that is specially prepared to preserve the original first kernel's
memory situation.

The first 640kB memory contains the data necessary for paging: PTE,
for example. So, initialization must be done before trying to read
memory requiring paging to read such as VMALLOC'ed data.

Signed-off-by: HATAYAMA Daisuke <d.hatayama at jp.fujitsu.com>
---

 makedumpfile.c |   45 +++++++++++----
 makedumpfile.h |   41 ++++++++++++++
 sadump_info.c  |  168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sadump_info.h  |    6 ++
 4 files changed, 249 insertions(+), 11 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index 44f6ee6..df82a48 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -829,6 +829,7 @@ get_symbol_info(void)
 	SYMBOL_INIT(cpu_online_mask, "cpu_online_mask");
 	if (SYMBOL(cpu_online_mask) == NOT_FOUND_SYMBOL)
 		SYMBOL_INIT(cpu_online_mask, "cpu_online_map");
+	SYMBOL_INIT(kexec_crash_image, "kexec_crash_image");
 
 	if (SYMBOL(node_data) != NOT_FOUND_SYMBOL)
 		SYMBOL_ARRAY_TYPE_INIT(node_data, "node_data");
@@ -1115,6 +1116,23 @@ get_structure_info(void)
 	}
 #endif /* __x86_64__ */
 
+	OFFSET_INIT(kimage.segment, "kimage", "segment");
+
+	MEMBER_ARRAY_LENGTH_INIT(kimage.segment, "kimage", "segment");
+
+	SIZE_INIT(kexec_segment, "kexec_segment");
+	OFFSET_INIT(kexec_segment.mem, "kexec_segment", "mem");
+
+	OFFSET_INIT(elf64_hdr.e_phnum, "elf64_hdr", "e_phnum");
+	OFFSET_INIT(elf64_hdr.e_phentsize, "elf64_hdr", "e_phentsize");
+	OFFSET_INIT(elf64_hdr.e_phoff, "elf64_hdr", "e_phoff");
+
+	SIZE_INIT(elf64_hdr, "elf64_hdr");
+	OFFSET_INIT(elf64_phdr.p_type, "elf64_phdr", "p_type");
+	OFFSET_INIT(elf64_phdr.p_offset, "elf64_phdr", "p_offset");
+	OFFSET_INIT(elf64_phdr.p_paddr, "elf64_phdr", "p_paddr");
+	OFFSET_INIT(elf64_phdr.p_memsz, "elf64_phdr", "p_memsz");
+
 	return TRUE;
 }
 
@@ -2619,6 +2637,16 @@ out:
 		if (!get_versiondep_info())
 			return FALSE;
 
+		/*
+		 * NOTE: This must be done before referring to
+		 * VMALLOC'ed memory. The first 640kB contains data
+		 * necessary for paging, like PTE. The absence of the
+		 * region affects reading VMALLOC'ed memory such as
+		 * module data.
+		 */
+		if (info->flag_sadump)
+			sadump_kdump_backup_region_init();
+
 		if (!get_numnodes())
 			return FALSE;
 
@@ -2756,6 +2784,12 @@ set_bit_on_1st_bitmap(unsigned long long pfn)
 }
 
 int
+clear_bit_on_1st_bitmap(unsigned long long pfn)
+{
+	return set_bitmap(info->bitmap1, pfn, 0); /* value 0 for pfn in bitmap1 */
+}
+
+int
 clear_bit_on_2nd_bitmap(unsigned long long pfn)
 {
 	return set_bitmap(info->bitmap2, pfn, 0);
@@ -2796,17 +2830,6 @@ is_in_segs(unsigned long long paddr)
 		return FALSE;
 }
 
-static inline int
-is_zero_page(unsigned char *buf, long page_size)
-{
-	size_t i;
-
-	for (i = 0; i < page_size; i++)
-		if (buf[i])
-			return FALSE;
-	return TRUE;
-}
-
 int
 read_cache(struct cache_data *cd)
 {
diff --git a/makedumpfile.h b/makedumpfile.h
index 020d99c..6ae37d9 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -181,6 +181,7 @@ isAnon(unsigned long mapping)
 #define STRNEQ(A, B)	(A && B && \
 	(strncmp((char *)(A), (char *)(B), strlen((char *)(B))) == 0))
 
+#define USHORT(ADDR)	*((unsigned short *)(ADDR))
 #define UINT(ADDR)	*((unsigned int *)(ADDR))
 #define ULONG(ADDR)	*((unsigned long *)(ADDR))
 
@@ -996,6 +997,7 @@ struct symbol_table {
 	unsigned long long	__per_cpu_offset;
 	unsigned long long	__per_cpu_load;
 	unsigned long long	cpu_online_mask;
+	unsigned long long	kexec_crash_image;
 };
 
 struct size_table {
@@ -1027,6 +1029,8 @@ struct size_table {
 	long	user_regs_struct;
 	long	cpumask;
 	long	cpumask_t;
+	long	kexec_segment;
+	long	elf64_hdr;
 };
 
 struct offset_table {
@@ -1135,6 +1139,27 @@ struct offset_table {
 		long	fs;
 		long	gs;
 	} user_regs_struct;
+
+	struct kimage_s {
+		long	segment;
+	} kimage;
+
+	struct kexec_segment_s {
+		long	mem;
+	} kexec_segment;
+
+	struct elf64_hdr_s {
+		long	e_phnum;
+		long	e_phentsize;
+		long	e_phoff;
+	} elf64_hdr;
+
+	struct elf64_phdr_s {
+		long	p_type;
+		long	p_offset;
+		long	p_paddr;
+		long	p_memsz;
+	} elf64_phdr;
 };
 
 /*
@@ -1159,6 +1184,9 @@ struct array_table {
 	struct free_area_at {
 		long	free_list;
 	} free_area;
+	struct kimage_at {
+		long	segment;
+	} kimage;
 };
 
 struct number_table {
@@ -1331,7 +1359,20 @@ is_dumpable(struct dump_bitmap *bitmap, unsigned long long pfn)
 	return is_on(bitmap->buf, pfn%PFN_BUFBITMAP);
 }
 
+/* Return TRUE when the first page_size bytes at buf are all zero. */
+static inline int
+is_zero_page(unsigned char *buf, long page_size)
+{
+	size_t remaining = page_size;
+	for (; remaining != 0; remaining--, buf++)
+		if (*buf != 0)
+			return FALSE;
+	return TRUE;
+}
+
 void write_vmcoreinfo_data(void);
+int set_bit_on_1st_bitmap(unsigned long long pfn);
+int clear_bit_on_1st_bitmap(unsigned long long pfn);
 
 #ifdef __x86__
 
diff --git a/sadump_info.c b/sadump_info.c
index 2538da6..69296ce 100644
--- a/sadump_info.c
+++ b/sadump_info.c
@@ -77,6 +77,12 @@ struct sadump_info {
 	FILE *file_elf_note;
 	char *cpu_online_mask_buf;
 	size_t cpumask_size;
+/* Backup Region, First 640K of System RAM. */
+#define KEXEC_BACKUP_SRC_END    0x0009ffff
+        unsigned long long backup_src_start;
+        unsigned long backup_src_size;
+        unsigned long long backup_offset;
+	int kdump_backed_up;
 };
 
 static char *guid_to_str(efi_guid_t *guid, char *buf, size_t buflen);
@@ -188,6 +194,30 @@ sadump_copy_1st_bitmap_from_memory(void)
 		offset_page += sizeof(buf);
 	}
 
+	/*
+	 * kdump uses the first 640kB on the 2nd kernel. But both
+	 * bitmaps should reflect the 1st kernel memory situation. We
+	 * modify bitmap accordingly.
+	 */
+	if (si->kdump_backed_up) {
+		unsigned long long paddr, pfn, backup_src_pfn;
+
+		for (paddr = si->backup_src_start;
+		     paddr < si->backup_src_start + si->backup_src_size;
+		     paddr += info->page_size) {
+
+			pfn = paddr_to_pfn(paddr);
+			backup_src_pfn = paddr_to_pfn(paddr +
+						      si->backup_offset -
+						      si->backup_src_start);
+
+			if (is_dumpable(info->bitmap_memory, backup_src_pfn))
+				set_bit_on_1st_bitmap(pfn);
+			else
+				clear_bit_on_1st_bitmap(pfn);
+		}
+	}
+
 	return TRUE;
 }
 
@@ -920,6 +950,11 @@ readpmem_sadump(unsigned long long paddr, void *bufptr, size_t size)
 	char buf[info->page_size];
 	int fd_memory;
 
+	if (si->kdump_backed_up &&
+	    paddr >= si->backup_src_start &&
+	    paddr < si->backup_src_start + si->backup_src_size)
+		paddr += si->backup_offset - si->backup_src_start;
+
 	pfn = paddr_to_pfn(paddr);
 	page_offset = paddr % info->page_size;
 
@@ -1774,4 +1809,182 @@ free_sadump_info(void)
 		free(si->cpu_online_mask_buf);
 }
 
+/*
+ * Locate the kdump backup region holding the 1st kernel's low memory
+ * (the range ending at KEXEC_BACKUP_SRC_END) and, when it has been
+ * filled in, record its location in si.
+ *
+ * The function reads the kexec_crash_image pointer, scans the image's
+ * segments for one that begins with the ELF magic (the elfcorehdr),
+ * and then searches that header's program headers for the PT_LOAD
+ * entry describing the backup area.  si is only modified once
+ * non-zero data has been found in the region; every failure path
+ * returns with si untouched.
+ */
+void
+sadump_kdump_backup_region_init(void)
+{
+	unsigned char buf[BUFSIZE];
+	unsigned long i, total, kexec_crash_image_p, elfcorehdr_p;
+	Elf64_Off e_phoff;
+	uint16_t e_phnum, e_phentsize;
+	unsigned long long backup_offset;
+	unsigned long backup_src_start, backup_src_size;
+	long seg_size, nr_segs, ehdr_size;
+	size_t bufsize;
+
+	if (!readmem(VADDR, SYMBOL(kexec_crash_image), &kexec_crash_image_p,
+		     sizeof(unsigned long))) {
+		ERRMSG("Can't read kexec_crash_image pointer. %s\n",
+		       strerror(errno));
+		return;
+	}
+
+	if (!kexec_crash_image_p) {
+		DEBUG_MSG("sadump: kexec crash image was not loaded\n");
+		return;
+	}
+
+	/*
+	 * The segment array is staged in buf: reject non-positive or
+	 * oversized dimensions rather than overrunning the buffer.
+	 */
+	seg_size = SIZE(kexec_segment);
+	nr_segs = ARRAY_LENGTH(kimage.segment);
+	if (seg_size <= 0 || nr_segs <= 0 ||
+	    (size_t)nr_segs > sizeof(buf) / (size_t)seg_size) {
+		DEBUG_MSG("sadump: invalid kimage segment layout\n");
+		return;
+	}
+
+	if (!readmem(VADDR, kexec_crash_image_p+OFFSET(kimage.segment),
+		     buf, seg_size * nr_segs)) {
+		ERRMSG("Can't read kexec_crash_image->segment. %s\n",
+		       strerror(errno));
+		return;
+	}
+
+	elfcorehdr_p = 0;
+	for (i = 0; i < (unsigned long)nr_segs; ++i) {
+		char e_ident[EI_NIDENT];
+		/* Full width: a 32-bit type would truncate the address. */
+		unsigned long mem;
+
+		mem = ULONG(buf + i*seg_size + OFFSET(kexec_segment.mem));
+		if (!mem)
+			continue;
+
+		if (!readmem(PADDR, mem, e_ident, SELFMAG)) {
+			DEBUG_MSG("sadump: failed to read elfcorehdr buffer\n");
+			return;
+		}
+
+		if (strncmp(ELFMAG, e_ident, SELFMAG) == 0) {
+			elfcorehdr_p = mem;
+			break;
+		}
+	}
+	if (!elfcorehdr_p) {
+		DEBUG_MSG("sadump: kexec_crash_image contains no elfcorehdr "
+			  "segment\n");
+		return;
+	}
+
+	ehdr_size = SIZE(elf64_hdr);
+	if (ehdr_size <= 0 || (size_t)ehdr_size > sizeof(buf)) {
+		DEBUG_MSG("sadump: invalid elf64_hdr size\n");
+		return;
+	}
+
+	if (!readmem(PADDR, elfcorehdr_p, buf, ehdr_size)) {
+		ERRMSG("Can't read elfcorehdr ELF header. %s\n",
+		       strerror(errno));
+		return;
+	}
+
+	e_phnum = USHORT(buf + OFFSET(elf64_hdr.e_phnum));
+	e_phentsize = USHORT(buf + OFFSET(elf64_hdr.e_phentsize));
+	e_phoff = ULONG(buf + OFFSET(elf64_hdr.e_phoff));
+
+	/* Each program header is read into buf, so bound its size. */
+	if (e_phentsize == 0 || e_phentsize > sizeof(buf)) {
+		DEBUG_MSG("sadump: invalid e_phentsize in elfcorehdr\n");
+		return;
+	}
+
+	backup_src_start = backup_src_size = backup_offset = 0;
+	for (i = 0; i < e_phnum; ++i) {
+		unsigned long p_type, p_offset, p_paddr, p_memsz;
+
+		if (!readmem(PADDR, elfcorehdr_p+e_phoff+i*e_phentsize, buf,
+			     e_phentsize)) {
+			ERRMSG("Can't read elfcorehdr program header. %s\n",
+			       strerror(errno));
+			return;
+		}
+
+		p_type = UINT(buf + OFFSET(elf64_phdr.p_type));
+		p_offset = ULONG(buf + OFFSET(elf64_phdr.p_offset));
+		p_paddr = ULONG(buf + OFFSET(elf64_phdr.p_paddr));
+		p_memsz = ULONG(buf + OFFSET(elf64_phdr.p_memsz));
+
+		/*
+		 * Take the first PT_LOAD that starts at or below
+		 * KEXEC_BACKUP_SRC_END and whose p_offset lies at or
+		 * beyond the end of [p_paddr, p_paddr + p_memsz).
+		 */
+		if (p_type == PT_LOAD &&
+		    p_paddr <= KEXEC_BACKUP_SRC_END &&
+		    p_paddr + p_memsz <= p_offset) {
+
+			backup_src_start = p_paddr;
+			backup_src_size = p_memsz;
+			backup_offset = p_offset;
+
+			DEBUG_MSG("sadump: SRC_START: %#016lx SRC_SIZE: "
+				  "%#016lx SRC_OFFSET: %#016llx\n",
+				  backup_src_start, backup_src_size,
+				  backup_offset);
+
+			break;
+		}
+	}
+	if (i == e_phnum) {
+		DEBUG_MSG("sadump: No PT_LOAD in elfcorehdr for backup area\n");
+		return;
+	}
+
+	bufsize = BUFSIZE;
+	for (total = 0; total < backup_src_size; total += bufsize) {
+
+		if (backup_src_size - total < BUFSIZE)
+			bufsize = backup_src_size - total;
+
+		if (!readmem(PADDR, backup_offset + total, buf, bufsize)) {
+			ERRMSG("Can't read backup region. %s\n",
+			       strerror(errno));
+			return;
+		}
+
+		/*
+		 * We're assuming that the backup region is full of 0
+		 * before kdump saves the first 640kB memory of the
+		 * 1st kernel in the region.
+		 */
+		if (!is_zero_page(buf, bufsize)) {
+
+			si->kdump_backed_up = TRUE;
+			si->backup_src_start = backup_src_start;
+			si->backup_src_size = backup_src_size;
+			si->backup_offset = backup_offset;
+
+			DEBUG_MSG("sadump: kdump backup region used\n");
+
+			return;
+		}
+	}
+
+	DEBUG_MSG("sadump: kdump backup region unused\n");
+}
+
 #endif /* defined(__x86__) && defined(__x86_64__) */
diff --git a/sadump_info.h b/sadump_info.h
index f90ea5a..1f74ee5 100644
--- a/sadump_info.h
+++ b/sadump_info.h
@@ -54,6 +54,7 @@ long sadump_page_size(void);
 char *sadump_head_disk_name_memory(void);
 char *sadump_format_type_name(void);
 void free_sadump_info(void);
+void sadump_kdump_backup_region_init(void);
 
 static inline int sadump_is_supported_arch(void)
 {
@@ -154,6 +155,11 @@ static inline int sadump_is_supported_arch(void)
 	return FALSE;
 }
 
+static inline void sadump_kdump_backup_region_init(void)
+{
+	/* Intentionally empty: this variant performs no initialisation. */
+}
+
 #endif
 
 #endif /* _SADUMP_INFO_H */




More information about the kexec mailing list