[makedumpfile PATCH v2 3/3] sadump: Fix a KASLR problem of sadump

Atsushi Kumagai ats-kumagai at wm.jp.nec.com
Wed Oct 25 17:16:00 PDT 2017


Hello Indoh-san,

Sorry for the very late response; please see my comments below.

>This patch fix a problem that makedumpfile cannot handle a dumpfile
>which is captured by sadump in KASLR enabled kernel.
>
>When KASLR feature is enabled, a kernel is placed on the memory randomly
>and therefore makedumpfile cannot handle a dumpfile captured by sadump
>because addresses of kernel symbols in System.map or vmlinux are
>different from actual addresses.
>
>To solve this problem, we need to calculate kaslr offset(the difference
>between original symbol address and actual address) and phys_base, and
>adjust symbol table of makedumpfile. In the case of dumpfile of kdump,
>these information is included in the header, but dumpfile of sadump does
>not have such a information.
>
>This patch calculate kaslr offset and phys_base to solve this problem.
>Please see the comment in the calc_kaslr_offset() for the detail idea.
>The basic idea is getting register (IDTR and CR3) from dump header, and
>calculate kaslr_offset/phys_base using them.
>
>Signed-off-by: Takao Indoh <indou.takao at jp.fujitsu.com>
>---
> makedumpfile.c |  11 ++
> makedumpfile.h |   6 +-
> sadump_info.c  | 415 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 429 insertions(+), 3 deletions(-)
>
>diff --git a/makedumpfile.c b/makedumpfile.c
>index 5f2ca7d..4fa1b3a 100644
>--- a/makedumpfile.c
>+++ b/makedumpfile.c
>@@ -1554,6 +1554,10 @@ get_symbol_info(void)
> 	SYMBOL_INIT(demote_segment_4k, "demote_segment_4k");
> 	SYMBOL_INIT(cur_cpu_spec, "cur_cpu_spec");
>
>+	SYMBOL_INIT(divide_error, "divide_error");
>+	SYMBOL_INIT(idt_table, "idt_table");
>+	SYMBOL_INIT(saved_command_line, "saved_command_line");
>+
> 	return TRUE;
> }
>
>@@ -2249,6 +2253,13 @@ write_vmcoreinfo_data(void)
> 	WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
> #endif
>
>+	if (info->phys_base)
>+		fprintf(info->file_vmcoreinfo, "%s%lu\n", STR_NUMBER("phys_base"),
>+		        info->phys_base);
>+	if (info->kaslr_offset)
>+		fprintf(info->file_vmcoreinfo, "%s%lx\n", STR_KERNELOFFSET,
>+		        info->kaslr_offset);
>+
> 	/*
> 	 * write the source file of 1st kernel
> 	 */
>diff --git a/makedumpfile.h b/makedumpfile.h
>index f48dc0b..db75379 100644
>--- a/makedumpfile.h
>+++ b/makedumpfile.h
>@@ -45,6 +45,7 @@
> #include "sadump_mod.h"
> #include <pthread.h>
> #include <semaphore.h>
>+#include <inttypes.h>
>
> #define VMEMMAPSTART 0xffffea0000000000UL
> #define BITS_PER_WORD 64
>@@ -1599,6 +1600,9 @@ struct symbol_table {
> 	unsigned long long	cpu_online_mask;
> 	unsigned long long	__cpu_online_mask;
> 	unsigned long long	kexec_crash_image;
>+	unsigned long long	divide_error;
>+	unsigned long long	idt_table;
>+	unsigned long long	saved_command_line;
>
> 	/*
> 	 * symbols on ppc64 arch
>@@ -1960,7 +1964,7 @@ int iomem_for_each_line(char *match, int (*callback)(void *data, int nr,
> 						     unsigned long length),
> 			void *data);
> int is_bigendian(void);
>-
>+int get_symbol_info(void);
>
> /*
>  * for Xen extraction
>diff --git a/sadump_info.c b/sadump_info.c
>index 7dd22e7..485fa80 100644
>--- a/sadump_info.c
>+++ b/sadump_info.c
>@@ -1035,6 +1035,410 @@ sadump_get_max_mapnr(void)
>
> #ifdef __x86_64__
>
>+/*
>+ * Get address of vector0 interrupt handler (Devide Error) form Interrupt
>+ * Descriptor Table.
>+ */
>+static unsigned long
>+get_vec0_addr(ulong idtr)
>+{
>+	struct gate_struct64 {
>+		uint16_t offset_low;
>+		uint16_t segment;
>+		uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
>+		uint16_t offset_middle;
>+		uint32_t offset_high;
>+		uint32_t zero1;
>+	} __attribute__((packed)) gate;
>+
>+	readmem(PADDR, idtr, &gate, sizeof(gate));
>+
>+	return ((ulong)gate.offset_high << 32)
>+		+ ((ulong)gate.offset_middle << 16)
>+		+ gate.offset_low;
>+}
>+
>+/*
>+ * Parse a string of [size[KMG]@]offset[KMG]
>+ * Import from Linux kernel(lib/cmdline.c)
>+ */
>+static ulong memparse(char *ptr, char **retptr)
>+{
>+	char *endptr;
>+
>+	unsigned long long ret = strtoull(ptr, &endptr, 0);
>+
>+	switch (*endptr) {
>+	case 'E':
>+	case 'e':
>+		ret <<= 10;
>+	case 'P':
>+	case 'p':
>+		ret <<= 10;
>+	case 'T':
>+	case 't':
>+		ret <<= 10;
>+	case 'G':
>+	case 'g':
>+		ret <<= 10;
>+	case 'M':
>+	case 'm':
>+		ret <<= 10;
>+	case 'K':
>+	case 'k':
>+		ret <<= 10;
>+		endptr++;
>+	default:
>+		break;
>+	}
>+
>+	if (retptr)
>+		*retptr = endptr;
>+
>+	return ret;
>+}
>+
>+/*
>+ * Find "elfcorehdr=" in the boot parameter of kernel and return the address
>+ * of elfcorehdr.
>+ */
>+static ulong
>+get_elfcorehdr(ulong cr3)
>+{
>+	char cmdline[BUFSIZE], *ptr;
>+	ulong cmdline_vaddr;
>+	ulong cmdline_paddr;
>+	ulong buf_vaddr, buf_paddr;
>+	char *end;
>+	ulong elfcorehdr_addr = 0, elfcorehdr_size = 0;
>+
>+	if (SYMBOL(saved_command_line) == NOT_FOUND_SYMBOL) {
>+		ERRMSG("Can't get the symbol of saved_command_line.\n");
>+		return 0;
>+	}
>+	cmdline_vaddr = SYMBOL(saved_command_line);
>+	if ((cmdline_paddr = vtop4_x86_64_pagetable(cmdline_vaddr, cr3)) == NOT_PADDR)
>+		return 0;
>+
>+	DEBUG_MSG("sadump: cmdline vaddr: %lx\n", cmdline_vaddr);
>+	DEBUG_MSG("sadump: cmdline paddr: %lx\n", cmdline_paddr);
>+
>+	if (!readmem(PADDR, cmdline_paddr, &buf_vaddr, sizeof(ulong)))
>+		return 0;
>+
>+	if ((buf_paddr = vtop4_x86_64_pagetable(buf_vaddr, cr3)) == NOT_PADDR)
>+		return 0;
>+
>+	DEBUG_MSG("sadump: cmdline buf vaddr: %lx\n", buf_vaddr);
>+	DEBUG_MSG("sadump: cmdline buf paddr: %lx\n", buf_paddr);
>+
>+	memset(cmdline, 0, BUFSIZE);
>+	if (!readmem(PADDR, buf_paddr, cmdline, BUFSIZE))
>+		return 0;
>+
>+	ptr = strstr(cmdline, "elfcorehdr=");
>+	if (!ptr)
>+		return 0;
>+
>+	DEBUG_MSG("sadump: 2nd kernel detected.\n");
>+
>+	ptr += strlen("elfcorehdr=");
>+	elfcorehdr_addr = memparse(ptr, &end);
>+	if (*end == '@') {
>+		elfcorehdr_size = elfcorehdr_addr;
>+		elfcorehdr_addr = memparse(end + 1, &end);
>+	}
>+
>+	DEBUG_MSG("sadump: elfcorehdr_addr: %lx\n", elfcorehdr_addr);
>+	DEBUG_MSG("sadump: elfcorehdr_size: %lx\n", elfcorehdr_size);
>+
>+	return elfcorehdr_addr;
>+}
>+
>+/*
>+ * Get vmcoreinfo from elfcorehdr.
>+ * Some codes are imported from Linux kernel(fs/proc/vmcore.c)
>+ */
>+static int
>+get_vmcoreinfo_in_kdump_kernel(ulong elfcorehdr, ulong *addr, int *len)
>+{
>+	unsigned char e_ident[EI_NIDENT];
>+	Elf64_Ehdr ehdr;
>+	Elf64_Phdr phdr;
>+	Elf64_Nhdr nhdr;
>+	ulong ptr;
>+	ulong nhdr_offset = 0;
>+	int i;
>+
>+	if (!readmem(PADDR, elfcorehdr, e_ident, EI_NIDENT))
>+		return FALSE;
>+
>+	if (e_ident[EI_CLASS] != ELFCLASS64) {
>+		ERRMSG("Only ELFCLASS64 is supportd\n");
>+		return FALSE;
>+	}
>+
>+	if (!readmem(PADDR, elfcorehdr, &ehdr, sizeof(ehdr)))
>+		return FALSE;
>+
>+	/* Sanity Check */
>+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
>+		(ehdr.e_type != ET_CORE) ||
>+		ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
>+		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
>+		ehdr.e_version != EV_CURRENT ||
>+		ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
>+		ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
>+		ehdr.e_phnum == 0) {
>+		ERRMSG("Invalid elf header\n");
>+		return FALSE;
>+	}
>+
>+	ptr = elfcorehdr + ehdr.e_phoff;
>+	for (i = 0; i < ehdr.e_phnum; i++) {
>+		ulong offset;
>+		char name[16];
>+
>+		if (!readmem(PADDR, ptr, &phdr, sizeof(phdr)))
>+			return FALSE;
>+
>+		ptr += sizeof(phdr);
>+		if (phdr.p_type != PT_NOTE)
>+			continue;
>+
>+		offset = phdr.p_offset;
>+		if (!readmem(PADDR, offset, &nhdr, sizeof(nhdr)))
>+			return FALSE;
>+
>+		offset += divideup(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))*
>+			  sizeof(Elf64_Word);
>+		memset(name, 0, sizeof(name));
>+		if (!readmem(PADDR, offset, name, sizeof(name)))
>+			return FALSE;
>+
>+		if(!strcmp(name, "VMCOREINFO")) {
>+			nhdr_offset = offset;
>+			break;
>+		}
>+	}
>+
>+	if (!nhdr_offset)
>+		return FALSE;
>+
>+	*addr = nhdr_offset +
>+		divideup(nhdr.n_namesz, sizeof(Elf64_Word))*
>+		sizeof(Elf64_Word);
>+	*len = nhdr.n_descsz;
>+
>+	DEBUG_MSG("sadump: vmcoreinfo addr: %lx\n", *addr);
>+	DEBUG_MSG("sadump: vmcoreinfo len:  %d\n", *len);
>+
>+	return TRUE;
>+}
>+
>+/*
>+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel.
>+ * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo.
>+ *
>+ * 1. Get command line and try to retrieve "elfcorehdr=" boot parameter
>+ * 2. If "elfcorehdr=" is not found in command line, we are in 1st kernel.
>+ *    There is nothing to do.
>+ * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo
>+ *    using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo.
>+ */
>+int
>+get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong *kaslr_offset,
>+				 ulong *phys_base)
>+{
>+	ulong elfcorehdr_addr = 0;
>+	ulong vmcoreinfo_addr;
>+	int vmcoreinfo_len;
>+	char *buf, *pos;
>+	int ret = FALSE;
>+
>+	elfcorehdr_addr = get_elfcorehdr(cr3);
>+	if (!elfcorehdr_addr)
>+		return FALSE;
>+
>+	if (!get_vmcoreinfo_in_kdump_kernel(elfcorehdr_addr, &vmcoreinfo_addr,
>+					    &vmcoreinfo_len))
>+		return FALSE;
>+
>+	if (!vmcoreinfo_len)
>+		return FALSE;
>+
>+	DEBUG_MSG("sadump: Find vmcoreinfo in kdump memory\n");
>+
>+	if (!(buf = malloc(vmcoreinfo_len))) {
>+		ERRMSG("Can't allocate vmcoreinfo buffer.\n");
>+		return FALSE;
>+	}
>+
>+	if (!readmem(PADDR, vmcoreinfo_addr, buf, vmcoreinfo_len))
>+		goto finish;
>+
>+	pos = strstr(buf, STR_NUMBER("phys_base"));
>+	if (!pos)
>+		goto finish;
>+	*phys_base  = strtoull(pos + strlen(STR_NUMBER("phys_base")), NULL, 0);
>+
>+	pos = strstr(buf, STR_KERNELOFFSET);
>+	if (!pos)
>+		goto finish;
>+	*kaslr_offset = strtoull(pos + strlen(STR_KERNELOFFSET), NULL, 16);
>+	ret = TRUE;
>+
>+finish:
>+	free(buf);
>+	return ret;
>+}
>+
>+/*
>+ * Calculate kaslr_offset and phys_base
>+ *
>+ * kaslr_offset:
>+ *   The difference between original address in vmlinux and actual address
>+ *   placed randomly by kaslr feature. To be more accurate,
>+ *   kaslr_offset = actual address  - original address
>+ *
>+ * phys_base:
>+ *   Physical address where the kerenel is placed. In other words, it's a
>+ *   physical address of __START_KERNEL_map. This is also decided randomly by
>+ *   kaslr.
>+ *
>+ * kaslr offset and phys_base are calculated as follows:
>+ *
>+ * kaslr_offset:
>+ * 1) Get IDTR and CR3 value from the dump header.
>+ * 2) Get a virtual address of IDT from IDTR value
>+ *    --- (A)
>+ * 3) Translate (A) to physical address using CR3, which points a top of
>+ *    page table.
>+ *    --- (B)
>+ * 4) Get an address of vector0 (Devide Error) interrupt handler from
>+ *    IDT, which are pointed by (B).
>+ *    --- (C)
>+ * 5) Get an address of symbol "divide_error" form vmlinux
>+ *    --- (D)
>+ *
>+ * Now we have two addresses:
>+ * (C)-> Actual address of "divide_error"
>+ * (D)-> Original address of "divide_error" in the vmlinux
>+ *
>+ * kaslr_offset can be calculated by the difference between these two
>+ * value.
>+ *
>+ * phys_base;
>+ * 1) Get IDT virtual address from vmlinux
>+ *    --- (E)
>+ *
>+ * So phys_base can be calculated using relationship of directly mapped
>+ * address.
>+ *
>+ * phys_base =
>+ *   Physical address(B) -
>+ *   (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
>+ *
>+ * Note that the address (A) cannot be used instead of (E) because (A) is
>+ * not direct map address, it's a fixed map address.
>+ *
>+ * This solution works in most every case, but does not work in the
>+ * following case.
>+ *
>+ * 1) If the dump is captured on early stage of kernel boot, IDTR points
>+ *    early IDT table(early_idts) instead of normal IDT(idt_table).
>+ * 2) If the dump is captured whle kdump is working, IDTR points
                                  ^i
>+ *    IDT table of 2nd kernel, not 1st kernel.

These cases sound like they apply only to external dump mechanisms like sadump, right?
I think the functions for case 2) are an extra feature, while calculating
kaslr_offset is the essential solution to the KASLR problem.
Since this patch is large, I hope you will split it in two. Concretely,

>+ *
>+ * Current implementation does not support the case 1), need
>+ * enhancement in the future. For the case 2), get kaslr_offset and
>+ * phys_base as follows.
>+ *
>+ * 1) Get kaslr_offset and phys_base using the above solution.
>+ * 2) Get kernel boot parameter from "saved_command_line"
>+ * 3) If "elfcorehdr=" is not included in boot parameter, we are in the
>+ *    first kernel, nothing to do any more.
>+ * 4) If "elfcorehdr=" is included in boot parameter, we are in the 2nd
>+ *    kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and
>+ *    get kaslr_offset and phys_base from vmcoreinfo.
>+ */

  (The numbers refer to steps 1)-4) in the quoted comment just above.)
  1)      ->  [PATCH 3/4]
  2)-4)   ->  [PATCH 4/4]


Thanks,
Atsushi Kumagai

>+int
>+calc_kaslr_offset(void)
>+{
>+	struct sadump_header *sh = si->sh_memory;
>+	uint64_t idtr = 0, cr3 = 0, idtr_paddr;
>+	struct sadump_smram_cpu_state smram, zero;
>+	int apicid;
>+	unsigned long divide_error_vmcore, divide_error_vmlinux;
>+
>+	unsigned long kaslr_offset_kdump, phys_base_kdump;
>+	unsigned long kaslr_offset, phys_base;
>+
>+	memset(&zero, 0, sizeof(zero));
>+	for (apicid = 0; apicid < sh->nr_cpus; ++apicid) {
>+		if (!get_smram_cpu_state(apicid, &smram)) {
>+			ERRMSG("get_smram_cpu_state error\n");
>+			return FALSE;
>+		}
>+
>+		if (memcmp(&smram, &zero, sizeof(smram)) != 0)
>+			break;
>+	}
>+	if (apicid >= sh->nr_cpus) {
>+		ERRMSG("Can't get smram state\n");
>+		return FALSE;
>+	}
>+
>+	idtr = ((uint64_t)smram.IdtUpper)<<32 | (uint64_t)smram.IdtLower;
>+	cr3 = smram.Cr3;
>+
>+	/* Convert virtual address of IDT table to physical address */
>+	if ((idtr_paddr = vtop4_x86_64_pagetable(idtr, cr3)) == NOT_PADDR)
>+		return FALSE;
>+
>+	/* Now we can calculate kaslr_offset and phys_base */
>+	divide_error_vmlinux = SYMBOL(divide_error);
>+	divide_error_vmcore = get_vec0_addr(idtr_paddr);
>+	kaslr_offset = divide_error_vmcore - divide_error_vmlinux;
>+	phys_base = idtr_paddr -
>+		(SYMBOL(idt_table) + kaslr_offset - __START_KERNEL_map);
>+
>+	info->kaslr_offset = kaslr_offset;
>+	info->phys_base = phys_base;
>+
>+	DEBUG_MSG("sadump: idtr=%" PRIx64 "\n", idtr);
>+	DEBUG_MSG("sadump: cr3=%" PRIx64 "\n", cr3);
>+	DEBUG_MSG("sadump: idtr(phys)=%" PRIx64 "\n", idtr_paddr);
>+	DEBUG_MSG("sadump: devide_error(vmlinux)=%lx\n",
>+		divide_error_vmlinux);
>+	DEBUG_MSG("sadump: devide_error(vmcore)=%lx\n",
>+		divide_error_vmcore);
>+
>+	/* Reload symbol */
>+	if (!get_symbol_info())
>+		return FALSE;
>+
>+	/*
>+	 * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd
>+	 * kernel. If we are in 2nd kernel, get kaslr_offset/phys_base
>+	 * from vmcoreinfo
>+	 */
>+	if (get_kaslr_offset_from_vmcoreinfo(cr3, &kaslr_offset_kdump,
>+					    &phys_base_kdump)) {
>+		info->kaslr_offset = kaslr_offset_kdump;
>+		info->phys_base = phys_base_kdump;
>+
>+		/* Reload symbol */
>+		if (!get_symbol_info())
>+			return FALSE;
>+	}
>+
>+	DEBUG_MSG("sadump: kaslr_offset=%lx\n", info->kaslr_offset);
>+	DEBUG_MSG("sadump: phys_base=%lx\n", info->phys_base);
>+
>+	return TRUE;
>+}
>+
> int
> sadump_virt_phys_base(void)
> {
>@@ -1065,6 +1469,9 @@ sadump_virt_phys_base(void)
> 	}
>
> failed:
>+	if (calc_kaslr_offset())
>+		return TRUE;
>+
> 	info->phys_base = 0;
>
> 	DEBUG_MSG("sadump: failed to calculate phys_base; default to 0\n");
>@@ -1518,10 +1925,14 @@ cpu_to_apicid(int cpu, int *apicid)
> 		if (!readmem(VADDR, SYMBOL(x86_bios_cpu_apicid_early_ptr),
> 			     &early_ptr, sizeof(early_ptr)))
> 			return FALSE;
>-
>+		/*
>+		 * Note: SYMBOL(name) value is adjusted by info->kaslr_offset,
>+		 * but per_cpu symbol does not need to be adjusted becasue it
>+		 * is not affected by kaslr.
>+		 */
> 		apicid_addr = early_ptr
> 			? SYMBOL(x86_bios_cpu_apicid_early_map)+cpu*sizeof(uint16_t)
>-			: per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid), cpu);
>+			: per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid) - info->kaslr_offset, cpu);
>
> 		if (!readmem(VADDR, apicid_addr, &apicid_u16, sizeof(uint16_t)))
> 			return FALSE;
>--
>2.9.5




More information about the kexec mailing list