[makedumpfile PATCH v2 3/3] sadump: Fix a KASLR problem of sadump

Takao Indoh indou.takao at jp.fujitsu.com
Wed Oct 25 18:19:37 PDT 2017


On Thu, Oct 26, 2017 at 12:16:00AM +0000, Atsushi Kumagai wrote:
> Hello Indoh-san,
> 
> Sorry for the very late response; please see below.
> 
> >This patch fixes a problem where makedumpfile cannot handle a dumpfile
> >which is captured by sadump on a KASLR-enabled kernel.
> >
> >When KASLR feature is enabled, a kernel is placed on the memory randomly
> >and therefore makedumpfile cannot handle a dumpfile captured by sadump
> >because addresses of kernel symbols in System.map or vmlinux are
> >different from actual addresses.
> >
> >To solve this problem, we need to calculate the kaslr offset (the difference
> >between the original symbol address and the actual address) and phys_base,
> >and adjust the symbol table of makedumpfile. In the case of a kdump dumpfile,
> >this information is included in the header, but an sadump dumpfile does
> >not have such information.
> >
> >This patch calculates kaslr offset and phys_base to solve this problem.
> >Please see the comment in calc_kaslr_offset() for the detailed idea.
> >The basic idea is getting register (IDTR and CR3) from dump header, and
> >calculate kaslr_offset/phys_base using them.
> >
> >Signed-off-by: Takao Indoh <indou.takao at jp.fujitsu.com>
> >---
> > makedumpfile.c |  11 ++
> > makedumpfile.h |   6 +-
> > sadump_info.c  | 415 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > 3 files changed, 429 insertions(+), 3 deletions(-)
> >
> >diff --git a/makedumpfile.c b/makedumpfile.c
> >index 5f2ca7d..4fa1b3a 100644
> >--- a/makedumpfile.c
> >+++ b/makedumpfile.c
> >@@ -1554,6 +1554,10 @@ get_symbol_info(void)
> > 	SYMBOL_INIT(demote_segment_4k, "demote_segment_4k");
> > 	SYMBOL_INIT(cur_cpu_spec, "cur_cpu_spec");
> >
> >+	SYMBOL_INIT(divide_error, "divide_error");
> >+	SYMBOL_INIT(idt_table, "idt_table");
> >+	SYMBOL_INIT(saved_command_line, "saved_command_line");
> >+
> > 	return TRUE;
> > }
> >
> >@@ -2249,6 +2253,13 @@ write_vmcoreinfo_data(void)
> > 	WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
> > #endif
> >
> >+	if (info->phys_base)
> >+		fprintf(info->file_vmcoreinfo, "%s%lu\n", STR_NUMBER("phys_base"),
> >+		        info->phys_base);
> >+	if (info->kaslr_offset)
> >+		fprintf(info->file_vmcoreinfo, "%s%lx\n", STR_KERNELOFFSET,
> >+		        info->kaslr_offset);
> >+
> > 	/*
> > 	 * write the source file of 1st kernel
> > 	 */
> >diff --git a/makedumpfile.h b/makedumpfile.h
> >index f48dc0b..db75379 100644
> >--- a/makedumpfile.h
> >+++ b/makedumpfile.h
> >@@ -45,6 +45,7 @@
> > #include "sadump_mod.h"
> > #include <pthread.h>
> > #include <semaphore.h>
> >+#include <inttypes.h>
> >
> > #define VMEMMAPSTART 0xffffea0000000000UL
> > #define BITS_PER_WORD 64
> >@@ -1599,6 +1600,9 @@ struct symbol_table {
> > 	unsigned long long	cpu_online_mask;
> > 	unsigned long long	__cpu_online_mask;
> > 	unsigned long long	kexec_crash_image;
> >+	unsigned long long	divide_error;
> >+	unsigned long long	idt_table;
> >+	unsigned long long	saved_command_line;
> >
> > 	/*
> > 	 * symbols on ppc64 arch
> >@@ -1960,7 +1964,7 @@ int iomem_for_each_line(char *match, int (*callback)(void *data, int nr,
> > 						     unsigned long length),
> > 			void *data);
> > int is_bigendian(void);
> >-
> >+int get_symbol_info(void);
> >
> > /*
> >  * for Xen extraction
> >diff --git a/sadump_info.c b/sadump_info.c
> >index 7dd22e7..485fa80 100644
> >--- a/sadump_info.c
> >+++ b/sadump_info.c
> >@@ -1035,6 +1035,410 @@ sadump_get_max_mapnr(void)
> >
> > #ifdef __x86_64__
> >
> >+/*
> >+ * Get address of vector0 interrupt handler (Divide Error) from Interrupt
> >+ * Descriptor Table.
> >+ */
> >+static unsigned long
> >+get_vec0_addr(ulong idtr)
> >+{
> >+	struct gate_struct64 {
> >+		uint16_t offset_low;
> >+		uint16_t segment;
> >+		uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
> >+		uint16_t offset_middle;
> >+		uint32_t offset_high;
> >+		uint32_t zero1;
> >+	} __attribute__((packed)) gate;
> >+
> >+	readmem(PADDR, idtr, &gate, sizeof(gate));
> >+
> >+	return ((ulong)gate.offset_high << 32)
> >+		+ ((ulong)gate.offset_middle << 16)
> >+		+ gate.offset_low;
> >+}
> >+
> >+/*
> >+ * Parse a string of [size[KMG]@]offset[KMG]
> >+ * Import from Linux kernel(lib/cmdline.c)
> >+ */
> >+static ulong memparse(char *ptr, char **retptr)
> >+{
> >+	char *endptr;
> >+
> >+	unsigned long long ret = strtoull(ptr, &endptr, 0);
> >+
> >+	switch (*endptr) {
> >+	case 'E':
> >+	case 'e':
> >+		ret <<= 10;
> >+	case 'P':
> >+	case 'p':
> >+		ret <<= 10;
> >+	case 'T':
> >+	case 't':
> >+		ret <<= 10;
> >+	case 'G':
> >+	case 'g':
> >+		ret <<= 10;
> >+	case 'M':
> >+	case 'm':
> >+		ret <<= 10;
> >+	case 'K':
> >+	case 'k':
> >+		ret <<= 10;
> >+		endptr++;
> >+	default:
> >+		break;
> >+	}
> >+
> >+	if (retptr)
> >+		*retptr = endptr;
> >+
> >+	return ret;
> >+}
> >+
> >+/*
> >+ * Find "elfcorehdr=" in the boot parameter of kernel and return the address
> >+ * of elfcorehdr.
> >+ */
> >+static ulong
> >+get_elfcorehdr(ulong cr3)
> >+{
> >+	char cmdline[BUFSIZE], *ptr;
> >+	ulong cmdline_vaddr;
> >+	ulong cmdline_paddr;
> >+	ulong buf_vaddr, buf_paddr;
> >+	char *end;
> >+	ulong elfcorehdr_addr = 0, elfcorehdr_size = 0;
> >+
> >+	if (SYMBOL(saved_command_line) == NOT_FOUND_SYMBOL) {
> >+		ERRMSG("Can't get the symbol of saved_command_line.\n");
> >+		return 0;
> >+	}
> >+	cmdline_vaddr = SYMBOL(saved_command_line);
> >+	if ((cmdline_paddr = vtop4_x86_64_pagetable(cmdline_vaddr, cr3)) == NOT_PADDR)
> >+		return 0;
> >+
> >+	DEBUG_MSG("sadump: cmdline vaddr: %lx\n", cmdline_vaddr);
> >+	DEBUG_MSG("sadump: cmdline paddr: %lx\n", cmdline_paddr);
> >+
> >+	if (!readmem(PADDR, cmdline_paddr, &buf_vaddr, sizeof(ulong)))
> >+		return 0;
> >+
> >+	if ((buf_paddr = vtop4_x86_64_pagetable(buf_vaddr, cr3)) == NOT_PADDR)
> >+		return 0;
> >+
> >+	DEBUG_MSG("sadump: cmdline buf vaddr: %lx\n", buf_vaddr);
> >+	DEBUG_MSG("sadump: cmdline buf paddr: %lx\n", buf_paddr);
> >+
> >+	memset(cmdline, 0, BUFSIZE);
> >+	if (!readmem(PADDR, buf_paddr, cmdline, BUFSIZE))
> >+		return 0;
> >+
> >+	ptr = strstr(cmdline, "elfcorehdr=");
> >+	if (!ptr)
> >+		return 0;
> >+
> >+	DEBUG_MSG("sadump: 2nd kernel detected.\n");
> >+
> >+	ptr += strlen("elfcorehdr=");
> >+	elfcorehdr_addr = memparse(ptr, &end);
> >+	if (*end == '@') {
> >+		elfcorehdr_size = elfcorehdr_addr;
> >+		elfcorehdr_addr = memparse(end + 1, &end);
> >+	}
> >+
> >+	DEBUG_MSG("sadump: elfcorehdr_addr: %lx\n", elfcorehdr_addr);
> >+	DEBUG_MSG("sadump: elfcorehdr_size: %lx\n", elfcorehdr_size);
> >+
> >+	return elfcorehdr_addr;
> >+}
> >+
> >+/*
> >+ * Get vmcoreinfo from elfcorehdr.
> >+ * Some codes are imported from Linux kernel(fs/proc/vmcore.c)
> >+ */
> >+static int
> >+get_vmcoreinfo_in_kdump_kernel(ulong elfcorehdr, ulong *addr, int *len)
> >+{
> >+	unsigned char e_ident[EI_NIDENT];
> >+	Elf64_Ehdr ehdr;
> >+	Elf64_Phdr phdr;
> >+	Elf64_Nhdr nhdr;
> >+	ulong ptr;
> >+	ulong nhdr_offset = 0;
> >+	int i;
> >+
> >+	if (!readmem(PADDR, elfcorehdr, e_ident, EI_NIDENT))
> >+		return FALSE;
> >+
> >+	if (e_ident[EI_CLASS] != ELFCLASS64) {
> >+		ERRMSG("Only ELFCLASS64 is supportd\n");
> >+		return FALSE;
> >+	}
> >+
> >+	if (!readmem(PADDR, elfcorehdr, &ehdr, sizeof(ehdr)))
> >+		return FALSE;
> >+
> >+	/* Sanity Check */
> >+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
> >+		(ehdr.e_type != ET_CORE) ||
> >+		ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
> >+		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
> >+		ehdr.e_version != EV_CURRENT ||
> >+		ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
> >+		ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
> >+		ehdr.e_phnum == 0) {
> >+		ERRMSG("Invalid elf header\n");
> >+		return FALSE;
> >+	}
> >+
> >+	ptr = elfcorehdr + ehdr.e_phoff;
> >+	for (i = 0; i < ehdr.e_phnum; i++) {
> >+		ulong offset;
> >+		char name[16];
> >+
> >+		if (!readmem(PADDR, ptr, &phdr, sizeof(phdr)))
> >+			return FALSE;
> >+
> >+		ptr += sizeof(phdr);
> >+		if (phdr.p_type != PT_NOTE)
> >+			continue;
> >+
> >+		offset = phdr.p_offset;
> >+		if (!readmem(PADDR, offset, &nhdr, sizeof(nhdr)))
> >+			return FALSE;
> >+
> >+		offset += divideup(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))*
> >+			  sizeof(Elf64_Word);
> >+		memset(name, 0, sizeof(name));
> >+		if (!readmem(PADDR, offset, name, sizeof(name)))
> >+			return FALSE;
> >+
> >+		if(!strcmp(name, "VMCOREINFO")) {
> >+			nhdr_offset = offset;
> >+			break;
> >+		}
> >+	}
> >+
> >+	if (!nhdr_offset)
> >+		return FALSE;
> >+
> >+	*addr = nhdr_offset +
> >+		divideup(nhdr.n_namesz, sizeof(Elf64_Word))*
> >+		sizeof(Elf64_Word);
> >+	*len = nhdr.n_descsz;
> >+
> >+	DEBUG_MSG("sadump: vmcoreinfo addr: %lx\n", *addr);
> >+	DEBUG_MSG("sadump: vmcoreinfo len:  %d\n", *len);
> >+
> >+	return TRUE;
> >+}
> >+
> >+/*
> >+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel.
> >+ * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo.
> >+ *
> >+ * 1. Get command line and try to retrieve "elfcorehdr=" boot parameter
> >+ * 2. If "elfcorehdr=" is not found in command line, we are in 1st kernel.
> >+ *    There is nothing to do.
> >+ * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo
> >+ *    using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo.
> >+ */
> >+int
> >+get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong *kaslr_offset,
> >+				 ulong *phys_base)
> >+{
> >+	ulong elfcorehdr_addr = 0;
> >+	ulong vmcoreinfo_addr;
> >+	int vmcoreinfo_len;
> >+	char *buf, *pos;
> >+	int ret = FALSE;
> >+
> >+	elfcorehdr_addr = get_elfcorehdr(cr3);
> >+	if (!elfcorehdr_addr)
> >+		return FALSE;
> >+
> >+	if (!get_vmcoreinfo_in_kdump_kernel(elfcorehdr_addr, &vmcoreinfo_addr,
> >+					    &vmcoreinfo_len))
> >+		return FALSE;
> >+
> >+	if (!vmcoreinfo_len)
> >+		return FALSE;
> >+
> >+	DEBUG_MSG("sadump: Find vmcoreinfo in kdump memory\n");
> >+
> >+	if (!(buf = malloc(vmcoreinfo_len))) {
> >+		ERRMSG("Can't allocate vmcoreinfo buffer.\n");
> >+		return FALSE;
> >+	}
> >+
> >+	if (!readmem(PADDR, vmcoreinfo_addr, buf, vmcoreinfo_len))
> >+		goto finish;
> >+
> >+	pos = strstr(buf, STR_NUMBER("phys_base"));
> >+	if (!pos)
> >+		goto finish;
> >+	*phys_base  = strtoull(pos + strlen(STR_NUMBER("phys_base")), NULL, 0);
> >+
> >+	pos = strstr(buf, STR_KERNELOFFSET);
> >+	if (!pos)
> >+		goto finish;
> >+	*kaslr_offset = strtoull(pos + strlen(STR_KERNELOFFSET), NULL, 16);
> >+	ret = TRUE;
> >+
> >+finish:
> >+	free(buf);
> >+	return ret;
> >+}
> >+
> >+/*
> >+ * Calculate kaslr_offset and phys_base
> >+ *
> >+ * kaslr_offset:
> >+ *   The difference between original address in vmlinux and actual address
> >+ *   placed randomly by kaslr feature. To be more accurate,
> >+ *   kaslr_offset = actual address  - original address
> >+ *
> >+ * phys_base:
> >+ *   Physical address where the kernel is placed. In other words, it's a
> >+ *   physical address of __START_KERNEL_map. This is also decided randomly by
> >+ *   kaslr.
> >+ *
> >+ * kaslr offset and phys_base are calculated as follows:
> >+ *
> >+ * kaslr_offset:
> >+ * 1) Get IDTR and CR3 value from the dump header.
> >+ * 2) Get a virtual address of IDT from IDTR value
> >+ *    --- (A)
> >+ * 3) Translate (A) to physical address using CR3, which points a top of
> >+ *    page table.
> >+ *    --- (B)
> >+ * 4) Get an address of vector0 (Divide Error) interrupt handler from
> >+ *    IDT, which is pointed to by (B).
> >+ *    --- (C)
> >+ * 5) Get an address of symbol "divide_error" from vmlinux
> >+ *    --- (D)
> >+ *
> >+ * Now we have two addresses:
> >+ * (C)-> Actual address of "divide_error"
> >+ * (D)-> Original address of "divide_error" in the vmlinux
> >+ *
> >+ * kaslr_offset can be calculated by the difference between these two
> >+ * value.
> >+ *
> >+ * phys_base:
> >+ * 1) Get IDT virtual address from vmlinux
> >+ *    --- (E)
> >+ *
> >+ * So phys_base can be calculated using relationship of directly mapped
> >+ * address.
> >+ *
> >+ * phys_base =
> >+ *   Physical address(B) -
> >+ *   (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
> >+ *
> >+ * Note that the address (A) cannot be used instead of (E) because (A) is
> >+ * not direct map address, it's a fixed map address.
> >+ *
> >+ * This solution works in almost every case, but does not work in the
> >+ * following cases.
> >+ *
> >+ * 1) If the dump is captured on early stage of kernel boot, IDTR points
> >+ *    early IDT table(early_idts) instead of normal IDT(idt_table).
> >+ * 2) If the dump is captured whle kdump is working, IDTR points
>                                   ^i
> >+ *    IDT table of 2nd kernel, not 1st kernel.
> 
> These cases sound like they apply only to external dump mechanisms like sadump, right?
> I think the functions for the case 2) are extra features while calculating
> kaslr_offset is an essential solution for the KASLR problem.
> I hope you split this patch in two since it's large. Concretely,
> 
> >+ *
> >+ * Current implementation does not support the case 1), need
> >+ * enhancement in the future. For the case 2), get kaslr_offset and
> >+ * phys_base as follows.
> >+ *
> >+ * 1) Get kaslr_offset and phys_base using the above solution.
> >+ * 2) Get kernel boot parameter from "saved_command_line"
> >+ * 3) If "elfcorehdr=" is not included in boot parameter, we are in the
> >+ *    first kernel, nothing to do any more.
> >+ * 4) If "elfcorehdr=" is included in boot parameter, we are in the 2nd
> >+ *    kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and
> >+ *    get kaslr_offset and phys_base from vmcoreinfo.
> >+ */
> 
>   1)      ->  [PATCH 3/4]
>   2)-4)   ->  [PATCH 4/4]

Thank you for the review. OK, I'll do this.

Thanks,
Takao Indoh

> 
> 
> Thanks,
> Atsushi Kumagai
> 
> >+int
> >+calc_kaslr_offset(void)
> >+{
> >+	struct sadump_header *sh = si->sh_memory;
> >+	uint64_t idtr = 0, cr3 = 0, idtr_paddr;
> >+	struct sadump_smram_cpu_state smram, zero;
> >+	int apicid;
> >+	unsigned long divide_error_vmcore, divide_error_vmlinux;
> >+
> >+	unsigned long kaslr_offset_kdump, phys_base_kdump;
> >+	unsigned long kaslr_offset, phys_base;
> >+
> >+	memset(&zero, 0, sizeof(zero));
> >+	for (apicid = 0; apicid < sh->nr_cpus; ++apicid) {
> >+		if (!get_smram_cpu_state(apicid, &smram)) {
> >+			ERRMSG("get_smram_cpu_state error\n");
> >+			return FALSE;
> >+		}
> >+
> >+		if (memcmp(&smram, &zero, sizeof(smram)) != 0)
> >+			break;
> >+	}
> >+	if (apicid >= sh->nr_cpus) {
> >+		ERRMSG("Can't get smram state\n");
> >+		return FALSE;
> >+	}
> >+
> >+	idtr = ((uint64_t)smram.IdtUpper)<<32 | (uint64_t)smram.IdtLower;
> >+	cr3 = smram.Cr3;
> >+
> >+	/* Convert virtual address of IDT table to physical address */
> >+	if ((idtr_paddr = vtop4_x86_64_pagetable(idtr, cr3)) == NOT_PADDR)
> >+		return FALSE;
> >+
> >+	/* Now we can calculate kaslr_offset and phys_base */
> >+	divide_error_vmlinux = SYMBOL(divide_error);
> >+	divide_error_vmcore = get_vec0_addr(idtr_paddr);
> >+	kaslr_offset = divide_error_vmcore - divide_error_vmlinux;
> >+	phys_base = idtr_paddr -
> >+		(SYMBOL(idt_table) + kaslr_offset - __START_KERNEL_map);
> >+
> >+	info->kaslr_offset = kaslr_offset;
> >+	info->phys_base = phys_base;
> >+
> >+	DEBUG_MSG("sadump: idtr=%" PRIx64 "\n", idtr);
> >+	DEBUG_MSG("sadump: cr3=%" PRIx64 "\n", cr3);
> >+	DEBUG_MSG("sadump: idtr(phys)=%" PRIx64 "\n", idtr_paddr);
> >+	DEBUG_MSG("sadump: devide_error(vmlinux)=%lx\n",
> >+		divide_error_vmlinux);
> >+	DEBUG_MSG("sadump: devide_error(vmcore)=%lx\n",
> >+		divide_error_vmcore);
> >+
> >+	/* Reload symbol */
> >+	if (!get_symbol_info())
> >+		return FALSE;
> >+
> >+	/*
> >+	 * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd
> >+	 * kernel. If we are in 2nd kernel, get kaslr_offset/phys_base
> >+	 * from vmcoreinfo
> >+	 */
> >+	if (get_kaslr_offset_from_vmcoreinfo(cr3, &kaslr_offset_kdump,
> >+					    &phys_base_kdump)) {
> >+		info->kaslr_offset = kaslr_offset_kdump;
> >+		info->phys_base = phys_base_kdump;
> >+
> >+		/* Reload symbol */
> >+		if (!get_symbol_info())
> >+			return FALSE;
> >+	}
> >+
> >+	DEBUG_MSG("sadump: kaslr_offset=%lx\n", info->kaslr_offset);
> >+	DEBUG_MSG("sadump: phys_base=%lx\n", info->phys_base);
> >+
> >+	return TRUE;
> >+}
> >+
> > int
> > sadump_virt_phys_base(void)
> > {
> >@@ -1065,6 +1469,9 @@ sadump_virt_phys_base(void)
> > 	}
> >
> > failed:
> >+	if (calc_kaslr_offset())
> >+		return TRUE;
> >+
> > 	info->phys_base = 0;
> >
> > 	DEBUG_MSG("sadump: failed to calculate phys_base; default to 0\n");
> >@@ -1518,10 +1925,14 @@ cpu_to_apicid(int cpu, int *apicid)
> > 		if (!readmem(VADDR, SYMBOL(x86_bios_cpu_apicid_early_ptr),
> > 			     &early_ptr, sizeof(early_ptr)))
> > 			return FALSE;
> >-
> >+		/*
> >+		 * Note: SYMBOL(name) value is adjusted by info->kaslr_offset,
> >+		 * but per_cpu symbol does not need to be adjusted because it
> >+		 * is not affected by kaslr.
> >+		 */
> > 		apicid_addr = early_ptr
> > 			? SYMBOL(x86_bios_cpu_apicid_early_map)+cpu*sizeof(uint16_t)
> >-			: per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid), cpu);
> >+			: per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid) - info->kaslr_offset, cpu);
> >
> > 		if (!readmem(VADDR, apicid_addr, &apicid_u16, sizeof(uint16_t)))
> > 			return FALSE;
> >--
> >2.9.5
> 
> 
> _______________________________________________
> kexec mailing list
> kexec at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
> 




More information about the kexec mailing list