[makedumpfile PATCH v2 3/3] sadump: Fix a KASLR problem of sadump
Atsushi Kumagai
ats-kumagai at wm.jp.nec.com
Wed Oct 25 17:16:00 PDT 2017
Hello Indoh-san,
Sorry for the very late response, please see below.
>This patch fixes a problem where makedumpfile cannot handle a dumpfile
>captured by sadump on a KASLR-enabled kernel.
>
>When the KASLR feature is enabled, the kernel is placed in memory at a random
>location, and therefore makedumpfile cannot handle a dumpfile captured by
>sadump because the addresses of kernel symbols in System.map or vmlinux
>differ from the actual addresses.
>
>To solve this problem, we need to calculate the kaslr offset (the difference
>between the original symbol address and the actual address) and phys_base,
>and adjust makedumpfile's symbol table. For a kdump dumpfile, this
>information is included in the dump header, but a sadump dumpfile does not
>carry such information.
>
>This patch calculates the kaslr offset and phys_base to solve this problem.
>Please see the comment in calc_kaslr_offset() for the detailed idea.
>The basic idea is to read registers (IDTR and CR3) from the dump header and
>calculate kaslr_offset/phys_base from them.
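As a minimal sketch of the "adjust symbol table" step mentioned above (the
helper below is hypothetical, not part of this patch or of makedumpfile), the
adjustment is conceptually just an addition:

    /* Hypothetical illustration: the run-time address of a kernel symbol is
     * its vmlinux/System.map address shifted by the KASLR offset. */
    static unsigned long long
    runtime_symbol_addr(unsigned long long vmlinux_addr,
                        unsigned long long kaslr_offset)
    {
            return vmlinux_addr + kaslr_offset;
    }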
>
>Signed-off-by: Takao Indoh <indou.takao at jp.fujitsu.com>
>---
> makedumpfile.c | 11 ++
> makedumpfile.h | 6 +-
> sadump_info.c | 415 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 429 insertions(+), 3 deletions(-)
>
>diff --git a/makedumpfile.c b/makedumpfile.c
>index 5f2ca7d..4fa1b3a 100644
>--- a/makedumpfile.c
>+++ b/makedumpfile.c
>@@ -1554,6 +1554,10 @@ get_symbol_info(void)
> SYMBOL_INIT(demote_segment_4k, "demote_segment_4k");
> SYMBOL_INIT(cur_cpu_spec, "cur_cpu_spec");
>
>+ SYMBOL_INIT(divide_error, "divide_error");
>+ SYMBOL_INIT(idt_table, "idt_table");
>+ SYMBOL_INIT(saved_command_line, "saved_command_line");
>+
> return TRUE;
> }
>
>@@ -2249,6 +2253,13 @@ write_vmcoreinfo_data(void)
> WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
> #endif
>
>+ if (info->phys_base)
>+ fprintf(info->file_vmcoreinfo, "%s%lu\n", STR_NUMBER("phys_base"),
>+ info->phys_base);
>+ if (info->kaslr_offset)
>+ fprintf(info->file_vmcoreinfo, "%s%lx\n", STR_KERNELOFFSET,
>+ info->kaslr_offset);
>+
> /*
> * write the source file of 1st kernel
> */
>diff --git a/makedumpfile.h b/makedumpfile.h
>index f48dc0b..db75379 100644
>--- a/makedumpfile.h
>+++ b/makedumpfile.h
>@@ -45,6 +45,7 @@
> #include "sadump_mod.h"
> #include <pthread.h>
> #include <semaphore.h>
>+#include <inttypes.h>
>
> #define VMEMMAPSTART 0xffffea0000000000UL
> #define BITS_PER_WORD 64
>@@ -1599,6 +1600,9 @@ struct symbol_table {
> unsigned long long cpu_online_mask;
> unsigned long long __cpu_online_mask;
> unsigned long long kexec_crash_image;
>+ unsigned long long divide_error;
>+ unsigned long long idt_table;
>+ unsigned long long saved_command_line;
>
> /*
> * symbols on ppc64 arch
>@@ -1960,7 +1964,7 @@ int iomem_for_each_line(char *match, int (*callback)(void *data, int nr,
> unsigned long length),
> void *data);
> int is_bigendian(void);
>-
>+int get_symbol_info(void);
>
> /*
> * for Xen extraction
>diff --git a/sadump_info.c b/sadump_info.c
>index 7dd22e7..485fa80 100644
>--- a/sadump_info.c
>+++ b/sadump_info.c
>@@ -1035,6 +1035,410 @@ sadump_get_max_mapnr(void)
>
> #ifdef __x86_64__
>
>+/*
>+ * Get the address of the vector0 interrupt handler (Divide Error) from the
>+ * Interrupt Descriptor Table.
>+ */
>+static unsigned long
>+get_vec0_addr(ulong idtr)
>+{
>+ struct gate_struct64 {
>+ uint16_t offset_low;
>+ uint16_t segment;
>+ uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
>+ uint16_t offset_middle;
>+ uint32_t offset_high;
>+ uint32_t zero1;
>+ } __attribute__((packed)) gate;
>+
>+ readmem(PADDR, idtr, &gate, sizeof(gate));
>+
>+ return ((ulong)gate.offset_high << 32)
>+ + ((ulong)gate.offset_middle << 16)
>+ + gate.offset_low;
>+}
>+
>+/*
>+ * Parse a string of [size[KMG]@]offset[KMG]
>+ * Imported from the Linux kernel (lib/cmdline.c)
>+ */
>+static ulong memparse(char *ptr, char **retptr)
>+{
>+ char *endptr;
>+
>+ unsigned long long ret = strtoull(ptr, &endptr, 0);
>+
>+ switch (*endptr) {
>+ case 'E':
>+ case 'e':
>+ ret <<= 10;
>+ case 'P':
>+ case 'p':
>+ ret <<= 10;
>+ case 'T':
>+ case 't':
>+ ret <<= 10;
>+ case 'G':
>+ case 'g':
>+ ret <<= 10;
>+ case 'M':
>+ case 'm':
>+ ret <<= 10;
>+ case 'K':
>+ case 'k':
>+ ret <<= 10;
>+ endptr++;
>+ default:
>+ break;
>+ }
>+
>+ if (retptr)
>+ *retptr = endptr;
>+
>+ return ret;
>+}
>+
>+/*
>+ * Find "elfcorehdr=" in the boot parameter of kernel and return the address
>+ * of elfcorehdr.
>+ */
>+static ulong
>+get_elfcorehdr(ulong cr3)
>+{
>+ char cmdline[BUFSIZE], *ptr;
>+ ulong cmdline_vaddr;
>+ ulong cmdline_paddr;
>+ ulong buf_vaddr, buf_paddr;
>+ char *end;
>+ ulong elfcorehdr_addr = 0, elfcorehdr_size = 0;
>+
>+ if (SYMBOL(saved_command_line) == NOT_FOUND_SYMBOL) {
>+ ERRMSG("Can't get the symbol of saved_command_line.\n");
>+ return 0;
>+ }
>+ cmdline_vaddr = SYMBOL(saved_command_line);
>+ if ((cmdline_paddr = vtop4_x86_64_pagetable(cmdline_vaddr, cr3)) == NOT_PADDR)
>+ return 0;
>+
>+ DEBUG_MSG("sadump: cmdline vaddr: %lx\n", cmdline_vaddr);
>+ DEBUG_MSG("sadump: cmdline paddr: %lx\n", cmdline_paddr);
>+
>+ if (!readmem(PADDR, cmdline_paddr, &buf_vaddr, sizeof(ulong)))
>+ return 0;
>+
>+ if ((buf_paddr = vtop4_x86_64_pagetable(buf_vaddr, cr3)) == NOT_PADDR)
>+ return 0;
>+
>+ DEBUG_MSG("sadump: cmdline buf vaddr: %lx\n", buf_vaddr);
>+ DEBUG_MSG("sadump: cmdline buf paddr: %lx\n", buf_paddr);
>+
>+ memset(cmdline, 0, BUFSIZE);
>+ if (!readmem(PADDR, buf_paddr, cmdline, BUFSIZE))
>+ return 0;
>+
>+ ptr = strstr(cmdline, "elfcorehdr=");
>+ if (!ptr)
>+ return 0;
>+
>+ DEBUG_MSG("sadump: 2nd kernel detected.\n");
>+
>+ ptr += strlen("elfcorehdr=");
>+ elfcorehdr_addr = memparse(ptr, &end);
>+ if (*end == '@') {
>+ elfcorehdr_size = elfcorehdr_addr;
>+ elfcorehdr_addr = memparse(end + 1, &end);
>+ }
>+
>+ DEBUG_MSG("sadump: elfcorehdr_addr: %lx\n", elfcorehdr_addr);
>+ DEBUG_MSG("sadump: elfcorehdr_size: %lx\n", elfcorehdr_size);
>+
>+ return elfcorehdr_addr;
>+}
>+
>+/*
>+ * Get vmcoreinfo from elfcorehdr.
>+ * Some code is imported from the Linux kernel (fs/proc/vmcore.c)
>+ */
>+static int
>+get_vmcoreinfo_in_kdump_kernel(ulong elfcorehdr, ulong *addr, int *len)
>+{
>+ unsigned char e_ident[EI_NIDENT];
>+ Elf64_Ehdr ehdr;
>+ Elf64_Phdr phdr;
>+ Elf64_Nhdr nhdr;
>+ ulong ptr;
>+ ulong nhdr_offset = 0;
>+ int i;
>+
>+ if (!readmem(PADDR, elfcorehdr, e_ident, EI_NIDENT))
>+ return FALSE;
>+
>+ if (e_ident[EI_CLASS] != ELFCLASS64) {
>+ ERRMSG("Only ELFCLASS64 is supportd\n");
>+ return FALSE;
>+ }
>+
>+ if (!readmem(PADDR, elfcorehdr, &ehdr, sizeof(ehdr)))
>+ return FALSE;
>+
>+ /* Sanity Check */
>+ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
>+ (ehdr.e_type != ET_CORE) ||
>+ ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
>+ ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
>+ ehdr.e_version != EV_CURRENT ||
>+ ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
>+ ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
>+ ehdr.e_phnum == 0) {
>+ ERRMSG("Invalid elf header\n");
>+ return FALSE;
>+ }
>+
>+ ptr = elfcorehdr + ehdr.e_phoff;
>+ for (i = 0; i < ehdr.e_phnum; i++) {
>+ ulong offset;
>+ char name[16];
>+
>+ if (!readmem(PADDR, ptr, &phdr, sizeof(phdr)))
>+ return FALSE;
>+
>+ ptr += sizeof(phdr);
>+ if (phdr.p_type != PT_NOTE)
>+ continue;
>+
>+ offset = phdr.p_offset;
>+ if (!readmem(PADDR, offset, &nhdr, sizeof(nhdr)))
>+ return FALSE;
>+
>+ offset += divideup(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))*
>+ sizeof(Elf64_Word);
>+ memset(name, 0, sizeof(name));
>+ if (!readmem(PADDR, offset, name, sizeof(name)))
>+ return FALSE;
>+
>+ if(!strcmp(name, "VMCOREINFO")) {
>+ nhdr_offset = offset;
>+ break;
>+ }
>+ }
>+
>+ if (!nhdr_offset)
>+ return FALSE;
>+
>+ *addr = nhdr_offset +
>+ divideup(nhdr.n_namesz, sizeof(Elf64_Word))*
>+ sizeof(Elf64_Word);
>+ *len = nhdr.n_descsz;
>+
>+ DEBUG_MSG("sadump: vmcoreinfo addr: %lx\n", *addr);
>+ DEBUG_MSG("sadump: vmcoreinfo len: %d\n", *len);
>+
>+ return TRUE;
>+}
>+
>+/*
>+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel.
>+ * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo.
>+ *
>+ * 1. Get command line and try to retrieve "elfcorehdr=" boot parameter
>+ * 2. If "elfcorehdr=" is not found in command line, we are in 1st kernel.
>+ * There is nothing to do.
>+ * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo
>+ * using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo.
>+ */
>+int
>+get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong *kaslr_offset,
>+ ulong *phys_base)
>+{
>+ ulong elfcorehdr_addr = 0;
>+ ulong vmcoreinfo_addr;
>+ int vmcoreinfo_len;
>+ char *buf, *pos;
>+ int ret = FALSE;
>+
>+ elfcorehdr_addr = get_elfcorehdr(cr3);
>+ if (!elfcorehdr_addr)
>+ return FALSE;
>+
>+ if (!get_vmcoreinfo_in_kdump_kernel(elfcorehdr_addr, &vmcoreinfo_addr,
>+ &vmcoreinfo_len))
>+ return FALSE;
>+
>+ if (!vmcoreinfo_len)
>+ return FALSE;
>+
>+ DEBUG_MSG("sadump: Find vmcoreinfo in kdump memory\n");
>+
>+ if (!(buf = malloc(vmcoreinfo_len))) {
>+ ERRMSG("Can't allocate vmcoreinfo buffer.\n");
>+ return FALSE;
>+ }
>+
>+ if (!readmem(PADDR, vmcoreinfo_addr, buf, vmcoreinfo_len))
>+ goto finish;
>+
>+ pos = strstr(buf, STR_NUMBER("phys_base"));
>+ if (!pos)
>+ goto finish;
>+ *phys_base = strtoull(pos + strlen(STR_NUMBER("phys_base")), NULL, 0);
>+
>+ pos = strstr(buf, STR_KERNELOFFSET);
>+ if (!pos)
>+ goto finish;
>+ *kaslr_offset = strtoull(pos + strlen(STR_KERNELOFFSET), NULL, 16);
>+ ret = TRUE;
>+
>+finish:
>+ free(buf);
>+ return ret;
>+}
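A note on the strings matched above (the values are made up, and the macro
expansions are as used elsewhere in makedumpfile): the vmcoreinfo data is
plain "NAME=value" lines, for example

    NUMBER(phys_base)=329252864
    KERNELOFFSET=1e000000

so phys_base is parsed with base 0 (it is written in decimal by
write_vmcoreinfo_data() with %lu), while KERNELOFFSET is parsed with base 16
because it is written with %lx and no "0x" prefix.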
>+
>+/*
>+ * Calculate kaslr_offset and phys_base
>+ *
>+ * kaslr_offset:
>+ * The difference between the original address in vmlinux and the actual
>+ * address chosen randomly by the kaslr feature. To be more precise,
>+ * kaslr_offset = actual address - original address
>+ *
>+ * phys_base:
>+ * Physical address where the kernel is placed. In other words, it is the
>+ * physical address of __START_KERNEL_map. This is also decided randomly by
>+ * kaslr.
>+ *
>+ * kaslr offset and phys_base are calculated as follows:
>+ *
>+ * kaslr_offset:
>+ * 1) Get IDTR and CR3 value from the dump header.
>+ * 2) Get a virtual address of IDT from IDTR value
>+ * --- (A)
>+ * 3) Translate (A) to a physical address using CR3, which points to the
>+ * top of the page table.
>+ * --- (B)
>+ * 4) Get the address of the vector0 (Divide Error) interrupt handler from
>+ * the IDT, which is pointed to by (B).
>+ * --- (C)
>+ * 5) Get the address of the symbol "divide_error" from vmlinux
>+ * --- (D)
>+ *
>+ * Now we have two addresses:
>+ * (C)-> Actual address of "divide_error"
>+ * (D)-> Original address of "divide_error" in the vmlinux
>+ *
>+ * kaslr_offset can be calculated as the difference between these two
>+ * values.
>+ *
>+ * phys_base:
>+ * 1) Get IDT virtual address from vmlinux
>+ * --- (E)
>+ *
>+ * So phys_base can be calculated using relationship of directly mapped
>+ * address.
>+ *
>+ * phys_base =
>+ * Physical address(B) -
>+ * (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
>+ *
>+ * Note that address (A) cannot be used instead of (E) because (A) is
>+ * not a direct-mapped address; it is a fixmap address.
>+ *
>+ * This solution works in almost every case, but does not work in the
>+ * following cases.
>+ *
>+ * 1) If the dump is captured at an early stage of kernel boot, IDTR points
>+ * to the early IDT table (early_idts) instead of the normal IDT (idt_table).
>+ * 2) If the dump is captured whle kdump is working, IDTR points
   ^i  (typo: "whle" should be "while")
>+ * IDT table of 2nd kernel, not 1st kernel.
These cases sound relevant only for outside dump mechanisms like sadump, right?
I think the handling for case 2) is an extra feature, while calculating
kaslr_offset is the essential solution for the KASLR problem.
I hope you will split this patch in two since it's large. Concretely,
>+ *
>+ * The current implementation does not support case 1); this needs
>+ * enhancement in the future. For case 2), get kaslr_offset and
>+ * phys_base as follows.
>+ *
>+ * 1) Get kaslr_offset and phys_base using the above solution.
>+ * 2) Get the kernel boot parameters from "saved_command_line"
>+ * 3) If "elfcorehdr=" is not included in the boot parameters, we are in the
>+ * 1st kernel and there is nothing more to do.
>+ * 4) If "elfcorehdr=" is included in the boot parameters, we are in the 2nd
>+ * kernel. Retrieve vmcoreinfo from the address given by "elfcorehdr=" and
>+ * get kaslr_offset and phys_base from that vmcoreinfo.
>+ */
1) -> [PATCH 3/4]
2)-4) -> [PATCH 4/4]
Thanks,
Atsushi Kumagai
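To make the two formulas in the comment above concrete, here is a worked
example with made-up addresses (purely illustrative, not from any real dump):

    (D) divide_error in vmlinux : 0xffffffff81030000
    (C) divide_error via IDT    : 0xffffffff93030000
    (E) idt_table in vmlinux    : 0xffffffff82000000
    (B) physical address of IDT : 0x213000000
    __START_KERNEL_map          : 0xffffffff80000000

    kaslr_offset = (C) - (D) = 0x12000000
    phys_base    = (B) - ((E) + kaslr_offset - __START_KERNEL_map)
                 = 0x213000000 - 0x14000000
                 = 0x1ff000000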
>+int
>+calc_kaslr_offset(void)
>+{
>+ struct sadump_header *sh = si->sh_memory;
>+ uint64_t idtr = 0, cr3 = 0, idtr_paddr;
>+ struct sadump_smram_cpu_state smram, zero;
>+ int apicid;
>+ unsigned long divide_error_vmcore, divide_error_vmlinux;
>+
>+ unsigned long kaslr_offset_kdump, phys_base_kdump;
>+ unsigned long kaslr_offset, phys_base;
>+
>+ memset(&zero, 0, sizeof(zero));
>+ for (apicid = 0; apicid < sh->nr_cpus; ++apicid) {
>+ if (!get_smram_cpu_state(apicid, &smram)) {
>+ ERRMSG("get_smram_cpu_state error\n");
>+ return FALSE;
>+ }
>+
>+ if (memcmp(&smram, &zero, sizeof(smram)) != 0)
>+ break;
>+ }
>+ if (apicid >= sh->nr_cpus) {
>+ ERRMSG("Can't get smram state\n");
>+ return FALSE;
>+ }
>+
>+ idtr = ((uint64_t)smram.IdtUpper)<<32 | (uint64_t)smram.IdtLower;
>+ cr3 = smram.Cr3;
>+
>+ /* Convert virtual address of IDT table to physical address */
>+ if ((idtr_paddr = vtop4_x86_64_pagetable(idtr, cr3)) == NOT_PADDR)
>+ return FALSE;
>+
>+ /* Now we can calculate kaslr_offset and phys_base */
>+ divide_error_vmlinux = SYMBOL(divide_error);
>+ divide_error_vmcore = get_vec0_addr(idtr_paddr);
>+ kaslr_offset = divide_error_vmcore - divide_error_vmlinux;
>+ phys_base = idtr_paddr -
>+ (SYMBOL(idt_table) + kaslr_offset - __START_KERNEL_map);
>+
>+ info->kaslr_offset = kaslr_offset;
>+ info->phys_base = phys_base;
>+
>+ DEBUG_MSG("sadump: idtr=%" PRIx64 "\n", idtr);
>+ DEBUG_MSG("sadump: cr3=%" PRIx64 "\n", cr3);
>+ DEBUG_MSG("sadump: idtr(phys)=%" PRIx64 "\n", idtr_paddr);
>+ DEBUG_MSG("sadump: devide_error(vmlinux)=%lx\n",
>+ divide_error_vmlinux);
>+ DEBUG_MSG("sadump: devide_error(vmcore)=%lx\n",
>+ divide_error_vmcore);
>+
>+ /* Reload symbol */
>+ if (!get_symbol_info())
>+ return FALSE;
>+
>+ /*
>+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd
>+ * kernel. If we are in 2nd kernel, get kaslr_offset/phys_base
>+ * from vmcoreinfo
>+ */
>+ if (get_kaslr_offset_from_vmcoreinfo(cr3, &kaslr_offset_kdump,
>+ &phys_base_kdump)) {
>+ info->kaslr_offset = kaslr_offset_kdump;
>+ info->phys_base = phys_base_kdump;
>+
>+ /* Reload symbol */
>+ if (!get_symbol_info())
>+ return FALSE;
>+ }
>+
>+ DEBUG_MSG("sadump: kaslr_offset=%lx\n", info->kaslr_offset);
>+ DEBUG_MSG("sadump: phys_base=%lx\n", info->phys_base);
>+
>+ return TRUE;
>+}
>+
> int
> sadump_virt_phys_base(void)
> {
>@@ -1065,6 +1469,9 @@ sadump_virt_phys_base(void)
> }
>
> failed:
>+ if (calc_kaslr_offset())
>+ return TRUE;
>+
> info->phys_base = 0;
>
> DEBUG_MSG("sadump: failed to calculate phys_base; default to 0\n");
>@@ -1518,10 +1925,14 @@ cpu_to_apicid(int cpu, int *apicid)
> if (!readmem(VADDR, SYMBOL(x86_bios_cpu_apicid_early_ptr),
> &early_ptr, sizeof(early_ptr)))
> return FALSE;
>-
>+ /*
>+ * Note: SYMBOL(name) value is adjusted by info->kaslr_offset,
>+ * but per_cpu symbols do not need to be adjusted because they
>+ * are not affected by kaslr.
>+ */
> apicid_addr = early_ptr
> ? SYMBOL(x86_bios_cpu_apicid_early_map)+cpu*sizeof(uint16_t)
>- : per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid), cpu);
>+ : per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid) - info->kaslr_offset, cpu);
>
> if (!readmem(VADDR, apicid_addr, &apicid_u16, sizeof(uint16_t)))
> return FALSE;
>--
>2.9.5