[PATCH v3] makedumpfile: Support to filter dump for kernels that use CONFIG_SPARSEMEM_VMEMMAP

WANG Chao chaowang at redhat.com
Fri Nov 22 00:50:36 EST 2013


On 11/21/13 at 05:37pm, Hari Bathini wrote:
> Makedumpfile fails to filter dump for kernels built with CONFIG_SPARSEMEM_VMEMMAP
> enabled as it fails to do vmemmap translations. So far, makedumpfile on ppc64 never
> had to deal with vmemmap addresses (vmemmap regions) separately to filter ppc64
> crash dumps as vmemmap regions were mapped in zone normal. But with the inclusion
> of CONFIG_SPARSEMEM_VMEMMAP config option in recent kernels, vmemmap memory regions
> are mapped outside zone normal. There is a need to handle vmemmap to physical address
> translation separately in this scenario. This patch provides support in makedumpfile
> tool to do vmemmap to physical address translation when vmemmap regions are mapped
> outside zone normal. Some kernel symbols are needed in vmcoreinfo for these changes to
> be effective. The kernel patch that adds the necessary symbols to vmcoreinfo has been
> posted to linuxppc devel mailing list. This patch is influenced by vmemmap to physical
> address translation support code in crash utility. It has been tested successfully
> at all dump filtering levels on kernel dumps that have CONFIG_SPARSEMEM_VMEMMAP enabled
> and kernel dumps with CONFIG_SPARSEMEM_VMEMMAP disabled as well. Also, successfully
> tested dump filtering on already filtered vmcores (re-filtering). The patch applies
> cleanly on version 1.5.4 of makedumpfile.
> 
> Changes from v2 to v3:
> 1. Changed 'flags' to a more specific name, 'flags_vmemmap'  in DumpInfo structure
> 2. Freeing vmemmap_buf in get_vmemmap_list_info(), in success scenario as well
> 
> Changes in v2:
> 1. Fixed return value when vmemmap list initialization fails
> 2. Fixed coding style issue

Hi, Hari

When I tried to apply your patch to the devel branch, I hit several
failures, so the patch does not apply. I have a few comments inline that
may help you rebase your work on top of the devel branch.

> 
> Signed-off-by: Onkar N Mahajan <onmahaja at in.ibm.com>
> Signed-off-by: Hari Bathini <hbathini at linux.vnet.ibm.com>
> ---
>  arch/ppc64.c   |  177 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  makedumpfile.c |   38 ++++++++++++
>  makedumpfile.h |   42 +++++++++++++
>  3 files changed, 251 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/ppc64.c b/arch/ppc64.c
> index c229ede..05738e7 100644
> --- a/arch/ppc64.c
> +++ b/arch/ppc64.c
> @@ -24,6 +24,154 @@
>  #include "../elf_info.h"
>  #include "../makedumpfile.h"
>  
> +/*
> + * This function traverses vmemmap list to get the count of vmemmap regions
> + * and populates the regions' info in info->vmemmap_list[]
> + */
> +static int
> +get_vmemmap_list_info(ulong head)
> +{
> +	int   i, cnt;
> +	long  backing_size, virt_addr_offset, phys_offset, list_offset;
> +	ulong curr, next;
> +	char  *vmemmap_buf = NULL;
> +
> +	backing_size		= SIZE(vmemmap_backing);
> +	virt_addr_offset	= OFFSET(vmemmap_backing.virt_addr);
> +	phys_offset		= OFFSET(vmemmap_backing.phys);
> +	list_offset		= OFFSET(vmemmap_backing.list);
> +	info->vmemmap_list = NULL;
> +
> +	/*
> +	 * Get list count by traversing the vmemmap list
> +	 */
> +	cnt = 0;
> +	curr = head;
> +	next = 0;
> +	do {
> +		if (!readmem(VADDR, (curr + list_offset), &next,
> +			     sizeof(next))) {
> +			ERRMSG("Can't get vmemmap region addresses\n");
> +			goto err;
> +		}
> +		curr = next;
> +		cnt++;
> +	} while ((next != 0) && (next != head));
> +
> +	/*
> +	 * Using temporary buffer to save vmemmap region information
> +	 */
> +	vmemmap_buf = calloc(1, backing_size);
> +	if (vmemmap_buf == NULL) {
> +		ERRMSG("Can't allocate memory for vmemmap_buf. %s\n",
> +		       strerror(errno));
> +		goto err;
> +	}
> +
> +	info->vmemmap_list = calloc(1, cnt * sizeof(struct ppc64_vmemmap));
> +	if (info->vmemmap_list == NULL) {
> +		ERRMSG("Can't allocate memory for vmemmap_list. %s\n",
> +		       strerror(errno));
> +		goto err;
> +	}
> +
> +	curr = head;
> +	for (i = 0; i < cnt; i++) {
> +		if (!readmem(VADDR, curr, vmemmap_buf, backing_size)) {
> +			ERRMSG("Can't get vmemmap region info\n");
> +			goto err;
> +		}
> +
> +		info->vmemmap_list[i].phys = ULONG(vmemmap_buf + phys_offset);
> +		info->vmemmap_list[i].virt = ULONG(vmemmap_buf +
> +						   virt_addr_offset);
> +		curr = ULONG(vmemmap_buf + list_offset);
> +
> +		if (info->vmemmap_list[i].virt < info->vmemmap_start)
> +			info->vmemmap_start = info->vmemmap_list[i].virt;
> +
> +		if ((info->vmemmap_list[i].virt + info->vmemmap_psize) >
> +		    info->vmemmap_end)
> +			info->vmemmap_end = (info->vmemmap_list[i].virt +
> +					     info->vmemmap_psize);
> +	}
> +
> +	free(vmemmap_buf);
> +	return cnt;
> +err:
> +	free(vmemmap_buf);
> +	free(info->vmemmap_list);
> +	return 0;
> +}
> +
> +/*
> + *  Verify that the kernel has made the vmemmap list available,
> + *  and if so, stash the relevant data required to make vtop
> + *  translations.
> + */
> +static int
> +ppc64_vmemmap_init(void)
> +{
> +	int psize, shift;
> +	ulong head;
> +
> +	if ((SYMBOL(vmemmap_list) == NOT_FOUND_SYMBOL)
> +	    || (SYMBOL(mmu_psize_defs) == NOT_FOUND_SYMBOL)
> +	    || (SYMBOL(mmu_vmemmap_psize) == NOT_FOUND_SYMBOL)
> +	    || (SIZE(vmemmap_backing) == NOT_FOUND_STRUCTURE)
> +	    || (SIZE(mmu_psize_def) == NOT_FOUND_STRUCTURE)
> +	    || (OFFSET(mmu_psize_def.shift) == NOT_FOUND_STRUCTURE)
> +	    || (OFFSET(vmemmap_backing.phys) == NOT_FOUND_STRUCTURE)
> +	    || (OFFSET(vmemmap_backing.virt_addr) == NOT_FOUND_STRUCTURE)
> +	    || (OFFSET(vmemmap_backing.list) == NOT_FOUND_STRUCTURE))
> +		return FALSE;
> +
> +	if (!readmem(VADDR, SYMBOL(mmu_vmemmap_psize), &psize, sizeof(int)))
> +		return FALSE;
> +
> +	if (!readmem(VADDR, SYMBOL(mmu_psize_defs) +
> +		     (SIZE(mmu_psize_def) * psize) +
> +		     OFFSET(mmu_psize_def.shift), &shift, sizeof(int)))
> +		return FALSE;
> +	info->vmemmap_psize = 1 << shift;
> +
> +	if (!readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long)))
> +		return FALSE;
> +
> +	/*
> +	 * Get vmemmap list count and populate vmemmap regions info
> +	 */
> +	info->vmemmap_cnt = get_vmemmap_list_info(head);
> +	if (info->vmemmap_cnt == 0)
> +		return FALSE;
> +
> +	info->flags_vmemmap |= VMEMMAP_AWARE;
> +	return TRUE;
> +}
> +
> +/*
> + *  If the vmemmap address translation information is stored in the kernel,
> + *  make the translation.
> + */
> +static unsigned long long
> +ppc64_vmemmap_to_phys(unsigned long vaddr)
> +{
> +	int	i;
> +	ulong	offset;
> +	unsigned long long paddr = NOT_PADDR;
> +
> +	for (i = 0; i < info->vmemmap_cnt; i++) {
> +		if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr <
> +		    (info->vmemmap_list[i].virt + info->vmemmap_psize))) {
> +			offset = vaddr - info->vmemmap_list[i].virt;
> +			paddr = info->vmemmap_list[i].phys + offset;
> +			break;
> +		}
> +	}
> +
> +	return paddr;
> +}
> +
>  int
>  set_ppc64_max_physmem_bits(void)
>  {
> @@ -49,7 +197,7 @@ set_ppc64_max_physmem_bits(void)
>  int
>  get_machdep_info_ppc64(void)
>  {
> -	unsigned long vmlist, vmalloc_start;
> +	unsigned long vmlist, vmap_area_list, vmalloc_start;

This is already fixed in devel branch:

commit 150b58e
Author: Baoquan He <bhe at redhat.com>
Date:   Mon Jul 15 20:37:14 2013 +0800

    [PATCH] Add vmap_area_list definition for ppc/ppc64.


>  
>  	info->section_size_bits = _SECTION_SIZE_BITS;
>  	if (!set_ppc64_max_physmem_bits()) {
> @@ -103,6 +251,16 @@ get_machdep_info_ppc64(void)
>  	info->vmalloc_start = vmalloc_start;
>  	DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start);
>  
> +	if (SYMBOL(vmemmap_list) != NOT_FOUND_SYMBOL) {
> +		info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT;
> +		info->vmemmap_end = info->vmemmap_start;
> +		if (ppc64_vmemmap_init() == FALSE) {
> +			ERRMSG("Can't get vmemmap list info.\n");
> +			return FALSE;
> +		}
> +		DEBUG_MSG("vmemmap_start: %lx\n", info->vmemmap_start);
> +	}
> +
>  	return TRUE;
>  }
>  
> @@ -121,14 +279,23 @@ vaddr_to_paddr_ppc64(unsigned long vaddr)
>  	if (paddr != NOT_PADDR)
>  		return paddr;
>  
> -	if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL)
> -	    || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) {
> -		ERRMSG("Can't get necessary information for vmalloc translation.\n");
> -		return NOT_PADDR;
> +	if ((SYMBOL(vmap_area_list) == NOT_FOUND_SYMBOL)
> +	    || (OFFSET(vmap_area.va_start) == NOT_FOUND_STRUCTURE)
> +	    || (OFFSET(vmap_area.list) == NOT_FOUND_STRUCTURE)) {
> +		if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL)
> +		    || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) {
> +			ERRMSG("Can't get info for vmalloc translation.\n");
> +			return NOT_PADDR;
> +		}
>  	}
>  	if (!is_vmalloc_addr_ppc64(vaddr))
>  		return (vaddr - info->kernel_start);
>  
> +	if ((info->flags_vmemmap && VMEMMAP_AWARE)
> +	    && (vaddr >= info->vmemmap_start)) {
> +		return ppc64_vmemmap_to_phys(vaddr);
> +	}
> +
>  	/*
>  	 * TODO: Support vmalloc translation.
>  	 */
> diff --git a/makedumpfile.c b/makedumpfile.c
> index b42565c..5035fce 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -1097,6 +1097,10 @@ get_symbol_info(void)
>  		SYMBOL_ARRAY_LENGTH_INIT(node_remap_start_pfn,
>  					"node_remap_start_pfn");
>  
> +	SYMBOL_INIT(vmemmap_list, "vmemmap_list");
> +	SYMBOL_INIT(mmu_psize_defs, "mmu_psize_defs");
> +	SYMBOL_INIT(mmu_vmemmap_psize, "mmu_vmemmap_psize");
> +
>  	return TRUE;
>  }
>  
> @@ -1394,6 +1398,20 @@ get_structure_info(void)
>  	OFFSET_INIT(log.len, "log", "len");
>  	OFFSET_INIT(log.text_len, "log", "text_len");
>  
> +	/*
> +	 * Get offsets of the vmemmap_backing's members.
> +	 */
> +	SIZE_INIT(vmemmap_backing, "vmemmap_backing");
> +	OFFSET_INIT(vmemmap_backing.phys, "vmemmap_backing", "phys");
> +	OFFSET_INIT(vmemmap_backing.virt_addr, "vmemmap_backing", "virt_addr");
> +	OFFSET_INIT(vmemmap_backing.list, "vmemmap_backing", "list");
> +
> +	/*
> +	 * Get offsets of the mmu_psize_def's members.
> +	 */
> +	SIZE_INIT(mmu_psize_def, "mmu_psize_def");
> +	OFFSET_INIT(mmu_psize_def.shift, "mmu_psize_def", "shift");
> +
>  	return TRUE;
>  }
>  
> @@ -1580,6 +1598,9 @@ write_vmcoreinfo_data(void)
>  	WRITE_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr);
>  	WRITE_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr);
>  	WRITE_SYMBOL("node_remap_start_pfn", node_remap_start_pfn);
> +	WRITE_SYMBOL("vmemmap_list", vmemmap_list);
> +	WRITE_SYMBOL("mmu_psize_defs", mmu_psize_defs);
> +	WRITE_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize);
>  
>  	/*
>  	 * write the structure size of 1st kernel
> @@ -1594,6 +1615,8 @@ write_vmcoreinfo_data(void)
>  	WRITE_STRUCTURE_SIZE("nodemask_t", nodemask_t);
>  	WRITE_STRUCTURE_SIZE("pageflags", pageflags);
>  	WRITE_STRUCTURE_SIZE("log", log);

Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg:
Understand >= v3.11-rc4 dmesg.")

> +	WRITE_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing);
> +	WRITE_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def);
>  
>  	/*
>  	 * write the member offset of 1st kernel
> @@ -1631,6 +1654,11 @@ write_vmcoreinfo_data(void)
>  	WRITE_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec);
>  	WRITE_MEMBER_OFFSET("log.len", log.len);
>  	WRITE_MEMBER_OFFSET("log.text_len", log.text_len);

Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg:
Understand >= v3.11-rc4 dmesg.")

> +	WRITE_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys);
> +	WRITE_MEMBER_OFFSET("vmemmap_backing.virt_addr",
> +	    vmemmap_backing.virt_addr);
> +	WRITE_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list);
> +	WRITE_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift);
>  
>  	if (SYMBOL(node_data) != NOT_FOUND_SYMBOL)
>  		WRITE_ARRAY_LENGTH("node_data", node_data);
> @@ -1899,6 +1927,9 @@ read_vmcoreinfo(void)
>  	READ_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr);
>  	READ_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr);
>  	READ_SYMBOL("node_remap_start_pfn", node_remap_start_pfn);
> +	READ_SYMBOL("vmemmap_list", vmemmap_list);
> +	READ_SYMBOL("mmu_psize_defs", mmu_psize_defs);
> +	READ_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize);
>  
>  	READ_STRUCTURE_SIZE("page", page);
>  	READ_STRUCTURE_SIZE("mem_section", mem_section);
> @@ -1910,6 +1941,8 @@ read_vmcoreinfo(void)
>  	READ_STRUCTURE_SIZE("nodemask_t", nodemask_t);
>  	READ_STRUCTURE_SIZE("pageflags", pageflags);
>  	READ_STRUCTURE_SIZE("log", log);

Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg:
Understand >= v3.11-rc4 dmesg.")

> +	READ_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing);
> +	READ_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def);
>  
>  	READ_MEMBER_OFFSET("page.flags", page.flags);
>  	READ_MEMBER_OFFSET("page._count", page._count);
> @@ -1943,6 +1976,11 @@ read_vmcoreinfo(void)
>  	READ_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec);
>  	READ_MEMBER_OFFSET("log.len", log.len);
>  	READ_MEMBER_OFFSET("log.text_len", log.text_len);

Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg:
Understand >= v3.11-rc4 dmesg.")

> +	READ_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys);
> +	READ_MEMBER_OFFSET("vmemmap_backing.virt_addr",
> +	    vmemmap_backing.virt_addr);
> +	READ_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list);
> +	READ_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift);
>  
>  	READ_ARRAY_LENGTH("node_data", node_data);
>  	READ_ARRAY_LENGTH("pgdat_list", pgdat_list);
> diff --git a/makedumpfile.h b/makedumpfile.h
> index a5826e0..a142243 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -576,6 +576,9 @@ do { \
>  #define _SECTION_SIZE_BITS	(24)
>  #define _MAX_PHYSMEM_BITS_ORIG  (44)
>  #define _MAX_PHYSMEM_BITS_3_7   (46)
> +#define REGION_SHIFT            (60UL)
> +#define VMEMMAP_REGION_ID       (0xfUL)
> +#define VMEMMAP_AWARE           (0x4)
>  #endif
>  
>  #ifdef __powerpc32__
> @@ -862,6 +865,11 @@ struct splitting_info {
>  	unsigned long		size_eraseinfo;
>  } splitting_info_t;
>  
> +struct ppc64_vmemmap {
> +	unsigned long		phys;
> +	unsigned long		virt;
> +};
> +
>  struct DumpInfo {
>  	int32_t		kernel_version;      /* version of first kernel*/
>  	struct timeval	timestamp;
> @@ -908,6 +916,14 @@ struct DumpInfo {
>  	unsigned long   vmalloc_end;
>  	unsigned long	vmemmap_start;
>  	unsigned long	vmemmap_end;
> +	int		vmemmap_psize;
> +	int		vmemmap_cnt;
> +	struct ppc64_vmemmap	*vmemmap_list;
> +	unsigned long	flags_vmemmap;
> +
> +	/*
> +	 * for vmemmap
> +	 */
>  
>  	/*
>  	 * Filter config file containing filter commands to filter out kernel
> @@ -1093,7 +1109,6 @@ struct module_info {
>  	struct symbol_info	*sym_info;
>  };
>  
> -
>  struct symbol_table {
>  	unsigned long long	mem_map;
>  	unsigned long long	vmem_map;
> @@ -1165,6 +1180,13 @@ struct symbol_table {
>  	unsigned long long	__per_cpu_load;
>  	unsigned long long	cpu_online_mask;
>  	unsigned long long	kexec_crash_image;
> +
> +	/*
> +	 * vmemmap symbols on ppc64 arch
> +	 */
> +	unsigned long long		vmemmap_list;
> +	unsigned long long		mmu_vmemmap_psize;
> +	unsigned long long		mmu_psize_defs;
>  };
>  
>  struct size_table {
> @@ -1200,6 +1222,12 @@ struct size_table {
>  	long	elf64_hdr;
>  	long	log;

Above line has been removed in commit a01b663 ("[PATCH v2] dump-dmesg:
Understand >= v3.11-rc4 dmesg.")

>  
> +	/*
> +	 * vmemmap symbols on ppc64 arch
> +	 */
> +	long	vmemmap_backing;
> +	long	mmu_psize_def;
> +
>  	long	pageflags;
>  };
>  
> @@ -1343,6 +1371,18 @@ struct offset_table {
>  		long text_len;
>  	} log;

Above line has changed in commit a01b663 ("[PATCH v2] dump-dmesg:
Understand >= v3.11-rc4 dmesg.")


Thanks
WANG Chao

>  
> +	/*
> +	 * vmemmap symbols on ppc64 arch
> +	 */
> +	struct mmu_psize_def {
> +		long	shift;
> +	} mmu_psize_def;
> +
> +	struct vmemmap_backing {
> +		long	phys;
> +		long	virt_addr;
> +		long	list;
> +	} vmemmap_backing;
>  };
>  
>  /*
> 
> 
> _______________________________________________
> kexec mailing list
> kexec at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec



More information about the kexec mailing list