makedumpfile fails on SGI machine

Jay Lan jlan at sgi.com
Wed Aug 27 21:43:43 EDT 2008


Ken'ichi Ohmichi wrote:
> Hi Bernhard,
> 
> Bernhard Walle wrote:
>>>> * Ken'ichi Ohmichi [2008-08-05 21:07]:
>>>>> BTW, I'd like to know some conditions of this problem.
>>>>> So please let me know the makedumpfile commandline which you run.
>>>>>  Ex. # makedumpfile -d 31 -x vmlinux /proc/vmcore dumpfile
>>>>   # makedumpfile -d 3 -D vmcore.sles11 vmcore.sles11.filtered
>>>>
>>>>> I doubt that makedumpfile cannot distinguish the pgtable correctly.
>>>>> I created the debugging patch. Please run makedumpfile-1.2.7 applied
>>>>> with it by '-D' option, and report the output.
>>>>> According to your kernel .config file, the output should contain
>>>>> "PAGETABLE_4L : ON".
>>>> See the attachment.
>>> Thanks for the log.
>>> makedumpfile can distinguish the pgtable correctly.
>>>
>>> Can the crash utility translate the virtual address 0xa0007fff8f900000
>>> to physical address ? Please run the following subcommand on the crash
>>> utility.
>>>
>>> crash> vtop 0xa0007fff8f900000
>> crash> vtop 0xa0007fff8f900000  
>> VIRTUAL           PHYSICAL        
>> a0007fff8f900000  (not mapped)
>>
>> PAGE DIRECTORY: a000000100914000
>>    PGD: a000000100914000 => 30030bc000
>>    PUD: e0000030030bfff8 => 30030c0000
>>    PMD: e0000030030c3e38 => 30030c4000
>>    PTE: e0000030030c7200 => 0
>>
>>
>> But I guess that's normal with virtual memory maps:
>>
>> $ zgrep VIRTUAL_M /proc/config.gz 
>> CONFIG_VIRTUAL_MEM_MAP=y
> 
> Thank you for your help.
> 
> I found a problem in the ia64 discontigmem kernel: 'pgdat_list.node_mem_map'
> has an unexpected value. I created the attached patch to work around the
> problem. With this patch applied, makedumpfile takes the mem_map information
> from the symbol 'vmem_map' instead of 'pgdat_list.node_mem_map'.
> The patch is for makedumpfile-1.2.8.  Could you please test it ?
> 
> Note:
>  This solution needs a symbol 'vmem_map', but vmcoreinfo data in /proc/vmcore
>  does not contain it. So "-x" option is necessary for running makedumpfile
>  like the following.
> 
>  # makedumpfile -c -d31 -x vmlinux /proc/vmcore dumpfile

Hi Ken'ichi,

Thanks for your patch!

I am wondering whether the discontigmem kernel has a legitimate bug;
if so, we should probably report it.

I tested your patch on a machine that used to fail when executing
'makedumpfile'. It now generates a dump file fine.

Running crash 4.0.7.1 against the dump file looked good!
I will test on other types of machines later.

Thanks for your help (and Bernhard's) while I was chasing down
a nasty bug that panicked the kdump kernel at boot with the 2.6.27
kernel on an IA64 machine.

Regards,
 - jay

> 
> 
> Thanks
> Ken'ichi Ohmichi
> 
> diff -puN backup/v1.2.8/makedumpfile.c makedumpfile/makedumpfile.c
> --- backup/v1.2.8/makedumpfile.c	2008-08-12 12:17:24.000000000 +0900
> +++ makedumpfile/makedumpfile.c	2008-08-28 02:07:47.000000000 +0900
> @@ -1819,6 +1819,7 @@ get_symbol_info()
>  	 * Get symbol info.
>  	 */
>  	SYMBOL_INIT(mem_map, "mem_map");
> +	SYMBOL_INIT(vmem_map, "vmem_map");
>  	SYMBOL_INIT(mem_section, "mem_section");
>  	SYMBOL_INIT(pkmap_count, "pkmap_count");
>  	SYMBOL_INIT_NEXT(pkmap_count_next, "pkmap_count");
> @@ -2110,6 +2111,7 @@ generate_vmcoreinfo()
>  	 * write the symbol of 1st kernel
>  	 */
>  	WRITE_SYMBOL("mem_map", mem_map);
> +	WRITE_SYMBOL("vmem_map", vmem_map);
>  	WRITE_SYMBOL("mem_section", mem_section);
>  	WRITE_SYMBOL("pkmap_count", pkmap_count);
>  	WRITE_SYMBOL("pkmap_count_next", pkmap_count_next);
> @@ -2350,6 +2352,7 @@ read_vmcoreinfo()
>  		return FALSE;
>  
>  	READ_SYMBOL("mem_map", mem_map);
> +	READ_SYMBOL("vmem_map", vmem_map);
>  	READ_SYMBOL("mem_section", mem_section);
>  	READ_SYMBOL("pkmap_count", pkmap_count);
>  	READ_SYMBOL("pkmap_count_next", pkmap_count_next);
> @@ -2912,6 +2915,7 @@ get_mm_discontigmem()
>  {
>  	int i, j, id_mm, node, num_mem_map, separate_mm = FALSE;
>  	unsigned long pgdat, mem_map, pfn_start, pfn_end, node_spanned_pages;
> +	unsigned long vmem_map;
>  	struct mem_map_data temp_mmd;
>  
>  	num_mem_map = get_num_mm_discontigmem();
> @@ -2924,6 +2928,13 @@ get_mm_discontigmem()
>  		separate_mm = TRUE;
>  	}
>  
> +	if (SYMBOL(vmem_map) != NOT_FOUND_SYMBOL) {
> +		if (!readmem(VADDR, SYMBOL(vmem_map), &vmem_map, sizeof vmem_map)) {
> +			ERRMSG("Can't get vmem_map.\n");
> +			return FALSE;
> +		}
> +	}
> +
>  	/*
>  	 * Get the first node_id.
>  	 */
> @@ -2937,11 +2948,6 @@ get_mm_discontigmem()
>  	}
>  	id_mm = 0;
>  	for (i = 0; i < vt.numnodes; i++) {
> -		if (!readmem(VADDR, pgdat + OFFSET(pglist_data.node_mem_map),
> -		    &mem_map, sizeof mem_map)) {
> -			ERRMSG("Can't get mem_map.\n");
> -			return FALSE;
> -		}
>  		if (!readmem(VADDR, pgdat + OFFSET(pglist_data.node_start_pfn),
>  		    &pfn_start, sizeof pfn_start)) {
>  			ERRMSG("Can't get node_start_pfn.\n");
> @@ -2954,6 +2960,15 @@ get_mm_discontigmem()
>  		}
>  		pfn_end = pfn_start + node_spanned_pages;
>  
> +		if (SYMBOL(vmem_map) == NOT_FOUND_SYMBOL) {
> +			if (!readmem(VADDR, pgdat + OFFSET(pglist_data.node_mem_map),
> +			    &mem_map, sizeof mem_map)) {
> +				ERRMSG("Can't get mem_map.\n");
> +				return FALSE;
> +			}
> +		} else
> +			mem_map = vmem_map + SIZE(page) * pfn_start;
> +
>  		if (separate_mm) {
>  			/*
>  			 * For some ia64 NUMA systems.
> diff -puN backup/v1.2.8/makedumpfile.h makedumpfile/makedumpfile.h
> --- backup/v1.2.8/makedumpfile.h	2008-08-12 12:17:24.000000000 +0900
> +++ makedumpfile/makedumpfile.h	2008-08-28 02:06:40.000000000 +0900
> @@ -814,6 +814,7 @@ extern struct vm_table		vt;
>  
>  struct symbol_table {
>  	unsigned long long	mem_map;
> +	unsigned long long	vmem_map;
>  	unsigned long long	mem_section;
>  	unsigned long long	pkmap_count;
>  	unsigned long long	pkmap_count_next;
> 
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> kexec mailing list
> kexec at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec




More information about the kexec mailing list