[PATCH V7] makedumpfile: exclude page structures of non-dumped pages

Atsushi Kumagai ats-kumagai at wm.jp.nec.com
Sun May 22 18:36:16 PDT 2016


Hello Cliff,

>>From: Cliff Wickman <cpw at sgi.com>
>>
>>Applies to the development branch as of 10/13/2015.
>>Incorporates review 10/22 by kumagai-atsushi.
>>Incorporates review 10/26 by kumagai-atsushi. (arch x86_64-only implementation)
>
>Thanks for your great work.
>I don't want to delay v1.5.9 release anymore, I'll merge this patch
>into v1.6.0.

While testing for the v1.6.0 release, I found that this feature
will be disabled in some environments.

[skip to find_vmemmap_x86_64()]

>>+/*
>>+ * Scan the kernel page table for the pfn's of the page structs
>>+ * Place them in array gvmem_pfns[nr_gvmem_pfns]
>>+ */
>>+int
>>+find_vmemmap_x86_64()
>>+{
>>+	int i;
>>+	int pgd_index, pud_index;
>>+	int start_range = 1;
>>+	int num_pmds=0, num_pmds_valid=0;
>>+	int break_in_valids, break_after_invalids;
>>+	int do_break, done = 0;
>>+	int last_valid=0, last_invalid=0;
>>+	int pagestructsize, structsperhpage, hugepagesize;
>>+	long page_structs_per_pud;
>>+	long num_puds, groups = 0;
>>+	long pgdindex, pudindex, pmdindex;
>>+	long vaddr, vaddr_base;
>>+	long rep_pfn_start = 0, rep_pfn_end = 0;
>>+	unsigned long init_level4_pgt;
>>+	unsigned long max_paddr, high_pfn;
>>+	unsigned long pgd_addr, pud_addr, pmd_addr;
>>+	unsigned long *pgdp, *pudp, *pmdp;
>>+	unsigned long pud_page[PTRS_PER_PUD];
>>+	unsigned long pmd_page[PTRS_PER_PMD];
>>+	unsigned long vmap_offset_start = 0, vmap_offset_end = 0;
>>+	unsigned long pmd, tpfn;
>>+	unsigned long pvaddr = 0;
>>+	unsigned long data_addr = 0, last_data_addr = 0, start_data_addr = 0;
>>+	/*
>>+	 * data_addr is the paddr of the page holding the page structs.
>>+	 * We keep lists of contiguous pages and the pfn's that their
>>+	 * page structs represent.
>>+	 *  start_data_addr and last_data_addr mark start/end of those
>>+	 *  contiguous areas.
>>+	 * An area descriptor is vmap start/end pfn and rep start/end
>>+	 *  of the pfn's represented by the vmap start/end.
>>+	 */
>>+	struct vmap_pfns *vmapp, *vmaphead = NULL, *cur, *tail;
>>+
>>+	init_level4_pgt = SYMBOL(init_level4_pgt);
>>+	if (init_level4_pgt == NOT_FOUND_SYMBOL) {
>>+		ERRMSG("init_level4_pgt not found\n");
>>+		return FAILED;
>>+	}
>>+	pagestructsize = size_table.page;
>>+	hugepagesize = PTRS_PER_PMD * info->page_size;
>>+	vaddr_base = info->vmemmap_start;
>>+	vaddr = vaddr_base;
>>+	max_paddr = get_max_paddr();
>>+	/*
>>+	 * the page structures are mapped at VMEMMAP_START (info->vmemmap_start)
>>+	 * for max_paddr >> 12 page structures
>>+	 */
>>+	high_pfn = max_paddr >> 12;
>>+	pgd_index = pgd4_index(vaddr_base);
>>+	pud_index = pud_index(vaddr_base);
>>+	pgd_addr = vaddr_to_paddr(init_level4_pgt); /* address of pgd */
>>+	pgd_addr += pgd_index * sizeof(unsigned long);
>>+	page_structs_per_pud = (PTRS_PER_PUD * PTRS_PER_PMD * info->page_size) /
>>+									pagestructsize;
>>+	num_puds = (high_pfn + page_structs_per_pud - 1) / page_structs_per_pud;
>>+	pvaddr = VMEMMAP_START;
>>+	structsperhpage = hugepagesize / pagestructsize;
>>+
>>+	/* outer loop is for pud entries in the pgd */
>>+	for (pgdindex = 0, pgdp = (unsigned long *)pgd_addr; pgdindex < num_puds;
>>+								pgdindex++, pgdp++) {
>>+		/* read the pgd one word at a time, into pud_addr */
>>+		if (!readmem(PADDR, (unsigned long long)pgdp, (void *)&pud_addr,
>>+								sizeof(unsigned long))) {
>>+			ERRMSG("Can't get pgd entry for slot %d.\n", pgd_index);
>>+			return FAILED;
>>+		}
>>+		/* mask the pgd entry for the address of the pud page */
>>+		pud_addr &= PMASK;

If the pgd entry is null, the code below will try to read 0x0.
This behavior will cause unexpected results, especially if pfn:0 is on
memory hole.
I think null pgd (and pud) entries should be skipped, how about you ?
I would like you to post the bug fix if you have no objection.


Thanks,
Atsushi Kumagai

>>+		/* read the entire pud page */
>>+		if (!readmem(PADDR, (unsigned long long)pud_addr, (void *)pud_page,
>>+					PTRS_PER_PUD * sizeof(unsigned long))) {
>>+			ERRMSG("Can't get pud entry for pgd slot %ld.\n", pgdindex);
>>+			return FAILED;
>>+		}
>>+		/* step thru each pmd address in the pud page */
>>+		/* pudp points to an entry in the pud page */
>>+		for (pudp = (unsigned long *)pud_page, pudindex = 0;
>>+					pudindex < PTRS_PER_PUD; pudindex++, pudp++) {
>>+			pmd_addr = *pudp & PMASK;
>>+			/* read the entire pmd page */
>>+			if (!readmem(PADDR, pmd_addr, (void *)pmd_page,
>>+					PTRS_PER_PMD * sizeof(unsigned long))) {
>>+				ERRMSG("Can't get pud entry for slot %ld.\n", pudindex);
>>+				return FAILED;
>>+			}
>>+			/* pmdp points to an entry in the pmd */
>>+			for (pmdp = (unsigned long *)pmd_page, pmdindex = 0;
>>+					pmdindex < PTRS_PER_PMD; pmdindex++, pmdp++) {
>>+				/* linear page position in this page table: */
>>+				pmd = *pmdp;
>>+				num_pmds++;
>>+				tpfn = (pvaddr - VMEMMAP_START) /
>>+							pagestructsize;
>>+				if (tpfn >= high_pfn) {
>>+					done = 1;
>>+					break;
>>+				}
>>+				/*
>>+				 * vmap_offset_start:
>>+				 * Starting logical position in the
>>+				 * vmemmap array for the group stays
>>+				 * constant until a hole in the table
>>+				 * or a break in contiguousness.
>>+				 */
>>+
>>+				/*
>>+				 * Ending logical position in the
>>+				 * vmemmap array:
>>+				 */
>>+				vmap_offset_end += hugepagesize;
>>+				do_break = 0;
>>+				break_in_valids = 0;
>>+				break_after_invalids = 0;
>>+				/*
>>+				 * We want breaks either when:
>>+				 * - we hit a hole (invalid)
>>+				 * - we discontiguous page is a string of valids
>>+				 */
>>+				if (pmd) {
>>+					data_addr = (pmd & PMASK);
>>+					if (start_range) {
>>+						/* first-time kludge */
>>+						start_data_addr = data_addr;
>>+						last_data_addr = start_data_addr
>>+							 - hugepagesize;
>>+						start_range = 0;
>>+					}
>>+					if (last_invalid) {
>>+						/* end of a hole */
>>+						start_data_addr = data_addr;
>>+						last_data_addr = start_data_addr
>>+							 - hugepagesize;
>>+						/* trigger update of offset */
>>+						do_break = 1;
>>+					}
>>+					last_valid = 1;
>>+					last_invalid = 0;
>>+					/*
>>+					 * we have a gap in physical
>>+					 * contiguousness in the table.
>>+					 */
>>+					/* ?? consecutive holes will have
>>+					   same data_addr */
>>+					if (data_addr !=
>>+						last_data_addr + hugepagesize) {
>>+						do_break = 1;
>>+						break_in_valids = 1;
>>+					}
>>+					DEBUG_MSG("valid: pud %ld pmd %ld pfn %#lx"
>>+						" pvaddr %#lx pfns %#lx-%lx"
>>+						" start %#lx end %#lx\n",
>>+						pudindex, pmdindex,
>>+						data_addr >> 12,
>>+						pvaddr, tpfn,
>>+						tpfn + structsperhpage - 1,
>>+						vmap_offset_start,
>>+						vmap_offset_end);
>>+					num_pmds_valid++;
>>+					if (!(pmd & _PAGE_PSE)) {
>>+						printf("vmemmap pmd not huge, abort\n");
>>+						return FAILED;
>>+					}
>>+				} else {
>>+					if (last_valid) {
>>+						/* this a hole after some valids */
>>+						do_break = 1;
>>+						break_in_valids = 1;
>>+						break_after_invalids = 0;
>>+					}
>>+					last_valid = 0;
>>+					last_invalid = 1;
>>+					/*
>>+					 * There are holes in this sparsely
>>+					 * populated table; they are 2MB gaps
>>+					 * represented by null pmd entries.
>>+					 */
>>+					DEBUG_MSG("invalid: pud %ld pmd %ld %#lx"
>>+						" pfns %#lx-%lx start %#lx end"
>>+						" %#lx\n", pudindex, pmdindex,
>>+						pvaddr, tpfn,
>>+						tpfn + structsperhpage - 1,
>>+						vmap_offset_start,
>>+						vmap_offset_end);
>>+				}
>>+				if (do_break) {
>>+					/* The end of a hole is not summarized.
>>+					 * It must be the start of a hole or
>>+					 * hitting a discontiguous series.
>>+					 */
>>+					if (break_in_valids || break_after_invalids) {
>>+						/*
>>+						 * calculate that pfns
>>+						 * represented by the current
>>+						 * offset in the vmemmap.
>>+						 */
>>+						/* page struct even partly on this page */
>>+						rep_pfn_start = vmap_offset_start /
>>+							pagestructsize;
>>+						/* ending page struct entirely on
>>+ 						   this page */
>>+						rep_pfn_end = ((vmap_offset_end -
>>+							hugepagesize) / pagestructsize);
>>+ 						DEBUG_MSG("vmap pfns %#lx-%lx "
>>+							"represent pfns %#lx-%lx\n\n",
>>+							start_data_addr >> PAGESHIFT(),
>>+							last_data_addr >> PAGESHIFT(),
>>+							rep_pfn_start, rep_pfn_end);
>>+						groups++;
>>+						vmapp = (struct vmap_pfns *)malloc(
>>+								sizeof(struct vmap_pfns));
>>+						/* pfn of this 2MB page of page structs */
>>+						vmapp->vmap_pfn_start = start_data_addr
>>+									>> PTE_SHIFT;
>>+						vmapp->vmap_pfn_end = last_data_addr
>>+									>> PTE_SHIFT;
>>+						/* these (start/end) are literal pfns
>>+ 						 * on this page, not start and end+1 */
>>+						vmapp->rep_pfn_start = rep_pfn_start;
>>+						vmapp->rep_pfn_end = rep_pfn_end;
>>+
>>+						if (!vmaphead) {
>>+							vmaphead = vmapp;
>>+							vmapp->next = vmapp;
>>+							vmapp->prev = vmapp;
>>+						} else {
>>+							tail = vmaphead->prev;
>>+							vmaphead->prev = vmapp;
>>+							tail->next = vmapp;
>>+							vmapp->next = vmaphead;
>>+							vmapp->prev = tail;
>>+						}
>>+					}
>>+
>>+					/* update logical position at every break */
>>+					vmap_offset_start =
>>+						vmap_offset_end - hugepagesize;
>>+					start_data_addr = data_addr;
>>+				}
>>+
>>+				last_data_addr = data_addr;
>>+				pvaddr += hugepagesize;
>>+				/*
>>+				 * pvaddr is current virtual address
>>+				 *   eg 0xffffea0004200000 if
>>+				 *    vmap_offset_start is 4200000
>>+				 */
>>+			}
>>+		}
>>+		tpfn = (pvaddr - VMEMMAP_START) / pagestructsize;
>>+		if (tpfn >= high_pfn) {
>>+			done = 1;
>>+			break;
>>+		}
>>+	}
>>+	rep_pfn_start = vmap_offset_start / pagestructsize;
>>+	rep_pfn_end = (vmap_offset_end - hugepagesize) / pagestructsize;
>>+	DEBUG_MSG("vmap pfns %#lx-%lx represent pfns %#lx-%lx\n\n",
>>+		start_data_addr >> PAGESHIFT(), last_data_addr >> PAGESHIFT(),
>>+		rep_pfn_start, rep_pfn_end);
>>+	groups++;
>>+	vmapp = (struct vmap_pfns *)malloc(sizeof(struct vmap_pfns));
>>+	vmapp->vmap_pfn_start = start_data_addr >> PTE_SHIFT;
>>+	vmapp->vmap_pfn_end = last_data_addr >> PTE_SHIFT;
>>+	vmapp->rep_pfn_start = rep_pfn_start;
>>+	vmapp->rep_pfn_end = rep_pfn_end;
>>+	if (!vmaphead) {
>>+		vmaphead = vmapp;
>>+		vmapp->next = vmapp;
>>+		vmapp->prev = vmapp;
>>+	} else {
>>+		tail = vmaphead->prev;
>>+		vmaphead->prev = vmapp;
>>+		tail->next = vmapp;
>>+		vmapp->next = vmaphead;
>>+		vmapp->prev = tail;
>>+	}
>>+	DEBUG_MSG("num_pmds: %d num_pmds_valid %d\n", num_pmds, num_pmds_valid);
>>+
>>+	/* transfer the linked list to an array */
>>+	cur = vmaphead;
>>+	gvmem_pfns = (struct vmap_pfns *)malloc(sizeof(struct vmap_pfns) * groups);
>>+	i = 0;
>>+	do {
>>+		vmapp = gvmem_pfns + i;
>>+		vmapp->vmap_pfn_start = cur->vmap_pfn_start;
>>+		vmapp->vmap_pfn_end = cur->vmap_pfn_end;
>>+		vmapp->rep_pfn_start = cur->rep_pfn_start;
>>+		vmapp->rep_pfn_end = cur->rep_pfn_end;
>>+		cur = cur->next;
>>+		free(cur->prev);
>>+		i++;
>>+	} while (cur != vmaphead);
>>+	nr_gvmem_pfns = i;
>>+	return COMPLETED;
>>+}
>>+
>> #endif /* x86_64 */
>>
>
>_______________________________________________
>kexec mailing list
>kexec at lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/kexec



More information about the kexec mailing list