[PATCH] makedumpfile: ppc64: do page traversal if vmemmap_list not populated

Aditya Gupta adityag at linux.ibm.com
Thu Sep 14 02:22:23 PDT 2023


Currently 'makedumpfile' fails to collect a vmcore on the upstream kernel,
with the following errors:

    readpage_elf: Attempt to read non-existent page at 0x4000000000000000.
    readmem: type_addr: 0, addr:0, size:8
    get_vmemmap_list_info: Can't get vmemmap region addresses
    get_machdep_info_ppc64: Can't get vmemmap list info.

This occurs since makedumpfile depends on 'vmemmap_list' for translating
vmemmap addresses. But with the below commit in Linux, vmemmap_list can be
empty in the case of Radix MMU on PowerPC64:

    368a0590d954: (powerpc/book3s64/vmemmap: switch radix to use a
    different vmemmap handling function)

In case vmemmap_list is empty, its head is NULL, which causes
makedumpfile to fail with the above error.

With the above commit, 'vmemmap_list' is not populated when the MMU is
Radix MMU; instead, the kernel populates the corresponding page table
entries in the kernel page table. Hence, instead of depending on
'vmemmap_list' for translating vmemmap addresses, do a kernel pagetable walk.

And since a leaf PTE can also be present at higher levels of the page
table, such as at the PMD level, add hugepage support by checking for the
PAGE_PTE flag.

Reported-by: Sachin Sant <sachinp at linux.ibm.com>
Signed-off-by: Aditya Gupta <adityag at linux.ibm.com>
---
 arch/ppc64.c   | 111 ++++++++++++++++++++++++++++++++++---------------
 makedumpfile.h |   6 +++
 2 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/arch/ppc64.c b/arch/ppc64.c
index 5e70acb51aba..9456b8b570c5 100644
--- a/arch/ppc64.c
+++ b/arch/ppc64.c
@@ -196,6 +196,10 @@ ppc64_vmemmap_init(void)
 	int psize, shift;
 	ulong head;
 
+	/* initialise vmemmap_list in case SYMBOL(vmemmap_list) is not found */
+	info->vmemmap_list = NULL;
+	info->vmemmap_cnt = 0;
+	
 	if ((SYMBOL(vmemmap_list) == NOT_FOUND_SYMBOL)
 	    || (SYMBOL(mmu_psize_defs) == NOT_FOUND_SYMBOL)
 	    || (SYMBOL(mmu_vmemmap_psize) == NOT_FOUND_SYMBOL)
@@ -216,15 +220,24 @@ ppc64_vmemmap_init(void)
 		return FALSE;
 	info->vmemmap_psize = 1 << shift;
 
-	if (!readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long)))
-		return FALSE;
-
 	/*
-	 * Get vmemmap list count and populate vmemmap regions info
-	 */
-	info->vmemmap_cnt = get_vmemmap_list_info(head);
-	if (info->vmemmap_cnt == 0)
-		return FALSE;
+	 * vmemmap_list symbol can be missing or set to 0 in the kernel.
+	 * This would imply vmemmap region is mapped in the kernel pagetable.
+	 *
+	 * So, read vmemmap_list anyway, and use 'vmemmap_list' if it's not empty
+	 * (head != NULL), or we will do a kernel pagetable walk for vmemmap address
+	 * translation later
+	 **/
+	readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long));
+
+	if (head) {
+		/*
+		 * Get vmemmap list count and populate vmemmap regions info
+		 */
+		info->vmemmap_cnt = get_vmemmap_list_info(head);
+		if (info->vmemmap_cnt == 0)
+			return FALSE;
+	}
 
 	info->flag_vmemmap = TRUE;
 	return TRUE;
@@ -347,29 +360,6 @@ ppc64_vmalloc_init(void)
 	return TRUE;
 }
 
-/*
- *  If the vmemmap address translation information is stored in the kernel,
- *  make the translation.
- */
-static unsigned long long
-ppc64_vmemmap_to_phys(unsigned long vaddr)
-{
-	int	i;
-	ulong	offset;
-	unsigned long long paddr = NOT_PADDR;
-
-	for (i = 0; i < info->vmemmap_cnt; i++) {
-		if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr <
-		    (info->vmemmap_list[i].virt + info->vmemmap_psize))) {
-			offset = vaddr - info->vmemmap_list[i].virt;
-			paddr = info->vmemmap_list[i].phys + offset;
-			break;
-		}
-	}
-
-	return paddr;
-}
-
 static unsigned long long
 ppc64_vtop_level4(unsigned long vaddr)
 {
@@ -379,6 +369,8 @@ ppc64_vtop_level4(unsigned long vaddr)
 	unsigned long long pgd_pte, pud_pte;
 	unsigned long long pmd_pte, pte;
 	unsigned long long paddr = NOT_PADDR;
+	uint is_hugepage = 0;
+	uint pdshift;
 	uint swap = 0;
 
 	if (info->page_buf == NULL) {
@@ -413,6 +405,13 @@ ppc64_vtop_level4(unsigned long vaddr)
 	if (!pgd_pte)
 		return NOT_PADDR;
 
+	if (IS_HUGEPAGE(pgd_pte)) {
+		is_hugepage = 1;
+		pte = pgd_pte;
+		pdshift = info->l4_shift;
+		goto out;
+	}
+
 	/*
 	 * Sometimes we don't have level3 pagetable entries
 	 */
@@ -426,6 +425,13 @@ ppc64_vtop_level4(unsigned long vaddr)
 		pud_pte = swap64(ULONG((info->page_buf + PAGEOFFSET(page_upper))), swap);
 		if (!pud_pte)
 			return NOT_PADDR;
+
+		if (IS_HUGEPAGE(pud_pte)) {
+			is_hugepage = 1;
+			pte = pud_pte;
+			pdshift = info->l3_shift;
+			goto out;
+		}
 	} else {
 		pud_pte = pgd_pte;
 	}
@@ -440,6 +446,13 @@ ppc64_vtop_level4(unsigned long vaddr)
 	if (!(pmd_pte))
 		return NOT_PADDR;
 
+	if (IS_HUGEPAGE(pmd_pte)) {
+		is_hugepage = 1;
+		pte = pmd_pte;
+		pdshift = info->l2_shift;
+		goto out;
+	}
+
 	pmd_pte = pmd_page_vaddr_l4(pmd_pte);
 	page_table = (ulong *)(pmd_pte)
 			+ (BTOP(vaddr) & (info->ptrs_per_l1 - 1));
@@ -456,8 +469,40 @@ ppc64_vtop_level4(unsigned long vaddr)
 	if (!pte)
 		return NOT_PADDR;
 
-	paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> info->pte_rpn_shift))
+out:
+	if (is_hugepage) {
+		paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> info->pte_rpn_shift))
+			+ (vaddr & ((1UL << pdshift) - 1));
+	} else {
+		paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> info->pte_rpn_shift))
 			+ PAGEOFFSET(vaddr);
+	}
+
+	return paddr;
+}
+
+/*
+ *  If the vmemmap address translation information is stored in the kernel,
+ *  make the translation.
+ */
+static unsigned long long
+ppc64_vmemmap_to_phys(unsigned long vaddr)
+{
+	int	i;
+	ulong	offset;
+	unsigned long long paddr = NOT_PADDR;
+
+	if (!info->vmemmap_list)
+		return ppc64_vtop_level4(vaddr);
+
+	for (i = 0; i < info->vmemmap_cnt; i++) {
+		if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr <
+		    (info->vmemmap_list[i].virt + info->vmemmap_psize))) {
+			offset = vaddr - info->vmemmap_list[i].virt;
+			paddr = info->vmemmap_list[i].phys + offset;
+			break;
+		}
+	}
 
 	return paddr;
 }
@@ -567,8 +612,8 @@ get_machdep_info_ppc64(void)
 		return FALSE;
 	}
 
+	info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT;
 	if (SYMBOL(vmemmap_list) != NOT_FOUND_SYMBOL) {
-		info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT;
 		info->vmemmap_end = info->vmemmap_start;
 		if (ppc64_vmemmap_init() == FALSE) {
 			ERRMSG("Can't get vmemmap list info.\n");
diff --git a/makedumpfile.h b/makedumpfile.h
index 85e5a4932983..056aee191519 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -678,6 +678,12 @@ unsigned long get_kvbase_arm64(void);
 #define REGION_SHIFT            (60UL)
 #define VMEMMAP_REGION_ID       (0xfUL)
 
+/*
+ * If PAGE_PTE is set, then it's a leaf PTE for hugepage
+ */
+#define PAGE_PTE (1UL << 62)
+#define IS_HUGEPAGE(pte) (!!((pte) & PAGE_PTE))
+
 /* 4-level page table support */
 
 /* 4K pagesize */
-- 
2.41.0




More information about the kexec mailing list