[PATCH 5/8] ppc64: use physical addresses and unfold pud for 64K page size
Hari Bathini
hbathini at linux.vnet.ibm.com
Tue Sep 27 13:24:25 PDT 2016
Starting with kernel v4.6, Linux page tables store physical addresses
in the upper page table tree levels for server processors. Also, for the
64K page size, the Linux page table is switched to 4 levels (PUD not
folded) to support both hash and radix page tables in a single kernel.
This patch updates makedumpfile to handle both changes.
Signed-off-by: Hari Bathini <hbathini at linux.vnet.ibm.com>
---
arch/ppc64.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
makedumpfile.h | 20 +++++++++++
2 files changed, 115 insertions(+), 6 deletions(-)
diff --git a/arch/ppc64.c b/arch/ppc64.c
index dc8f0f2..69f6348 100644
--- a/arch/ppc64.c
+++ b/arch/ppc64.c
@@ -25,6 +25,68 @@
#include "../makedumpfile.h"
/*
+ * Convert physical address to kernel virtual address by adding info->kernel_start
+ */
+static inline ulong paddr_to_vaddr_ppc64(ulong paddr)
+{
+ return (paddr + info->kernel_start);
+}
+
+/*
+ * Convert the raw pgd entry to the address of the next-level page table
+ */
+static inline ulong pgd_page_vaddr_l4(ulong pgd)
+{
+ ulong pgd_val;
+
+ pgd_val = (pgd & ~info->pgd_masked_bits); /* clear the bits set in pgd_masked_bits */
+ if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) {
+ /*
+ * the entry holds a physical address starting from kernel v4.6
+ */
+ pgd_val = paddr_to_vaddr_ppc64(pgd_val);
+ }
+
+ return pgd_val;
+}
+
+/*
+ * Convert the raw pud entry to the address of the next-level page table
+ */
+static inline ulong pud_page_vaddr_l4(ulong pud)
+{
+ ulong pud_val;
+
+ pud_val = (pud & ~info->pud_masked_bits); /* clear the bits set in pud_masked_bits */
+ if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) {
+ /*
+ * the entry holds a physical address starting from kernel v4.6
+ */
+ pud_val = paddr_to_vaddr_ppc64(pud_val);
+ }
+
+ return pud_val;
+}
+
+/*
+ * Convert the raw pmd entry to the address of the next-level page table
+ */
+static inline ulong pmd_page_vaddr_l4(ulong pmd)
+{
+ ulong pmd_val;
+
+ pmd_val = (pmd & ~info->pmd_masked_bits); /* clear the bits set in pmd_masked_bits */
+ if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) {
+ /*
+ * the entry holds a physical address starting from kernel v4.6
+ */
+ pmd_val = paddr_to_vaddr_ppc64(pmd_val);
+ }
+
+ return pmd_val;
+}
+
+/*
* This function traverses vmemmap list to get the count of vmemmap regions
* and populates the regions' info in info->vmemmap_list[]
*/
@@ -156,7 +218,13 @@ ppc64_vmalloc_init(void)
/*
* 64K pagesize
*/
- if (info->kernel_version >= KERNEL_VERSION(3, 10, 0)) {
+ if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) {
+ info->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
+ info->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_6;
+ info->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_6;
+ info->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10;
+
+ } else if (info->kernel_version >= KERNEL_VERSION(3, 10, 0)) {
info->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
info->l2_index_size = PMD_INDEX_SIZE_L4_64K_3_10;
info->l3_index_size = PUD_INDEX_SIZE_L4_64K;
@@ -170,7 +238,17 @@ ppc64_vmalloc_init(void)
info->pte_rpn_shift = (SYMBOL(demote_segment_4k) ?
PTE_RPN_SHIFT_L4_64K_V2 : PTE_RPN_SHIFT_L4_64K_V1);
- info->l2_masked_bits = PMD_MASKED_BITS_64K;
+
+ if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) {
+ info->pgd_masked_bits = PGD_MASKED_BITS_64K_4_6;
+ info->pud_masked_bits = PUD_MASKED_BITS_64K_4_6;
+ info->pmd_masked_bits = PMD_MASKED_BITS_64K_4_6;
+ } else {
+ info->pgd_masked_bits = PGD_MASKED_BITS_64K;
+ info->pud_masked_bits = PUD_MASKED_BITS_64K;
+ info->pmd_masked_bits = (info->kernel_version >= KERNEL_VERSION(3, 11, 0) ?
+ PMD_MASKED_BITS_64K_3_11 : PMD_MASKED_BITS_64K);
+ }
} else {
/*
* 4K pagesize
@@ -183,7 +261,16 @@ ppc64_vmalloc_init(void)
info->pte_rpn_shift = (info->kernel_version >= KERNEL_VERSION(4, 5, 0) ?
PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K);
- info->l2_masked_bits = PMD_MASKED_BITS_4K;
+
+ info->pgd_masked_bits = PGD_MASKED_BITS_4K;
+ info->pud_masked_bits = PUD_MASKED_BITS_4K;
+ info->pmd_masked_bits = PMD_MASKED_BITS_4K;
+ }
+
+ info->pte_rpn_mask = PTE_RPN_MASK_DEFAULT;
+ if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) {
+ info->pte_rpn_mask = PTE_RPN_MASK_L4_4_6;
+ info->pte_rpn_shift = PTE_RPN_SHIFT_L4_4_6;
}
/*
@@ -265,6 +352,7 @@ ppc64_vtop_level4(unsigned long vaddr)
* Sometimes we don't have level3 pagetable entries
*/
if (info->l3_index_size != 0) {
+ pgd_pte = pgd_page_vaddr_l4(pgd_pte);
page_upper = (ulong *)((ulong *)pgd_pte + PUD_OFFSET_L4(vaddr));
if (!readmem(VADDR, PAGEBASE(pgd_pte), info->page_buf, PAGESIZE())) {
ERRMSG("Can't read PUD page: 0x%llx\n", PAGEBASE(pgd_pte));
@@ -277,6 +365,7 @@ ppc64_vtop_level4(unsigned long vaddr)
pud_pte = pgd_pte;
}
+ pud_pte = pud_page_vaddr_l4(pud_pte);
page_middle = (ulong *)((ulong *)pud_pte + PMD_OFFSET_L4(vaddr));
if (!readmem(VADDR, PAGEBASE(pud_pte), info->page_buf, PAGESIZE())) {
ERRMSG("Can't read PMD page: 0x%llx\n", PAGEBASE(pud_pte));
@@ -286,7 +375,8 @@ ppc64_vtop_level4(unsigned long vaddr)
if (!(pmd_pte))
return NOT_PADDR;
- page_table = (ulong *)(pmd_pte & ~(info->l2_masked_bits))
+ pmd_pte = pmd_page_vaddr_l4(pmd_pte);
+ page_table = (ulong *)(pmd_pte)
+ (BTOP(vaddr) & (info->ptrs_per_l1 - 1));
if (!readmem(VADDR, PAGEBASE(pmd_pte), info->page_buf, PAGESIZE())) {
ERRMSG("Can't read page table: 0x%llx\n", PAGEBASE(pmd_pte));
@@ -301,7 +391,8 @@ ppc64_vtop_level4(unsigned long vaddr)
if (!pte)
return NOT_PADDR;
- paddr = PAGEBASE(PTOB(pte >> info->pte_rpn_shift)) + PAGEOFFSET(vaddr);
+ paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> info->pte_rpn_shift))
+ + PAGEOFFSET(vaddr);
return paddr;
}
diff --git a/makedumpfile.h b/makedumpfile.h
index 7f9dfb1..a85fd7f 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -635,6 +635,8 @@ int get_va_bits_arm64(void);
#define PUD_INDEX_SIZE_L4_4K_3_7 9
#define PTE_RPN_SHIFT_L4_4K 17
#define PTE_RPN_SHIFT_L4_4K_4_5 18
+#define PGD_MASKED_BITS_4K 0
+#define PUD_MASKED_BITS_4K 0
#define PMD_MASKED_BITS_4K 0
/* 64K pagesize */
@@ -645,9 +647,22 @@ int get_va_bits_arm64(void);
#define PTE_INDEX_SIZE_L4_64K_3_10 8
#define PMD_INDEX_SIZE_L4_64K_3_10 10
#define PGD_INDEX_SIZE_L4_64K_3_10 12
+#define PMD_INDEX_SIZE_L4_64K_4_6 5
+#define PUD_INDEX_SIZE_L4_64K_4_6 5
#define PTE_RPN_SHIFT_L4_64K_V1 32
#define PTE_RPN_SHIFT_L4_64K_V2 30
+#define PGD_MASKED_BITS_64K 0
+#define PUD_MASKED_BITS_64K 0x1ff
#define PMD_MASKED_BITS_64K 0x1ff
+#define PMD_MASKED_BITS_64K_3_11 0xfff
+#define PGD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL
+#define PUD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL
+#define PMD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL
+
+#define PTE_RPN_MASK_DEFAULT 0xffffffffffffffffUL
+#define PTE_RPN_SIZE_L4_4_6 (info->page_size == 65536 ? 41 : 45)
+#define PTE_RPN_MASK_L4_4_6 (((1UL << PTE_RPN_SIZE_L4_4_6) - 1) << info->page_shift)
+#define PTE_RPN_SHIFT_L4_4_6 info->page_shift
#define PGD_MASK_L4 \
(info->kernel_version >= KERNEL_VERSION(3, 10, 0) ? (info->ptrs_per_pgd - 1) : 0x1ff)
@@ -1124,7 +1139,10 @@ struct DumpInfo {
uint l2_shift;
uint l1_shift;
uint pte_rpn_shift;
- uint l2_masked_bits;
+ ulong pte_rpn_mask;
+ ulong pgd_masked_bits;
+ ulong pud_masked_bits;
+ ulong pmd_masked_bits;
ulong kernel_pgd;
char *page_buf; /* Page buffer to read page tables */
More information about the kexec
mailing list