[RFC PATCH v2 14/21] riscv: mm: Only apply svnapot region bigger than software page

Xu Lu luxu.kernel at bytedance.com
Thu Dec 5 02:37:22 PST 2024


A napot pte order is normally expressed in units of software pages, i.e.
as the order of the software page number. This commit therefore updates
the napot order calculation accordingly. In addition, an svnapot pte is
only used as a huge pte when its napot size is bigger than a software
page.

Signed-off-by: Xu Lu <luxu.kernel at bytedance.com>
---
 arch/riscv/include/asm/pgtable-64.h | 21 +++++++++---
 arch/riscv/include/asm/pgtable.h    | 50 +++++++++++++++++++++++------
 arch/riscv/mm/hugetlbpage.c         |  7 ++--
 3 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 26c13484e721..fbdaad9a98dd 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -124,12 +124,23 @@ enum napot_cont_order {
 	NAPOT_ORDER_MAX,
 };
 
+#define NAPOT_PAGE_ORDER_BASE							\
+	((NAPOT_CONT_ORDER_BASE >= (PAGE_SHIFT - HW_PAGE_SHIFT)) ?		\
+	 (NAPOT_CONT_ORDER_BASE - (PAGE_SHIFT - HW_PAGE_SHIFT)) : 1)
+#define NAPOT_PAGE_ORDER_MAX							\
+	((NAPOT_ORDER_MAX > (PAGE_SHIFT - HW_PAGE_SHIFT)) ?			\
+	 (NAPOT_ORDER_MAX - (PAGE_SHIFT - HW_PAGE_SHIFT)) :			\
+	 NAPOT_PAGE_ORDER_BASE)
+
 #define for_each_napot_order(order)						\
-	for (order = NAPOT_CONT_ORDER_BASE; order < NAPOT_ORDER_MAX; order++)
+	for (order = NAPOT_PAGE_ORDER_BASE;					\
+		order < NAPOT_PAGE_ORDER_MAX; order++)
 #define for_each_napot_order_rev(order)						\
-	for (order = NAPOT_ORDER_MAX - 1;					\
-	     order >= NAPOT_CONT_ORDER_BASE; order--)
-#define napot_cont_order(val)	(__builtin_ctzl((pte_val(val) >> _PAGE_PFN_SHIFT) << 1))
+	for (order = NAPOT_PAGE_ORDER_MAX - 1;					\
+		order >= NAPOT_PAGE_ORDER_BASE; order--)
+#define napot_cont_order(val)							\
+	(__builtin_ctzl((pte_val(val) >> _PAGE_HWPFN_SHIFT) << 1) -		\
+		(PAGE_SHIFT - HW_PAGE_SHIFT))
 
 #define napot_cont_shift(order)	((order) + PAGE_SHIFT)
 #define napot_cont_size(order)	BIT(napot_cont_shift(order))
@@ -137,7 +148,7 @@ enum napot_cont_order {
 #define napot_pte_num(order)	BIT(order)
 
 #ifdef CONFIG_RISCV_ISA_SVNAPOT
-#define HUGE_MAX_HSTATE		(2 + (NAPOT_ORDER_MAX - NAPOT_CONT_ORDER_BASE))
+#define HUGE_MAX_HSTATE		(2 + (NAPOT_ORDER_MAX - NAPOT_PAGE_ORDER_BASE))
 #else
 #define HUGE_MAX_HSTATE		2
 #endif
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 0fd9bd4e0d13..07d557bc8b39 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -130,7 +130,7 @@
 #include <asm/compat.h>
 
 #define __page_val_to_hwpfn(_val)  (((_val) & _PAGE_HW_PFN_MASK) >> _PAGE_HWPFN_SHIFT)
-#define __page_val_to_pfn(_val)  (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)
+static inline unsigned long __page_val_to_pfn(unsigned long val);
 
 #ifdef CONFIG_64BIT
 #include <asm/pgtable-64.h>
@@ -470,15 +470,42 @@ static inline unsigned long pte_napot(pte_t pte)
 	return __pte_napot(pte_val(pte));
 }
 
-static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
+static inline unsigned long __pte_mknapot(unsigned long pteval,
+					  unsigned int order)
 {
 	int pos = order - 1 + _PAGE_PFN_SHIFT;
 	unsigned long napot_bit = BIT(pos);
-	unsigned long napot_mask = ~GENMASK(pos, _PAGE_PFN_SHIFT);
+	unsigned long napot_mask = ~GENMASK(pos, _PAGE_HWPFN_SHIFT);
+
+	BUG_ON(__pte_napot(pteval));
+	pteval = (pteval & napot_mask) | napot_bit | _PAGE_NAPOT;
 
-	return __pte((pte_val(pte) & napot_mask) | napot_bit | _PAGE_NAPOT);
+	return pteval;
 }
 
+#ifdef CONFIG_RISCV_USE_SW_PAGE
+static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
+{
+	unsigned long pteval = pte_val(pte);
+	unsigned int i;
+
+	pteval = __pte_mknapot(pteval, order);
+	for (i = 0; i < HW_PAGES_PER_PAGE; i++)
+		pte.ptes[i] = pteval;
+
+	return pte;
+}
+#else
+static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
+{
+	unsigned long pteval = pte_val(pte);
+
+	pte_val(pte) = __pte_mknapot(pteval, order);
+
+	return pte;
+}
+#endif /* CONFIG_RISCV_USE_SW_PAGE */
+
 #else
 
 static __always_inline bool has_svnapot(void) { return false; }
@@ -495,15 +522,20 @@ static inline unsigned long pte_napot(pte_t pte)
 
 #endif /* CONFIG_RISCV_ISA_SVNAPOT */
 
-/* Yields the page frame number (PFN) of a page table entry */
-static inline unsigned long pte_pfn(pte_t pte)
+static inline unsigned long __page_val_to_pfn(unsigned long pteval)
 {
-	unsigned long res  = __page_val_to_pfn(pte_val(pte));
+	unsigned long res = __page_val_to_hwpfn(pteval);
 
-	if (has_svnapot() && pte_napot(pte))
+	if (has_svnapot() && __pte_napot(pteval))
 		res = res & (res - 1UL);
 
-	return res;
+	return hwpfn_to_pfn(res);
+}
+
+/* Yields the page frame number (PFN) of a page table entry */
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return __page_val_to_pfn(pte_val(pte));
 }
 
 #define pte_page(x)     pfn_to_page(pte_pfn(x))
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 8896c28ec881..4286c7dea68d 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -212,7 +212,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
 			break;
 		}
 	}
-	if (order == NAPOT_ORDER_MAX)
+	if (order == NAPOT_PAGE_ORDER_MAX)
 		entry = pte_mkhuge(entry);
 
 	return entry;
@@ -405,7 +405,8 @@ static __init int napot_hugetlbpages_init(void)
 		unsigned long order;
 
 		for_each_napot_order(order)
-			hugetlb_add_hstate(order);
+			if (napot_cont_shift(order) > PAGE_SHIFT)
+				hugetlb_add_hstate(order);
 	}
 	return 0;
 }
@@ -426,7 +427,7 @@ static bool __hugetlb_valid_size(unsigned long size)
 		return true;
 	else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
 		return true;
-	else if (is_napot_size(size))
+	else if (is_napot_size(size) && size > PAGE_SIZE)
 		return true;
 	else
 		return false;
-- 
2.20.1




More information about the linux-riscv mailing list