[RFC v3 11/21] mm: PKRAM: reserve preserved memory at boot

Anthony Yznaga anthony.yznaga at oracle.com
Wed Apr 26 17:08:47 PDT 2023


Keep preserved pages from being recycled during boot by adding them
to the memblock reserved list during early boot. If memory reservation
fails (e.g. a region has already been reserved), all preserved pages
are dropped.

Signed-off-by: Anthony Yznaga <anthony.yznaga at oracle.com>
---
 arch/x86/kernel/setup.c |  3 ++
 arch/x86/mm/init_64.c   |  2 ++
 include/linux/pkram.h   |  8 +++++
 mm/pkram.c              | 84 ++++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 16babff771bd..2806b21236d0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -18,6 +18,7 @@
 #include <linux/memblock.h>
 #include <linux/panic_notifier.h>
 #include <linux/pci.h>
+#include <linux/pkram.h>
 #include <linux/root_dev.h>
 #include <linux/hugetlb.h>
 #include <linux/tboot.h>
@@ -1221,6 +1222,8 @@ void __init setup_arch(char **cmdline_p)
 	initmem_init();
 	dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
 
+	pkram_reserve();
+
 	if (boot_cpu_has(X86_FEATURE_GBPAGES))
 		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a190aae8ceaf..a46ffb434f39 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -34,6 +34,7 @@
 #include <linux/gfp.h>
 #include <linux/kcore.h>
 #include <linux/bootmem_info.h>
+#include <linux/pkram.h>
 
 #include <asm/processor.h>
 #include <asm/bios_ebda.h>
@@ -1339,6 +1340,7 @@ void __init mem_init(void)
 	after_bootmem = 1;
 	x86_init.hyper.init_after_bootmem();
 
+	totalram_pages_add(pkram_reserved_pages);
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
 	 * might set fields in deferred struct pages that have not yet been
diff --git a/include/linux/pkram.h b/include/linux/pkram.h
index b614e9059bba..53d5a1ec42ff 100644
--- a/include/linux/pkram.h
+++ b/include/linux/pkram.h
@@ -99,4 +99,12 @@ int pkram_prepare_save(struct pkram_stream *ps, const char *name,
 ssize_t pkram_write(struct pkram_access *pa, const void *buf, size_t count);
 size_t pkram_read(struct pkram_access *pa, void *buf, size_t count);
 
+#ifdef CONFIG_PKRAM
+extern unsigned long pkram_reserved_pages;
+void pkram_reserve(void);
+#else
+#define pkram_reserved_pages 0UL
+static inline void pkram_reserve(void) { }
+#endif
+
 #endif /* _LINUX_PKRAM_H */
diff --git a/mm/pkram.c b/mm/pkram.c
index c649504fa1fa..b711f94dbef4 100644
--- a/mm/pkram.c
+++ b/mm/pkram.c
@@ -134,6 +134,8 @@ extern void pkram_find_preserved(unsigned long start, unsigned long end, void *p
 static LIST_HEAD(pkram_nodes);			/* linked through page::lru */
 static DEFINE_MUTEX(pkram_mutex);		/* serializes open/close */
 
+unsigned long __initdata pkram_reserved_pages;
+
 /*
  * The PKRAM super block pfn, see above.
  */
@@ -143,6 +145,59 @@ static int __init parse_pkram_sb_pfn(char *arg)
 }
 early_param("pkram", parse_pkram_sb_pfn);
 
+static void * __init pkram_map_meta(unsigned long pfn)
+{
+	if (pfn >= max_low_pfn)
+		return ERR_PTR(-EINVAL);
+	return pfn_to_kaddr(pfn);
+}
+
+int pkram_merge_with_reserved(void);
+/*
+ * Reserve pages that belong to preserved memory.
+ *
+ * This function should be called at boot time as early as possible to prevent
+ * preserved memory from being recycled.
+ */
+void __init pkram_reserve(void)
+{
+	int err = 0;
+
+	if (!pkram_sb_pfn)
+		return;
+
+	pr_info("PKRAM: Examining preserved memory...\n");
+
+	/* Verify that nothing else has reserved the pkram_sb page */
+	if (memblock_is_region_reserved(PFN_PHYS(pkram_sb_pfn), PAGE_SIZE)) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	pkram_sb = pkram_map_meta(pkram_sb_pfn);
+	if (IS_ERR(pkram_sb)) {
+		err = PTR_ERR(pkram_sb);
+		goto out;
+	}
+	/* An empty pkram_sb is not an error */
+	if (!pkram_sb->node_pfn) {
+		pkram_sb = NULL;
+		goto done;
+	}
+
+	err = pkram_merge_with_reserved();
+out:
+	if (err) {
+		pr_err("PKRAM: Reservation failed: %d\n", err);
+		WARN_ON(pkram_reserved_pages > 0);
+		pkram_sb = NULL;
+		return;
+	}
+
+done:
+	pr_info("PKRAM: %lu pages reserved\n", pkram_reserved_pages);
+}
+
 static inline struct page *pkram_alloc_page(gfp_t gfp_mask)
 {
 	struct page *page;
@@ -162,6 +217,7 @@ static inline struct page *pkram_alloc_page(gfp_t gfp_mask)
 
 static inline void pkram_free_page(void *addr)
 {
+	__ClearPageReserved(virt_to_page(addr));
 	pkram_remove_identity_map(virt_to_page(addr));
 	free_page((unsigned long)addr);
 }
@@ -193,13 +249,23 @@ static void pkram_truncate_link(struct pkram_link *link)
 {
 	struct page *page;
 	pkram_entry_t p;
-	int i;
+	int i, j, order;
 
 	for (i = 0; i < PKRAM_LINK_ENTRIES_MAX; i++) {
 		p = link->entry[i];
 		if (!p)
 			continue;
+		order = p & PKRAM_ENTRY_ORDER_MASK;
+		if (order >= MAX_ORDER) {
+			pr_err("PKRAM: attempted truncate of invalid page\n");
+			return;
+		}
 		page = pfn_to_page(PHYS_PFN(p));
+		for (j = 0; j < (1 << order); j++) {
+			struct page *pg = page + j;
+
+			__ClearPageReserved(pg);
+		}
 		pkram_remove_identity_map(page);
 		put_page(page);
 	}
@@ -680,7 +746,7 @@ static int __pkram_bytes_save_page(struct pkram_access *pa, struct page *page)
 static struct page *__pkram_prep_load_page(pkram_entry_t p)
 {
 	struct page *page;
-	int order;
+	int i, order;
 	short flags;
 
 	flags = (p >> PKRAM_ENTRY_FLAGS_SHIFT) & PKRAM_ENTRY_FLAGS_MASK;
@@ -690,9 +756,16 @@ static struct page *__pkram_prep_load_page(pkram_entry_t p)
 
 	page = pfn_to_page(PHYS_PFN(p));
 
-	if (!page_ref_freeze(pg, 1)) {
-		pr_err("PKRAM preserved page has unexpected inflated ref count\n");
-		goto out_error;
+	for (i = 0; i < (1 << order); i++) {
+		struct page *pg = page + i;
+		int was_rsvd;
+
+		was_rsvd = PageReserved(pg);
+		__ClearPageReserved(pg);
+		if ((was_rsvd || i == 0) && !page_ref_freeze(pg, 1)) {
+			pr_err("PKRAM preserved page has unexpected inflated ref count\n");
+			goto out_error;
+		}
 	}
 
 	if (order) {
@@ -1331,6 +1404,7 @@ int __init pkram_create_merged_reserved(struct memblock_type *new)
 	}
 
 	WARN_ON(cnt_a + cnt_b != k);
+	pkram_reserved_pages = nr_preserved;
 	new->cnt = cnt_a + cnt_b;
 	new->total_size = total_size;
 
-- 
1.9.4




More information about the kexec mailing list