[PATCH v2 5/7] x86, kstate: Add the ability to preserve memory pages across kexec.
Andrey Ryabinin
arbn at yandex-team.com
Mon Mar 10 05:03:16 PDT 2025
This adds the ability to specify pages of memory that kstate needs to
preserve across kexec.
kstate_register_page() stores the struct page in a special list of
'struct kpage_state's. At the kexec reboot stage this list is iterated
and the pfns are saved into kstate's data stream. After kexec, the new
kernel reads the pfns from the stream and marks the memory as reserved
to keep it intact.
Signed-off-by: Andrey Ryabinin <arbn at yandex-team.com>
---
include/linux/kstate.h | 30 ++++++++++
kernel/kexec_core.c | 3 +-
kernel/kstate.c | 124 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+), 1 deletion(-)
diff --git a/include/linux/kstate.h b/include/linux/kstate.h
index ae583d090111..36cfefd87572 100644
--- a/include/linux/kstate.h
+++ b/include/linux/kstate.h
@@ -88,6 +88,8 @@ struct kstate_field {
};
enum kstate_ids {
+ KSTATE_RSVD_MEM_ID = 1,
+ KSTATE_STRUCT_PAGE_ID,
KSTATE_LAST_ID = -1,
};
@@ -124,6 +126,8 @@ static inline unsigned long kstate_get_ulong(struct kstate_stream *stream)
return ret;
}
+extern struct kstate_description page_state;
+
#ifdef CONFIG_KSTATE
void kstate_init(void);
@@ -141,6 +145,12 @@ void restore_kstate(struct kstate_stream *stream, int id,
const struct kstate_description *kstate, void *obj);
int kstate_load_migrate_buf(struct kimage *image);
+int kstate_page_save(struct kstate_stream *stream, void *obj,
+ const struct kstate_field *field);
+int kstate_register_page(struct page *page, int order);
+
+bool kstate_range_is_preserved(unsigned long start, unsigned long end);
+
#else
static inline void kstate_init(void) { }
@@ -150,6 +160,11 @@ static inline int kstate_save_state(void) { return 0; }
static inline void free_kstate_stream(void) { }
static inline int kstate_load_migrate_buf(struct kimage *image) { return 0; }
+
+/* !CONFIG_KSTATE stub: no range is ever reported as preserved. */
+static inline bool kstate_range_is_preserved(unsigned long start,
+					     unsigned long end)
+{ return false; }
+
#endif
@@ -176,6 +191,21 @@ static inline int kstate_load_migrate_buf(struct kimage *image) { return 0; }
.offset = offsetof(_state, _f), \
}
+#define KSTATE_PAGE(_f, _state) /* expands to three kstate_field entries for member _f of _state */ \
+ { /* 1: custom field whose save hook is kstate_page_save() */ \
+ .name = "page", \
+ .flags = KS_CUSTOM, \
+ .offset = offsetof(_state, _f), \
+ .save = kstate_page_save, \
+ }, \
+ KSTATE_ADDRESS(_f, _state, KS_VMEMMAP_ADDR), /* 2: address entry for the same member, tagged KS_VMEMMAP_ADDR (KSTATE_ADDRESS is defined elsewhere) */ \
+ { /* 3: the pointed-to object, described by page_state */ \
+ .name = "struct_page", \
+ .flags = KS_STRUCT | KS_POINTER, \
+ .offset = offsetof(_state, _f), \
+ .ksd = &page_state, \
+ }
+
#define KSTATE_END_OF_LIST() { \
.flags = KS_END,\
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 7c79addeb93b..5d001b7a9e44 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
+#include <linux/kstate.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
@@ -261,7 +262,7 @@ int kimage_is_destination_range(struct kimage *image,
return 1;
}
- return 0;
+ return kstate_range_is_preserved(start, end);
}
int kimage_is_control_page(struct kimage *image,
diff --git a/kernel/kstate.c b/kernel/kstate.c
index d35996287b76..68a1272abceb 100644
--- a/kernel/kstate.c
+++ b/kernel/kstate.c
@@ -309,6 +309,13 @@ int kstate_register(struct kstate_description *state, void *obj)
return 0;
}
+/*
+ * KS_CUSTOM ->save hook installed by KSTATE_PAGE().
+ *
+ * @obj is read as a pointer to a 'struct page *'; the page it refers to is
+ * queued as an order-0 page via kstate_register_page().  Nothing is written
+ * to @stream here and @field is not used.
+ *
+ * Returns 0 on success, or the error from kstate_register_page() so that a
+ * failed registration is not reported as a successful save.
+ */
+int kstate_page_save(struct kstate_stream *stream, void *obj,
+		     const struct kstate_field *field)
+{
+	struct page *page = *(struct page **)obj;
+
+	return kstate_register_page(page, 0);
+}
+
static int __init setup_kstate(char *arg)
{
char *end;
@@ -323,7 +330,124 @@ static int __init setup_kstate(char *arg)
}
early_param("kstate_stream", setup_kstate);
+/*
+ * kstate description of the struct page fields that are carried across
+ * kexec; currently only _mapcount and _refcount are listed. It is the
+ * ->ksd referenced by the "struct_page" entry of KSTATE_PAGE().
+ *
+ * TODO: probably should use folio instead/in addition,
+ * also will need to think/decide what fields
+ * to preserve or not
+ */
+struct kstate_description page_state = {
+ .name = "struct_page",
+ .id = KSTATE_STRUCT_PAGE_ID,
+ .state_list = LIST_HEAD_INIT(page_state.state_list),
+ .fields = (const struct kstate_field[]) {
+ KSTATE_BASE_TYPE(_mapcount, struct page, atomic_t),
+ KSTATE_BASE_TYPE(_refcount, struct page, atomic_t),
+ KSTATE_END_OF_LIST()
+ },
+};
+
+struct state_entry preserved_se; /* statically allocated entry passed to __kstate_register() in kstate_init() */
+
+struct preserved_pages { /* head of the list of pages registered for preservation */
+ unsigned int nr_pages; /* number of entries on the list; incremented by kstate_register_page() */
+ struct list_head list; /* list of struct kpage_state, linked through kpage_state.list */
+};
+struct kpage_state { /* one registered page block */
+ struct list_head list; /* link in preserved_pages.list */
+ u8 order; /* order given to kstate_register_page(), narrowed from int to u8 */
+ struct page *page; /* page given to kstate_register_page() */
+};
+
+struct preserved_pages preserved_pages = { /* global instance used by the register/save/lookup functions in this file */
+ .list = LIST_HEAD_INIT(preserved_pages.list)
+};
+
+/*
+ * Queue @page (a block of 2^@order pages) for preservation across kexec.
+ *
+ * A struct kpage_state is allocated with GFP_KERNEL and added to
+ * preserved_pages.list; preserved_pages.nr_pages is incremented to match.
+ *
+ * Returns 0 on success, -EINVAL if @order cannot be stored in the u8
+ * kpage_state.order field (negative or above 255), or -ENOMEM if the list
+ * entry cannot be allocated.
+ */
+int kstate_register_page(struct page *page, int order)
+{
+	struct kpage_state *state;
+
+	/*
+	 * state->order is a u8: refuse values that would be silently changed
+	 * by the narrowing and later be used as a bogus shift count.
+	 */
+	if (order != (u8)order)
+		return -EINVAL;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->page = page;
+	state->order = order;
+	list_add(&state->list, &preserved_pages.list);
+	preserved_pages.nr_pages++;
+	return 0;
+}
+
+/*
+ * KS_CUSTOM ->save hook for the "pages" field of kstate_preserved_mem.
+ *
+ * Walks preserved_pages.list and, for every entry, appends its order
+ * (one u8) followed by the physical address of its page (one unsigned
+ * long) to @stream.  @obj and @field are not used.
+ *
+ * Returns 0 on success or the first non-zero value returned by
+ * kstate_save_data().
+ */
+static int kstate_pages_save(struct kstate_stream *stream, void *obj,
+			     const struct kstate_field *field)
+{
+	struct kpage_state *entry;
+
+	list_for_each_entry(entry, &preserved_pages.list, list) {
+		unsigned long phys = page_to_phys(entry->page);
+		int err;
+
+		err = kstate_save_data(stream, &entry->order,
+				       sizeof(entry->order));
+		if (!err)
+			err = kstate_save_data(stream, &phys, sizeof(phys));
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether the inclusive address range [@start, @end] overlaps any
+ * page block registered with kstate_register_page().
+ *
+ * @start and @end are addresses in the same units kexec uses for its
+ * destination ranges (boot pfn << PAGE_SHIFT), so each registered block is
+ * converted to an address range of PAGE_SIZE << order bytes before the
+ * comparison.
+ *
+ * Returns true on overlap, false otherwise (including when nothing has
+ * been registered).
+ */
+bool kstate_range_is_preserved(unsigned long start, unsigned long end)
+{
+	struct kpage_state *p_state;
+
+	list_for_each_entry(p_state, &preserved_pages.list, list) {
+		unsigned long pstart, pend;
+
+		/* First and last byte address covered by this block. */
+		pstart = page_to_boot_pfn(p_state->page) << PAGE_SHIFT;
+		pend = pstart + (PAGE_SIZE << p_state->order) - 1;
+		if (end >= pstart && start <= pend)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * KS_CUSTOM ->restore hook for the "pages" field of kstate_preserved_mem.
+ *
+ * @obj is the struct preserved_pages being restored; its nr_pages member
+ * gives the number of (order, physical address) records to read from
+ * @stream.  Each record is reserved in memblock with a size of
+ * PAGE_SIZE << order so the new kernel leaves that memory intact.
+ * @field is not used.
+ *
+ * All records are always consumed so the stream position stays consistent
+ * even if a reservation fails.  Returns 0 on success or the first error
+ * returned by memblock_reserve().
+ *
+ * NOTE(review): the list of struct kpage_state is not rebuilt here, only
+ * the memblock reservations are made - confirm this is intended.
+ */
+static int __init kstate_pages_restore(struct kstate_stream *stream, void *obj,
+				       const struct kstate_field *field)
+{
+	const struct preserved_pages *pages = obj;
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < pages->nr_pages; i++) {
+		int order = kstate_get_byte(stream);
+		unsigned long phys = kstate_get_ulong(stream);
+		int err;
+
+		err = memblock_reserve(phys, PAGE_SIZE << order);
+		if (err && !ret)
+			ret = err;
+	}
+
+	return ret;
+}
+
+struct kstate_description kstate_preserved_mem = { /* describes how a struct preserved_pages is saved and restored */
+ .name = "preserved_range",
+ .id = KSTATE_RSVD_MEM_ID,
+ .state_list = LIST_HEAD_INIT(kstate_preserved_mem.state_list),
+ .fields = (const struct kstate_field[]) {
+ KSTATE_BASE_TYPE(nr_pages, struct preserved_pages, unsigned int), /* listed before "pages"; kstate_pages_restore() reads nr_pages from the object (presumably fields are processed in order - confirm) */
+ {
+ .name = "pages",
+ .flags = KS_CUSTOM,
+ .size = sizeof(struct preserved_pages),
+ .save = kstate_pages_save, /* writes one (order, physical address) record per list entry */
+ .restore = kstate_pages_restore, /* reads the records back and memblock-reserves each range */
+ },
+
+ KSTATE_END_OF_LIST()
+ },
+};
+
void __init kstate_init(void)
{
memblock_reserve(kstate_stream_addr, kstate_size);
+ __kstate_register(&kstate_preserved_mem, &preserved_pages,
+ &preserved_se); /* register the global preserved_pages object under kstate_preserved_mem, using the static preserved_se entry */
}
--
2.45.3
More information about the kexec
mailing list