[PATCH v2 11/11] arm64: kernel: Add support for hibernate/suspend-to-disk.
James Morse
james.morse@arm.com
Tue Oct 27 10:29:20 PDT 2015
Add support for hibernate/suspend-to-disk.
Suspend borrows code from cpu_suspend() to write cpu state onto the stack,
before calling swsusp_save() to save the memory image.
Restore creates a set of temporary page tables, covering the kernel and the
linear map, copies the restore code to a 'safe' page, then uses the copy to
restore the memory image. It calls into cpu_resume(),
and then follows the normal cpu_suspend() path back into the suspend code.
The implementation assumes that exactly the same kernel is booted on the
same hardware, and that the kernel is loaded at the same physical address.
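For reference, the resume path through this patch is roughly:

  swsusp_arch_resume()
   -> create_safe_exec_page()      copies the hibernate_exit text to a safe page
   -> copy_page_tables()           builds the temporary ttbr1 tables
   -> swsusp_arch_suspend_exit()   (the safe-page copy) restores the image
   -> __hibernate_exit()           reloads swapper_pg_dir
   -> _cpu_resume()                restores the state saved at suspend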
Signed-off-by: James Morse <james.morse@arm.com>
---
arch/arm64/Kconfig | 3 +
arch/arm64/include/asm/suspend.h | 8 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 4 +
arch/arm64/kernel/hibernate-asm.S | 118 +++++++++++++
arch/arm64/kernel/hibernate.c | 359 ++++++++++++++++++++++++++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 15 ++
7 files changed, 508 insertions(+)
create mode 100644 arch/arm64/kernel/hibernate-asm.S
create mode 100644 arch/arm64/kernel/hibernate.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 07d1811aa03f..d081dbc35335 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -707,6 +707,9 @@ menu "Power management options"
source "kernel/power/Kconfig"
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y
+
config ARCH_SUSPEND_POSSIBLE
def_bool y
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 5faa3ce1fa3a..e75ad7aa268c 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -1,3 +1,5 @@
+#include <linux/suspend.h>
+
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
@@ -34,6 +36,12 @@ struct sleep_stack_data {
unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS];
};
+extern int swsusp_arch_suspend(void);
+extern int swsusp_arch_resume(void);
+int swsusp_arch_suspend_enter(struct cpu_suspend_ctx *ptr);
+void __noreturn swsusp_arch_suspend_exit(phys_addr_t tmp_pg_dir,
+ phys_addr_t swapper_pg_dir,
+ void *kernel_start, void *kernel_end);
extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
extern void cpu_resume(void);
int __cpu_suspend_enter(struct sleep_stack_data *state);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 22dc9bc781be..b9151ae4a7ae 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -36,6 +36,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
+arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3cb1383d3deb..b5d9495a94a1 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/suspend.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -160,5 +161,8 @@ int main(void)
DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
+ DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
+ DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
+ DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
return 0;
}
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
new file mode 100644
index 000000000000..2cead4779804
--- /dev/null
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -0,0 +1,118 @@
+#include <linux/linkage.h>
+#include <linux/errno.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/cputype.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+/*
+ * Corrupt memory.
+ *
+ * Loads temporary page tables then restores the memory image.
+ * Finally branches to cpu_resume() to restore the state saved by
+ * swsusp_arch_suspend().
+ *
+ * Because this code has to be copied to a 'safe' page, it can't call out to
+ * other functions by pc-relative address. Also remember that it may be
+ * midway through overwriting other functions. For this reason it contains
+ * a copy of copy_page() and code from flush_icache_range().
+ *
+ * All of memory gets written to, including code. We need to clean the kernel
+ * text to the PoC before secondary cores can be booted. Because kernel modules
+ * and executable pages mapped to user space are also written as data, we
+ * clean all pages we touch to the PoU.
+ *
+ * x0: physical address of temporary page tables
+ * x1: physical address of swapper page tables
+ * x2: address of kernel_start
+ * x3: address of kernel_end
+ */
+.pushsection ".hibernate_exit.text", "ax"
+ENTRY(swsusp_arch_suspend_exit)
+ /* Temporary page tables are a copy, so no need for a trampoline here */
+ msr ttbr1_el1, x0
+ isb
+ tlbi vmalle1is
+ ic ialluis
+ isb
+
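+ /* stash the arguments: x0-x3 are clobbered by the copy loop below */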
+ mov x21, x1
+ mov x22, x2
+ mov x23, x3
+
+ /* walk the restore_pblist and use copy_page() to overwrite memory */
+ ldr x19, =restore_pblist
+ ldr x19, [x19]
+
+2: ldr x10, [x19, #HIBERN_PBE_ORIG]
+ mov x0, x10
+ ldr x1, [x19, #HIBERN_PBE_ADDR]
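+ /* x0: page to be restored ('orig'), x1: the saved copy to restore from */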
+
+ /* arch/arm64/lib/copy_page.S:copy_page() */
+ prfm pldl1strm, [x1, #64]
+3: ldp x2, x3, [x1]
+ ldp x4, x5, [x1, #16]
+ ldp x6, x7, [x1, #32]
+ ldp x8, x9, [x1, #48]
+ add x1, x1, #64
+ prfm pldl1strm, [x1, #64]
+ stnp x2, x3, [x0]
+ stnp x4, x5, [x0, #16]
+ stnp x6, x7, [x0, #32]
+ stnp x8, x9, [x0, #48]
+ add x0, x0, #64
+ tst x1, #(PAGE_SIZE - 1)
+ b.ne 3b
+
+ dsb ish // memory restore must finish before cleaning
+
+ add x1, x10, #PAGE_SIZE
+ /* Clean the copied page to PoU - based on flush_icache_range() */
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x4, x10, x3
+4: dc cvau, x4 // clean D line / unified line
+ add x4, x4, x2
+ cmp x4, x1
+ b.lo 4b
+
+ ldr x19, [x19, #HIBERN_PBE_NEXT]
+ cbnz x19, 2b
+
+ /* Clean the kernel text to PoC - based on flush_icache_range() */
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x4, x22, x3
+5: dc cvac, x4
+ add x4, x4, x2
+ cmp x4, x23
+ b.lo 5b
+
+ /*
+ * branch into the restored kernel - so that when we restore the page
+ * tables, code continues to be executable.
+ */
+ ldr x1, =__hibernate_exit
+ mov x0, x21 // physical address of swapper page tables.
+ br x1
+
+ .ltorg
+ENDPROC(swsusp_arch_suspend_exit)
+.popsection
+
+/*
+ * Reset the page tables, and wake up in cpu_resume().
+ * Temporary page tables were a copy, so again, no trampoline here.
+ *
+ * x0: physical address of swapper_pg_dir
+ */
+ENTRY(__hibernate_exit)
+ msr ttbr1_el1, x0
+ isb
+ tlbi vmalle1is
+ ic ialluis
+ isb
+ b _cpu_resume
+ENDPROC(__hibernate_exit)
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
new file mode 100644
index 000000000000..40eb55bcee15
--- /dev/null
+++ b/arch/arm64/kernel/hibernate.c
@@ -0,0 +1,359 @@
+/*
+ * Hibernate support specific for ARM64
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ * https://lkml.org/lkml/2010/6/18/4
+ * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ * https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#define pr_fmt(x) "hibernate: " x
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/pm.h>
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <linux/version.h>
+
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/irqflags.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+
+/* These are necessary to build without ifdefery */
+#ifndef pmd_index
+#define pmd_index(x) 0
+#endif
+#ifndef pud_index
+#define pud_index(x) 0
+#endif
+
+/*
+ * Start/end of the hibernate exit code, this must be copied to a 'safe'
+ * location in memory, and executed from there.
+ */
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+ unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+ return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+ local_fiq_disable();
+}
+
+void notrace restore_processor_state(void)
+{
+ local_fiq_enable();
+}
+
+/*
+ * Copies length bytes, starting at src_start, into a new page, performs
+ * cache maintenance, then maps it at the top of memory as executable.
+ *
+ * This is used by hibernate to copy the code it needs to execute when
+ * overwriting the kernel text.
+ *
+ * Suggested allocators are get_safe_page() or get_zeroed_page(). Your chosen
+ * mask must cause zero'd pages to be returned.
+ */
+static int create_safe_exec_page(void *src_start, size_t length,
+ void **dst_addr,
+ unsigned long (*allocator)(gfp_t mask),
+ gfp_t mask)
+{
+ int rc = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long dst = allocator(mask);
+ if (!dst) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ memcpy((void *)dst, src_start, length);
+ flush_icache_range(dst, dst + length);
+
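+ /*
+ * Map dst at the top of the address space ((unsigned long)-1),
+ * allocating any levels of table that are missing. The PTRS_PER_* > 1
+ * checks skip levels that are folded away for this page-size/VA
+ * configuration.
+ */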
+ pgd = pgd_offset(&init_mm, (unsigned long)-1);
+ if (!pgd_val(*pgd) && PTRS_PER_PGD > 1) {
+ pud = (pud_t *)allocator(mask);
+ if (!pud) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ set_pgd(pgd, __pgd(virt_to_phys(pud) | PUD_TYPE_TABLE));
+ }
+
+ pud = pud_offset(pgd, (unsigned long)-1);
+ if (!pud_val(*pud) && PTRS_PER_PUD > 1) {
+ pmd = (pmd_t *)allocator(mask);
+ if (!pmd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ set_pud(pud, __pud(virt_to_phys(pmd) | PUD_TYPE_TABLE));
+ }
+
+ pmd = pmd_offset(pud, (unsigned long)-1);
+ if (!pmd_val(*pmd) && PTRS_PER_PMD > 1) {
+ pte = (pte_t *)allocator(mask);
+ if (!pte) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ set_pmd(pmd, __pmd(virt_to_phys(pte) | PMD_TYPE_TABLE));
+ }
+
+ pte = pte_offset_kernel(pmd, (unsigned long)-1);
+ set_pte_at(&init_mm, dst, pte,
+ __pte(virt_to_phys((void *)dst) | pgprot_val(PAGE_KERNEL_EXEC)));
+
+ /* this is a new mapping, so no need for a tlbi */
+
+ *dst_addr = (void *)((unsigned long)-1 & PAGE_MASK);
+
+out:
+ return rc;
+}
+
+
+int swsusp_arch_suspend(void)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct sleep_stack_data state;
+ struct mm_struct *mm = current->active_mm;
+
+ local_dbg_save(flags);
+
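+ /*
+ * __cpu_suspend_enter() returns non-zero on the save path, once the
+ * cpu state has been written to 'state'; it returns zero when this
+ * point is reached again via cpu_resume() after the image has been
+ * restored.
+ */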
+ if (__cpu_suspend_enter(&state))
+ ret = swsusp_save();
+ else
+ __cpu_suspend_exit(mm);
+
+ local_dbg_restore(flags);
+
+ return ret;
+}
+
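+ /*
+ * The copy_p*() helpers below duplicate the kernel page tables,
+ * clearing the read-only attributes (PTE_RDONLY/PMD_SECT_RDONLY) so
+ * the restore code can write to pages that swapper_pg_dir maps
+ * read-only.
+ */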
+static int copy_pte(pmd_t *dst, pmd_t *src, unsigned long *start_addr)
+{
+ int i;
+ pte_t *old_pte = pte_offset_kernel(src, *start_addr);
+ pte_t *new_pte = pte_offset_kernel(dst, *start_addr);
+
+ for (i = pte_index(*start_addr); i < PTRS_PER_PTE;
+ i++, old_pte++, new_pte++) {
+ if (pte_val(*old_pte))
+ set_pte(new_pte,
+ __pte(pte_val(*old_pte) & ~PTE_RDONLY));
+ }
+
+ *start_addr &= PAGE_MASK;
+
+ return 0;
+}
+
+static int copy_pmd(pud_t *dst, pud_t *src, unsigned long *start_addr)
+{
+ int i;
+ int rc = 0;
+ pte_t *new_pte;
+ pmd_t *old_pmd = pmd_offset(src, *start_addr);
+ pmd_t *new_pmd = pmd_offset(dst, *start_addr);
+
+ for (i = pmd_index(*start_addr); i < PTRS_PER_PMD;
+ i++, *start_addr += PMD_SIZE, old_pmd++, new_pmd++) {
+ if (!pmd_val(*old_pmd))
+ continue;
+
+ if (pmd_table(*(old_pmd))) {
+ new_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!new_pte) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ set_pmd(new_pmd, __pmd(virt_to_phys(new_pte)
+ | PMD_TYPE_TABLE));
+
+ rc = copy_pte(new_pmd, old_pmd, start_addr);
+ if (rc)
+ break;
+ } else
+ set_pmd(new_pmd,
+ __pmd(pmd_val(*old_pmd) & ~PMD_SECT_RDONLY));
+
+ *start_addr &= PMD_MASK;
+ }
+
+ return rc;
+}
+
+static int copy_pud(pgd_t *dst, pgd_t *src, unsigned long *start_addr)
+{
+ int i;
+ int rc = 0;
+ pmd_t *new_pmd;
+ pud_t *old_pud = pud_offset(src, *start_addr);
+ pud_t *new_pud = pud_offset(dst, *start_addr);
+
+ for (i = pud_index(*start_addr); i < PTRS_PER_PUD;
+ i++, *start_addr += PUD_SIZE, old_pud++, new_pud++) {
+ if (!pud_val(*old_pud))
+ continue;
+
+ if (pud_table(*(old_pud))) {
+ if (PTRS_PER_PMD != 1) {
+ new_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!new_pmd) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ set_pud(new_pud, __pud(virt_to_phys(new_pmd)
+ | PUD_TYPE_TABLE));
+ }
+
+ rc = copy_pmd(new_pud, old_pud, start_addr);
+ if (rc)
+ break;
+ } else
+ set_pud(new_pud,
+ __pud(pud_val(*old_pud) & ~PMD_SECT_RDONLY));
+
+ *start_addr &= PUD_MASK;
+ }
+
+ return rc;
+}
+
+static int copy_page_tables(pgd_t *new_pgd, unsigned long start_addr)
+{
+ int i;
+ int rc = 0;
+ pud_t *new_pud;
+ pgd_t *old_pgd = pgd_offset_k(start_addr);
+
+ new_pgd += pgd_index(start_addr);
+
+ for (i = pgd_index(start_addr); i < PTRS_PER_PGD;
+ i++, start_addr += PGDIR_SIZE, old_pgd++, new_pgd++) {
+ if (!pgd_val(*old_pgd))
+ continue;
+
+ if (PTRS_PER_PUD != 1) {
+ new_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!new_pud) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ set_pgd(new_pgd, __pgd(virt_to_phys(new_pud)
+ | PUD_TYPE_TABLE));
+ }
+
+ rc = copy_pud(new_pgd, old_pgd, &start_addr);
+ if (rc)
+ break;
+
+ start_addr &= PGDIR_MASK;
+ }
+
+ return rc;
+}
+
+/*
+ * Set up, then resume from, the hibernate image using swsusp_arch_suspend_exit().
+ *
+ * Memory allocated by get_safe_page() will be dealt with by the hibernate code,
+ * we don't need to free it here.
+ *
+ * Allocate a safe zero page to use as ttbr0, as all existing page tables,
+ * and even the empty_zero_page, will be overwritten.
+ */
+int swsusp_arch_resume(void)
+{
+ int rc = 0;
+ size_t exit_size;
+ pgd_t *tmp_pg_dir;
+ void *safe_zero_page_mem;
+ unsigned long tmp_pg_start;
+ void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *);
+
+ /* Copy swsusp_arch_suspend_exit() to a safe page. */
+ exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
+ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
+ (void **)&hibernate_exit, get_safe_page, GFP_ATOMIC);
+ if (rc) {
+ pr_err("Failed to create safe executable page for"
+ " hibernate_exit code.");
+ goto out;
+ }
+
+ /*
+ * Even the zero page may get overwritten during restore.
+ * get_safe_page() only returns zero'd pages.
+ */
+ safe_zero_page_mem = (void *)get_safe_page(GFP_ATOMIC);
+ if (!safe_zero_page_mem) {
+ pr_err("Failed to allocate memory for zero page.");
+ rc = -ENOMEM;
+ goto out;
+ }
+ empty_zero_page = virt_to_page(safe_zero_page_mem);
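+ /* park ttbr0 on the (newly repointed) empty_zero_page */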
+ cpu_set_reserved_ttbr0();
+
+ /*
+ * Restoring the memory image will overwrite the ttbr1 page tables.
+ * Create a second copy, of the kernel and linear map, and use this
+ * when restoring.
+ */
+ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!tmp_pg_dir) {
+ pr_err("Failed to allocate memory for temporary page tables.");
+ rc = -ENOMEM;
+ goto out;
+ }
+ tmp_pg_start = min((unsigned long)KERNEL_START,
+ (unsigned long)PAGE_OFFSET);
+ rc = copy_page_tables(tmp_pg_dir, tmp_pg_start);
+ if (rc)
+ goto out;
+
+ /*
+ * EL2 may get upset if we overwrite its page-tables/stack.
+ * kvm_arch_hardware_disable() returns EL2 to the hyp stub. This
+ * isn't needed on normal suspend/resume as PSCI prevents us from
+ * ruining EL2.
+ */
+ if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
+ kvm_arch_hardware_disable();
+
+ hibernate_exit(virt_to_phys(tmp_pg_dir), virt_to_phys(swapper_pg_dir),
+ KERNEL_START, KERNEL_END);
+
+out:
+ return rc;
+}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 98073332e2d0..3d8284d91f4c 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -44,6 +44,16 @@ jiffies = jiffies_64;
*(.idmap.text) \
VMLINUX_SYMBOL(__idmap_text_end) = .;
+#ifdef CONFIG_HIBERNATION
+#define HIBERNATE_TEXT \
+ . = ALIGN(SZ_4K); \
+ VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+ *(.hibernate_exit.text) \
+ VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+#else
+#define HIBERNATE_TEXT
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -102,6 +112,7 @@ SECTIONS
LOCK_TEXT
HYPERVISOR_TEXT
IDMAP_TEXT
+ HIBERNATE_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
@@ -181,6 +192,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"HYP init code too big or misaligned")
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
+#ifdef CONFIG_HIBERNATION
+ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
+ <= SZ_4K, "Hibernate exit text too big or misaligned")
+#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
--
2.1.4