[RFC PATCH 2/2] x86/kexec: Add data section to relocate_kernel
David Woodhouse
dwmw2 at infradead.org
Thu Nov 7 21:22:41 PST 2024
From: David Woodhouse <dwmw at amazon.co.uk>
Now that the relocate_kernel section is handled sanely by a linker script, we
can have actual data in it, and just use %rip-relative addressing to access it.

If we could call the *copy* of relocate_kernel instead of the original in the
kernel text, then we could use %rip-relative addressing everywhere.
Signed-off-by: David Woodhouse <dwmw at amazon.co.uk>
---
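In other words, the jump-back path goes from fixed offsets off a control page
pointer to plain symbol references, roughly like this (a sketch only; the
instructions are the same ones touched in the hunks below):

	/* before: RSP is an offset from the control page address in %r8 */
	movq	RSP(%r8), %rsp

	/* after: saved_rsp is a real symbol in .data.relocate_kernel */
	movq	saved_rsp(%rip), %rsp
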
arch/x86/kernel/relocate_kernel_64.S | 58 ++++++++++++++++------------
arch/x86/kernel/vmlinux.lds.S | 2 +-
2 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 1efcbd340528..577aa1672349 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -27,18 +27,28 @@
* ~ control_page + PAGE_SIZE are used as data storage and stack for
* jumping back
*/
-#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+ .section .data.relocate_kernel,"a";
/* Minimal CPU state */
-#define RSP DATA(0x0)
-#define CR0 DATA(0x8)
-#define CR3 DATA(0x10)
-#define CR4 DATA(0x18)
-
+SYM_DATA_LOCAL(saved_rsp, .quad 0)
+SYM_DATA_LOCAL(saved_cr0, .quad 0)
+SYM_DATA_LOCAL(saved_cr3, .quad 0)
+SYM_DATA_LOCAL(saved_cr4, .quad 0)
/* other data */
-#define CP_PA_TABLE_PAGE DATA(0x20)
-#define CP_PA_SWAP_PAGE DATA(0x28)
-#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
+SYM_DATA_LOCAL(pa_table_page, .quad 0)
+SYM_DATA_LOCAL(pa_swap_page, .quad 0)
+SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
+
+/*
+ * There are two physical copies of relocate_kernel(), one in the original
+ * kernel text and the other copied to the control page. There is a virtual
+ * mapping of each, in the original kernel. It is the *original* which is
+ * called from machine_kexec(), largely because the copy isn't mapped as an
+ * executable page. Thus, this code cannot just use %rip-relative addressing
+ * until after the %cr3 change and the jump to identity_mapped(). Until
+ * then, some pointer arithmetic is required.
+ */
+#define DATA(x) (x - relocate_kernel)
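+
+/*
+ * For example, with %r11 holding the virtual address of the control page,
+ * DATA(saved_rsp)(%r11) resolves to %r11 + (saved_rsp - relocate_kernel),
+ * i.e. the copy of saved_rsp within the control page.
+ */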
.section .text.relocate_kernel,"ax";
.code64
@@ -63,13 +73,13 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
pushf
movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
- movq %rsp, RSP(%r11)
+ movq %rsp, DATA(saved_rsp)(%r11)
movq %cr0, %rax
- movq %rax, CR0(%r11)
+ movq %rax, DATA(saved_cr0)(%r11)
movq %cr3, %rax
- movq %rax, CR3(%r11)
+ movq %rax, DATA(saved_cr3)(%r11)
movq %cr4, %rax
- movq %rax, CR4(%r11)
+ movq %rax, DATA(saved_cr4)(%r11)
/* Save CR4. Required to enable the right paging mode later. */
movq %rax, %r13
@@ -94,9 +104,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
movq PTR(PA_SWAP_PAGE)(%rsi), %r10
/* save some information for jumping back */
- movq %r9, CP_PA_TABLE_PAGE(%r11)
- movq %r10, CP_PA_SWAP_PAGE(%r11)
- movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
+ movq %r9, DATA(pa_table_page)(%r11)
+ movq %r10, DATA(pa_swap_page)(%r11)
+ movq %rdi, DATA(pa_backup_pages_map)(%r11)
/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
movq %rcx, %r11
@@ -128,7 +138,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* set return address to 0 if not preserving context */
pushq $0
/* store the start address on the stack */
- pushq %rdx
+ pushq start_address(%rip)
/*
* Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
@@ -227,9 +237,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* get the re-entry point of the peer system */
movq 0(%rsp), %rbp
leaq relocate_kernel(%rip), %r8
- movq CP_PA_SWAP_PAGE(%r8), %r10
- movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
- movq CP_PA_TABLE_PAGE(%r8), %rax
+ movq pa_swap_page(%rip), %r10
+ movq pa_backup_pages_map(%rip), %rdi
+ movq pa_table_page(%rip), %rax
movq %rax, %cr3
lea PAGE_SIZE(%r8), %rsp
call swap_pages
@@ -243,11 +253,11 @@ SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR // RET target, above
- movq RSP(%r8), %rsp
- movq CR4(%r8), %rax
+ movq saved_rsp(%rip), %rsp
+ movq saved_cr4(%rip), %rax
movq %rax, %cr4
- movq CR3(%r8), %rax
- movq CR0(%r8), %r8
+ movq saved_cr3(%rip), %rax
+ movq saved_cr0(%rip), %r8
movq %rax, %cr3
movq %r8, %cr0
movq %rbp, %rax
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index ad451371e179..65f879b31a82 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -100,7 +100,7 @@ const_pcpu_hot = pcpu_hot;
. = ALIGN(PAGE_SIZE); \
__relocate_kernel_start = .; \
*(.text.relocate_kernel); \
- *(.rodata.relocate_kernel); \
+ *(.data.relocate_kernel); \
__relocate_kernel_end = .;
#else
#define KEXEC_RELOCATE_KERNEL_TEXT
--
2.44.0