[RFC PATCH 2/2] x86/kexec: Add data section to relocate_kernel

H. Peter Anvin hpa at zytor.com
Fri Nov 8 03:26:58 PST 2024


On November 8, 2024 6:22:41 AM GMT+01:00, David Woodhouse <dwmw2 at infradead.org> wrote:
>From: David Woodhouse <dwmw at amazon.co.uk>
>
>Now that it's handled sanely by a linker script we can have actual data,
>and just use %rip-relative addressing to access it.
>
>If we could call the *copy* instead of the original relocate_kernel in
>the kernel text, then we could use %rip-relative addressing everywhere.
>
>Signed-off-by: David Woodhouse <dwmw at amazon.co.uk>
>---
> arch/x86/kernel/relocate_kernel_64.S | 58 ++++++++++++++++------------
> arch/x86/kernel/vmlinux.lds.S        |  2 +-
> 2 files changed, 35 insertions(+), 25 deletions(-)
>
>diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
>index 1efcbd340528..577aa1672349 100644
>--- a/arch/x86/kernel/relocate_kernel_64.S
>+++ b/arch/x86/kernel/relocate_kernel_64.S
>@@ -27,18 +27,28 @@
>  * ~ control_page + PAGE_SIZE are used as data storage and stack for
>  * jumping back
>  */
>-#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
> 
>+	.section .data.relocate_kernel,"a";
> /* Minimal CPU state */
>-#define RSP			DATA(0x0)
>-#define CR0			DATA(0x8)
>-#define CR3			DATA(0x10)
>-#define CR4			DATA(0x18)
>-
>+SYM_DATA_LOCAL(saved_rsp, .quad 0)
>+SYM_DATA_LOCAL(saved_cr0, .quad 0)
>+SYM_DATA_LOCAL(saved_cr3, .quad 0)
>+SYM_DATA_LOCAL(saved_cr4, .quad 0)
> /* other data */
>-#define CP_PA_TABLE_PAGE	DATA(0x20)
>-#define CP_PA_SWAP_PAGE		DATA(0x28)
>-#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
>+SYM_DATA_LOCAL(pa_table_page, .quad 0)
>+SYM_DATA_LOCAL(pa_swap_page, .quad 0)
>+SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
>+
>+/*
>+ * There are two physical copies of relocate_kernel(), one in the original
>+ * kernel text and the other copied to the control page. There is a virtual
>+ * mapping of each, in the original kernel. It is the *original* which is
>+ * called from machine_kexec(), largely because the copy isn't mapped as an
>+ * executable page. Thus, this code cannot just use %rip-relative addressing
>+ * until after the %cr3 change and the jump to identity_mapped(). Until
>+ * then, some pointer arithmetic is required.
>+ */
>+#define DATA(x) (x - relocate_kernel)
> 
> 	.section .text.relocate_kernel,"ax";
> 	.code64
>@@ -63,13 +73,13 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
> 	pushf
> 
> 	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
>-	movq	%rsp, RSP(%r11)
>+	movq	%rsp, DATA(saved_rsp)(%r11)
> 	movq	%cr0, %rax
>-	movq	%rax, CR0(%r11)
>+	movq	%rax, DATA(saved_cr0)(%r11)
> 	movq	%cr3, %rax
>-	movq	%rax, CR3(%r11)
>+	movq	%rax, DATA(saved_cr3)(%r11)
> 	movq	%cr4, %rax
>-	movq	%rax, CR4(%r11)
>+	movq	%rax, DATA(saved_cr4)(%r11)
> 
> 	/* Save CR4. Required to enable the right paging mode later. */
> 	movq	%rax, %r13
>@@ -94,9 +104,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
> 	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
> 
> 	/* save some information for jumping back */
>-	movq	%r9, CP_PA_TABLE_PAGE(%r11)
>-	movq	%r10, CP_PA_SWAP_PAGE(%r11)
>-	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
>+	movq	%r9, DATA(pa_table_page)(%r11)
>+	movq	%r10, DATA(pa_swap_page)(%r11)
>+	movq	%rdi, DATA(pa_backup_pages_map)(%r11)
> 
> 	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
> 	movq	%rcx, %r11
>@@ -128,7 +138,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
> 	/* set return address to 0 if not preserving context */
> 	pushq	$0
> 	/* store the start address on the stack */
>-	pushq   %rdx
>+	pushq   start_address(%rip)
> 
> 	/*
> 	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
>@@ -227,9 +237,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
> 	/* get the re-entry point of the peer system */
> 	movq	0(%rsp), %rbp
> 	leaq	relocate_kernel(%rip), %r8
>-	movq	CP_PA_SWAP_PAGE(%r8), %r10
>-	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
>-	movq	CP_PA_TABLE_PAGE(%r8), %rax
>+	movq	pa_swap_page(%rip), %r10
>+	movq	pa_backup_pages_map(%rip), %rdi
>+	movq	pa_table_page(%rip), %rax
> 	movq	%rax, %cr3
> 	lea	PAGE_SIZE(%r8), %rsp
> 	call	swap_pages
>@@ -243,11 +253,11 @@ SYM_CODE_END(identity_mapped)
> SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
> 	UNWIND_HINT_END_OF_STACK
> 	ANNOTATE_NOENDBR // RET target, above
>-	movq	RSP(%r8), %rsp
>-	movq	CR4(%r8), %rax
>+	movq	saved_rsp(%rip), %rsp
>+	movq	saved_cr4(%rip), %rax
> 	movq	%rax, %cr4
>-	movq	CR3(%r8), %rax
>-	movq	CR0(%r8), %r8
>+	movq	saved_cr3(%rip), %rax
>+	movq	saved_cr0(%rip), %r8
> 	movq	%rax, %cr3
> 	movq	%r8, %cr0
> 	movq	%rbp, %rax
>diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
>index ad451371e179..65f879b31a82 100644
>--- a/arch/x86/kernel/vmlinux.lds.S
>+++ b/arch/x86/kernel/vmlinux.lds.S
>@@ -100,7 +100,7 @@ const_pcpu_hot = pcpu_hot;
> 	. = ALIGN(PAGE_SIZE);					\
> 	__relocate_kernel_start = .;				\
> 	*(.text.relocate_kernel);				\
>-	*(.rodata.relocate_kernel);				\
>+	*(.data.relocate_kernel);				\
> 	__relocate_kernel_end = .;
> #else
> #define KEXEC_RELOCATE_KERNEL_TEXT

Looks good at first glance. I'm currently traveling so I haven't fully reviewed it though.



More information about the kexec mailing list