[PATCH v5 07/20] x86/kexec: Invoke copy of relocate_kernel() instead of the original

Josh Poimboeuf jpoimboe at kernel.org
Wed Dec 18 13:23:26 PST 2024


On Wed, Dec 18, 2024 at 10:44:25AM +0100, David Woodhouse wrote:
> > At some point we had discussed placing the code in .rodata, was it the
> > alternative preventing that?
> 
> No, the alternative seems to be fine, and it's all in the .data section
> now (since the kernel does write some variables there which are then
> accessed %rip-relative from the code itself).

The linker script does place it in .data, but objtool runs on the object
file before linking, where it's still in an executable section
(.text..relocate_kernel).

How about something like below?

  - move text to .data..relocate_kernel
  - remove objtool annotations
  - replace the alternative with a runtime check

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5081d0b9e290..126d777a5695 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -61,6 +61,7 @@ struct kimage;
 extern unsigned long kexec_va_control_page;
 extern unsigned long kexec_pa_table_page;
 extern unsigned long kexec_pa_swap_page;
+extern uint16_t kexec_preserve_mce;
 extern gate_desc kexec_debug_idt[];
 extern unsigned char kexec_debug_exc_vectors[];
 extern uint16_t kexec_debug_8250_port;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 853f3cdc7c75..751adeea0db8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -363,6 +363,8 @@ int machine_kexec_prepare(struct kimage *image)
 	if (image->type == KEXEC_TYPE_DEFAULT)
 		kexec_pa_swap_page = page_to_pfn(image->swap_page) << PAGE_SHIFT;
 
+	kexec_preserve_mce = boot_cpu_has(X86_FEATURE_TDX_GUEST);
+
 	prepare_debug_idt((unsigned long)__pa(control_page),
 			  (unsigned long)kexec_debug_exc_vectors - reloc_start);
 
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 4dca50141586..04b7a9f11e98 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -24,8 +24,8 @@
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 /*
- * The .text..relocate_kernel and .data..relocate_kernel sections are copied
- * into the control page, and the remainder of the page is used as the stack.
+ * The .data..relocate_kernel section is copied into the control page, and the
+ * remainder of the page is used as the stack.
  */
 
 	.section .data..relocate_kernel,"a";
@@ -41,6 +41,7 @@ SYM_DATA(kexec_pa_swap_page, .quad 0)
 SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
 SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
 SYM_DATA(kexec_debug_8250_port, .word 0)
+SYM_DATA(kexec_preserve_mce, .word 0)
 
 #ifdef CONFIG_KEXEC_DEBUG
 	.balign 16
@@ -60,12 +61,14 @@ SYM_DATA_END(kexec_debug_idt)
 
 #endif /* CONFIG_KEXEC_DEBUG */
 
-	.section .text..relocate_kernel,"ax";
+/*
+ * This code is copied rather than called in place.  It's free to use indirect
+ * branches and bare returns, and doesn't need ORC unwinding data.  Keep it in
+ * a data section so objtool doesn't see it.
+ */
+
 	.code64
 SYM_TYPED_FUNC_START(relocate_kernel)
-	UNWIND_HINT_END_OF_STACK
-	UNWIND_HINT_FUNC
-	ANNOTATE_NOENDBR
 	/*
 	 * %rdi indirection_page
 	 * %rsi pa_control_page
@@ -124,12 +127,10 @@ SYM_TYPED_FUNC_START(relocate_kernel)
 	/* jump to identity mapped page */
 	addq	$identity_mapped, %rsi
 	subq	$__relocate_kernel_start, %rsi
-	ANNOTATE_RETPOLINE_SAFE
 	jmp	*%rsi
 SYM_CODE_END(relocate_kernel)
 
 SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
-	UNWIND_HINT_END_OF_STACK
 	/*
 	 * %rdi	indirection page
 	 * %rdx start address
@@ -199,7 +200,11 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	 * PAE is always set in the original CR4.
 	 */
 	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
-	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
+	movw	kexec_preserve_mce(%rip), %ax
+	testw	%ax, %ax
+	jz	.Lwrite_cr4
+	orl	$X86_CR4_MCE, %r13d
+.Lwrite_cr4:
 	movq	%r13, %cr4
 
 	/* Flush the TLB (needed?) */
@@ -250,7 +255,6 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	xorl	%r14d, %r14d
 	xorl	%r15d, %r15d
 
-	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 
@@ -264,7 +268,6 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	/* push the existing entry point onto the callee's stack */
 	pushq	%rdx
 
-	ANNOTATE_RETPOLINE_SAFE
 	call	*%rdx
 
 	/* get the re-entry point of the peer system */
@@ -276,7 +279,6 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 
 	/* Find start (and end) of this physical mapping of control page */
 	leaq	(%rip), %r8
-	ANNOTATE_NOENDBR
 	andq	$PAGE_MASK, %r8
 	lea	PAGE_SIZE(%r8), %rsp
 	movq	$1, %r11	/* Ensure preserve_context flag is set */
@@ -285,14 +287,11 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	addq	$virtual_mapped, %rax
 	subq	$__relocate_kernel_start, %rax
 	pushq	%rax
-	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 SYM_CODE_END(identity_mapped)
 
 SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
-	UNWIND_HINT_END_OF_STACK
-	ANNOTATE_NOENDBR // RET target, above
 	movq	saved_rsp(%rip), %rsp
 	movq	saved_cr4(%rip), %rax
 	movq	%rax, %cr4
@@ -317,14 +316,12 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
 	popq	%r12
 	popq	%rbp
 	popq	%rbx
-	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 SYM_CODE_END(virtual_mapped)
 
 	/* Do the copies */
 SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
-	UNWIND_HINT_END_OF_STACK
 	/*
 	 * %rdi indirection page
 	 * %r11 preserve_context
@@ -387,7 +384,6 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
 	lea	PAGE_SIZE(%rax), %rsi
 	jmp	.Lloop
 .Ldone:
-	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 SYM_CODE_END(swap_pages)
@@ -404,8 +400,6 @@ SYM_CODE_END(swap_pages)
 #define LSR             5       /*  Line Status               */
 
 SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
-	UNWIND_HINT_FUNC
-	ANNOTATE_NOENDBR
 	addw	$LSR, %dx
 	xchg	%al, %ah
 .Lxmtrdy_loop:
@@ -420,15 +414,11 @@ SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
 	xchg	%al, %ah
 	outb	%al, %dx
 pr_char_null:
-	ANNOTATE_NOENDBR
 
-	ANNOTATE_UNRET_SAFE
 	ret
 SYM_CODE_END(pr_char_8250)
 
 SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
-	UNWIND_HINT_FUNC
-	ANNOTATE_NOENDBR
 .Lxmtrdy_loop_mmio:
 	movb	(LSR*4)(%rdx), %ah
 	testb	$XMTRDY, %ah
@@ -438,7 +428,6 @@ SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
 
 .Lready_mmio:
 	movb	%al, (%rdx)
-	ANNOTATE_UNRET_SAFE
 	ret
 SYM_CODE_END(pr_char_8250_mmio32)
 
@@ -463,7 +452,6 @@ SYM_CODE_END(pr_char_8250_mmio32)
 
 /* Print the nybble in %bl, clobber %rax */
 SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
-	UNWIND_HINT_FUNC
 	movb	%bl, %al
 	nop
 	andb	$0x0f, %al
@@ -471,33 +459,26 @@ SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
 	cmpb	$0x3a, %al
 	jb	1f
 	addb	$('a' - '0' - 10), %al
-	ANNOTATE_RETPOLINE_SAFE
 1:	jmp	*%rsi
 SYM_CODE_END(pr_nybble)
 
 SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
-	UNWIND_HINT_FUNC
 	movq	$16, %rcx
 1:	rolq	$4, %rbx
 	call	pr_nybble
 	loop	1b
 	movb	$'\n', %al
-	ANNOTATE_RETPOLINE_SAFE
 	jmp	*%rsi
 SYM_CODE_END(pr_qword)
 
 .macro print_reg a, b, c, d, r
 	movb	$\a, %al
-	ANNOTATE_RETPOLINE_SAFE
 	call	*%rsi
 	movb	$\b, %al
-	ANNOTATE_RETPOLINE_SAFE
 	call	*%rsi
 	movb	$\c, %al
-	ANNOTATE_RETPOLINE_SAFE
 	call	*%rsi
 	movb	$\d, %al
-	ANNOTATE_RETPOLINE_SAFE
 	call	*%rsi
 	movq	\r, %rbx
 	call	pr_qword
@@ -506,7 +487,6 @@ SYM_CODE_END(pr_qword)
 SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
 	/* Each of these is 6 bytes. */
 .macro vec_err exc
-	UNWIND_HINT_ENTRY
 	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
 	nop
 	nop
@@ -515,14 +495,12 @@ SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
 .endm
 
 .macro vec_noerr exc
-	UNWIND_HINT_ENTRY
 	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
 	pushq	$0
 	pushq	$\exc
 	jmp	exc_handler
 .endm
 
-	ANNOTATE_NOENDBR
 	vec_noerr 0 // #DE
 	vec_noerr 1 // #DB
 	vec_noerr 2 // #NMI
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 63ff60a11be5..9c2abfd6ea26 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -100,7 +100,6 @@ const_pcpu_hot = pcpu_hot;
 #define KEXEC_RELOCATE_KERNEL					\
 	. = ALIGN(0x100);					\
 	__relocate_kernel_start = .;				\
-	*(.text..relocate_kernel);				\
 	*(.data..relocate_kernel);				\
 	__relocate_kernel_end = .;
 



More information about the kexec mailing list