[PATCH v18 03/13] arm64/kexec: Add core kexec support

James Morse james.morse at arm.com
Wed Jun 15 10:10:51 PDT 2016


Hi Geoff,

Looks good, I have a few observations and questions below.

On 09/06/16 21:08, Geoff Levand wrote:
> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
> arm64 architecture that add support for the kexec re-boot mechanism
> (CONFIG_KEXEC) on arm64 platforms.
> 
> Signed-off-by: Geoff Levand <geoff at infradead.org>

> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> new file mode 100644
> index 0000000..05f7c21
> --- /dev/null
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -0,0 +1,185 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/highmem.h>

We don't have/need highmem on arm64. The kmap()/kunmap() calls just obscure what
is going on.


> +#include <linux/kexec.h>
> +#include <linux/of_fdt.h>

What do you need of_fdt.h for? I guess this should be in patch 4.


> +#include <linux/slab.h>

The control page was already allocated, I can't see anything else being
allocated... What do you need slab.h for?


> +#include <linux/smp.h>
> +#include <linux/uaccess.h>

User space access? I guess this should be in patch 4.


> +
> +#include <asm/cacheflush.h>
> +#include <asm/cpu_ops.h>
> +#include <asm/mmu_context.h>
> +#include <asm/system_misc.h>

I can't see anything in system_misc.h that you are using in here.


> +
> +#include "cpu-reset.h"
> +
> +/* Global variables for the arm64_relocate_new_kernel routine. */
> +extern const unsigned char arm64_relocate_new_kernel[];
> +extern const unsigned long arm64_relocate_new_kernel_size;
> +
> +static unsigned long kimage_start;
> +
> +void machine_kexec_cleanup(struct kimage *kimage)
> +{
> +	/* Empty routine needed to avoid build errors. */
> +}
> +
> +/**
> + * machine_kexec_prepare - Prepare for a kexec reboot.
> + *
> + * Called from the core kexec code when a kernel image is loaded.
> + * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
> + * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
> + */
> +int machine_kexec_prepare(struct kimage *kimage)
> +{
> +	kimage_start = kimage->start;
> +
> +	if (kimage->type != KEXEC_TYPE_CRASH) {
> +		if (cpus_are_stuck_in_kernel()) {
> +			pr_err("Can't kexec: failed CPUs are stuck in the kernel.\n");
> +			return -EBUSY;
> +		}
> +
> +		if (num_online_cpus() > 1) {
> +#ifdef CONFIG_HOTPLUG_CPU
> +			/* any_cpu as we don't mind being preempted */
> +			int any_cpu = raw_smp_processor_id();
> +
> +			if (cpu_ops[any_cpu]->cpu_die)
> +				return 0;
> +#endif /* CONFIG_HOTPLUG_CPU */
> +
> +			pr_err("Can't kexec: no mechanism to offline secondary CPUs.\n");
> +			return -EBUSY;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * kexec_list_flush - Helper to flush the kimage list to PoC.
> + */
> +static void kexec_list_flush(struct kimage *kimage)
> +{
> +	kimage_entry_t *entry;
> +	unsigned int flag;
> +
> +	for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) {
> +		void *addr = kmap(phys_to_page(*entry & PAGE_MASK));
> +
> +		flag = *entry & IND_FLAGS;
> +
> +		switch (flag) {
> +		case IND_INDIRECTION:
> +			entry = (kimage_entry_t *)addr - 1;

This '-1' is so that entry points before the first entry of the new table,
and is un-done by entry++ next time round the loop...
If I'm right, could you add a comment to that effect? It took me a little while
to work out!

kexec_core.c has a snazzy macro, for_each_kimage_entry(); it's a shame it's not
in a header file.
This loop does the same but with two variables instead of three. These
IND_INDIRECTION pages only appear at the end of a list, so this list-walking
looks correct.


> +			__flush_dcache_area(addr, PAGE_SIZE);

So if we find an indirection pointer, we switch entry to the new page, and clean
it to the PoC, because later we walk this list with the MMU off.

But what cleans the very first page?


> +			break;
> +		case IND_DESTINATION:
> +			break;
> +		case IND_SOURCE:
> +			__flush_dcache_area(addr, PAGE_SIZE);
> +			break;
> +		case IND_DONE:
> +			break;
> +		default:
> +			BUG();

Unless you think it's less readable, you could group the clauses together:

> 		case IND_INDIRECTION:
> 			entry = (kimage_entry_t *)addr - 1;
> 		case IND_SOURCE:
> 			__flush_dcache_area(addr, PAGE_SIZE);
> 		case IND_DESTINATION:
> 		case IND_DONE:
> 			break;


> +		}
> +		kunmap(addr);
> +	}
> +}
> +
> +/**
> + * kexec_segment_flush - Helper to flush the kimage segments to PoC.
> + */
> +static void kexec_segment_flush(const struct kimage *kimage)
> +{
> +	unsigned long i;
> +
> +	pr_debug("%s:\n", __func__);
> +
> +	for (i = 0; i < kimage->nr_segments; i++) {
> +		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
> +			i,
> +			kimage->segment[i].mem,
> +			kimage->segment[i].mem + kimage->segment[i].memsz,
> +			kimage->segment[i].memsz,
> +			kimage->segment[i].memsz /  PAGE_SIZE);
> +
> +		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
> +			kimage->segment[i].memsz);
> +	}
> +}
> +
> +/**
> + * machine_kexec - Do the kexec reboot.
> + *
> + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
> + */
> +void machine_kexec(struct kimage *kimage)
> +{
> +	phys_addr_t reboot_code_buffer_phys;
> +	void *reboot_code_buffer;
> +
> +	/*
> +	 * New cpus may have become stuck_in_kernel after we loaded the image.
> +	 */
> +	BUG_ON(cpus_are_stuck_in_kernel() && (num_online_cpus() > 1));
> +
> +	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
> +	reboot_code_buffer = kmap(kimage->control_code_page);
> +
> +	/*
> +	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
> +	 * after the kernel is shut down.
> +	 */
> +	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
> +		arm64_relocate_new_kernel_size);
> +
> +	/* Flush the reboot_code_buffer in preparation for its execution. */
> +	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
> +	flush_icache_range((uintptr_t)reboot_code_buffer,
> +		arm64_relocate_new_kernel_size);
> +
> +	/* Flush the kimage list. */
> +	kexec_list_flush(kimage);
> +
> +	/* Flush the new image if already in place. */
> +	if (kimage->head & IND_DONE)
> +		kexec_segment_flush(kimage);
> +
> +	pr_info("Bye!\n");
> +
> +	/* Disable all DAIF exceptions. */
> +	asm volatile ("msr daifset, #0xf" : : : "memory");
> +
> +	/*
> +	 * cpu_soft_restart will shutdown the MMU, disable data caches, then
> +	 * transfer control to the reboot_code_buffer which contains a copy of
> +	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
> +	 * uses physical addressing to relocate the new image to its final
> +	 * position and transfers control to the image entry point when the
> +	 * relocation is complete.
> +	 */
> +
> +	cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
> +		kimage_start, 0);
> +
> +	BUG(); /* Should never get here. */
> +}
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> +	/* Empty routine needed to avoid build errors. */
> +}
> diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> new file mode 100644
> index 0000000..e380db3
> --- /dev/null
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -0,0 +1,131 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kexec.h>
> +
> +#include <asm/assembler.h>
> +#include <asm/kexec.h>
> +#include <asm/page.h>
> +#include <asm/sysreg.h>
> +
> +/*
> + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
> + *
> + * The memory that the old kernel occupies may be overwritten when coping the
> + * new image to its final location.  To assure that the
> + * arm64_relocate_new_kernel routine which does that copy is not overwritten,
> + * all code and data needed by arm64_relocate_new_kernel must be between the
> + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
> + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
> + * control_code_page, a special page which has been set up to be preserved
> + * during the copy operation.
> + */
> +.globl arm64_relocate_new_kernel
> +arm64_relocate_new_kernel:

All the other asm functions use ENTRY(), which would do the .globl and alignment
for you. (You would need an ENDPROC(arm64_relocate_new_kernel) too.)


> +
> +	/* Setup the list loop variables. */
> +	mov	x18, x1				/* x18 = kimage_start */
> +	mov	x17, x0				/* x17 = kimage_head */
> +	dcache_line_size x16, x0		/* x16 = dcache line size */
> +	mov	x15, xzr			/* x15 = segment start */

What uses this 'segment start'?


> +	mov	x14, xzr			/* x14 = entry ptr */
> +	mov	x13, xzr			/* x13 = copy dest */
> +
> +	/* Clear the sctlr_el2 flags. */
> +	mrs	x0, CurrentEL
> +	cmp	x0, #CurrentEL_EL2
> +	b.ne	1f
> +	mrs	x0, sctlr_el2
> +	ldr	x1, =SCTLR_ELx_FLAGS
> +	bic	x0, x0, x1
> +	msr	sctlr_el2, x0
> +	isb
> +1:
> +
> +	/* Check if the new image needs relocation. */
> +	cbz	x17, .Ldone

Does this happen? Do we ever come across an empty slot in the tables?

kimage_terminate() adds the IND_DONE entry, so we should never see an empty
slot. kexec_list_flush() would BUG() on this too, and we call that
unconditionally on the way in here.


> +	tbnz	x17, IND_DONE_BIT, .Ldone
> +
> +.Lloop:
> +	and	x12, x17, PAGE_MASK		/* x12 = addr */
> +
> +	/* Test the entry flags. */
> +.Ltest_source:
> +	tbz	x17, IND_SOURCE_BIT, .Ltest_indirection
> +
> +	/* Invalidate dest page to PoC. */
> +	mov     x0, x13
> +	add     x20, x0, #PAGE_SIZE
> +	sub     x1, x16, #1
> +	bic     x0, x0, x1
> +2:	dc      ivac, x0

This relies on an IND_DESTINATION being found first for x13 to be set to
something other than 0. I guess if kexec-core hands us a broken list, all bets
are off!


> +	add     x0, x0, x16
> +	cmp     x0, x20
> +	b.lo    2b
> +	dsb     sy
> +
> +	mov x20, x13
> +	mov x21, x12
> +	copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
> +
> +	/* dest += PAGE_SIZE */
> +	add	x13, x13, PAGE_SIZE
> +	b	.Lnext
> +
> +.Ltest_indirection:
> +	tbz	x17, IND_INDIRECTION_BIT, .Ltest_destination
> +
> +	/* ptr = addr */
> +	mov	x14, x12
> +	b	.Lnext
> +
> +.Ltest_destination:
> +	tbz	x17, IND_DESTINATION_BIT, .Lnext
> +
> +	mov	x15, x12

What uses this 'segment start'?


> +
> +	/* dest = addr */
> +	mov	x13, x12
> +
> +.Lnext:
> +	/* entry = *ptr++ */
> +	ldr	x17, [x14], #8
> +
> +	/* while (!(entry & DONE)) */
> +	tbz	x17, IND_DONE_BIT, .Lloop
> +
> +.Ldone:

        /* wait for writes from copy_page to finish */
> +	dsb	nsh
> +	ic	iallu
> +	dsb	nsh
> +	isb
> +
> +	/* Start new image. */
> +	mov	x0, xzr
> +	mov	x1, xzr
> +	mov	x2, xzr
> +	mov	x3, xzr
> +	br	x18
> +
> +.ltorg
> +
> +.align 3	/* To keep the 64-bit values below naturally aligned. */
> +
> +.Lcopy_end:
> +.org	KEXEC_CONTROL_PAGE_SIZE

Why do we need to pad up to KEXEC_CONTROL_PAGE_SIZE?
In machine_kexec() we only copy arm64_relocate_new_kernel_size bytes, so it
shouldn't matter what is here. As far as I can see we don't even access it.


> +
> +/*
> + * arm64_relocate_new_kernel_size - Number of bytes to copy to the
> + * control_code_page.
> + */
> +.globl arm64_relocate_new_kernel_size
> +arm64_relocate_new_kernel_size:
> +	.quad	.Lcopy_end - arm64_relocate_new_kernel


Thanks,

James




More information about the kexec mailing list