[RFC PATCH v3 4/8] arm64: Handle early CPU boot failures

Will Deacon will.deacon at arm.com
Tue Dec 15 03:55:19 PST 2015


On Wed, Dec 09, 2015 at 09:57:15AM +0000, Suzuki K. Poulose wrote:
> A secondary CPU could fail to come online due to insufficient
> capabilities and could simply die or loop in the kernel.
> e.g, a CPU with no support for the selected kernel PAGE_SIZE
> loops in kernel with MMU turned off.
> or a hotplugged CPU which doesn't have one of the advertised
> system capability will die during the activation.
> 
> There is no way to synchronise the status of the failing CPU
> back to the master. This patch solves the issue by adding a
> field to the secondary_data which can be updated by the failing
> CPU.
> 
> Here are the possible states :
> 
>  -1. CPU_WAIT_STATUS - Initial value set by the master CPU.
> 
>  0. CPU_BOOT_SUCCESS - CPU has booted successfully.
> 
>  1. CPU_KILL_ME - CPU has invoked cpu_ops->die, indicating the
> master CPU to synchronise by issuing a cpu_ops->cpu_kill.
> 
>  2. CPU_STUCK_IN_KERNEL - CPU couldn't invoke die(), instead is
> looping in the kernel. This information could be used by say,
> kexec to check if it is really safe to do a kexec reboot.
> 
>  3. CPU_PANIC_KERNEL - CPU detected some serious issues which
> requires kernel to crash immediately. The secondary CPU cannot
> call panic() until it has initialised the GIC. This flag can
> be used to instruct the master to do so.
> 
> Cc: Will Deacon <will.deacon at arm.com>
> Cc: Mark Rutland <mark.rutland at arm.com>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Signed-off-by: Suzuki K. Poulose <suzuki.poulose at arm.com>
> ---
>  arch/arm64/include/asm/smp.h    |   24 +++++++++++++++++++-
>  arch/arm64/kernel/asm-offsets.c |    3 +++
>  arch/arm64/kernel/head.S        |   19 ++++++++++++++--
>  arch/arm64/kernel/smp.c         |   48 ++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 90 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
> index 13ce01f..240ab3d 100644
> --- a/arch/arm64/include/asm/smp.h
> +++ b/arch/arm64/include/asm/smp.h
> @@ -16,6 +16,19 @@
>  #ifndef __ASM_SMP_H
>  #define __ASM_SMP_H
>  
> +/* Values for secondary_data.status */
> +
> +#define CPU_WAIT_RESULT		-1
> +#define CPU_BOOT_SUCCESS	0
> +/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
> +#define CPU_KILL_ME		1
> +/* The cpu couldn't die gracefully and is looping in the kernel */
> +#define CPU_STUCK_IN_KERNEL	2
> +/* Fatal system error detected by secondary CPU, crash the system */
> +#define CPU_PANIC_KERNEL	3
> +
> +#ifndef __ASSEMBLY__
> +
>  #include <linux/threads.h>
>  #include <linux/cpumask.h>
>  #include <linux/thread_info.h>
> @@ -54,10 +67,15 @@ asmlinkage void secondary_start_kernel(void);
>  
>  /*
>   * Initial data for bringing up a secondary CPU.
> + * @stack  - sp for the secondary CPU
> + * @status - Result passed back from the secondary CPU to
> + *           indicate failure.
>   */
>  struct secondary_data {
>  	void *stack;
> -};
> +	unsigned long status;
> +} ____cacheline_aligned;

Why is this necessary?

> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index b225d34..e0a42dd 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -34,6 +34,7 @@
>  #include <asm/pgtable-hwdef.h>
>  #include <asm/pgtable.h>
>  #include <asm/page.h>
> +#include <asm/smp.h>
>  #include <asm/sysreg.h>
>  #include <asm/thread_info.h>
>  #include <asm/virt.h>
> @@ -605,7 +606,7 @@ ENTRY(secondary_startup)
>  ENDPROC(secondary_startup)
>  
>  ENTRY(__secondary_switched)
> -	ldr	x0, [x22]			// get secondary_data.stack
> +	ldr	x0, [x22, #CPU_BOOT_STACK]	// get secondary_data.stack
>  	mov	sp, x0
>  	mov	x29, #0
>  	b	secondary_start_kernel
> @@ -615,6 +616,7 @@ ENDPROC(__secondary_switched)
>   * Enable the MMU.
>   *
>   *  x0  = SCTLR_EL1 value for turning on the MMU.
> + *  x22 = __va(secondary_data)
>   *  x27 = *virtual* address to jump to upon completion
>   *
>   * Other registers depend on the function called upon completion.
> @@ -647,6 +649,19 @@ __enable_mmu:
>  ENDPROC(__enable_mmu)
>  
>  __no_granule_support:
> +	/* Indicate that this CPU can't boot and is stuck in the kernel */
> +	cmp	x22, 0				// Boot CPU doesn't update status
> +	b.eq	1f
> +	adrp	x1, secondary_data
> +	add	x1, x1, #:lo12:secondary_data	// x1 = __pa(secondary_data)

This feels a bit grotty. You end up using the virtual address of
secondary_data to figure out if you're the boot CPU or not, and then you
end up having to compute the physical address of the same variable anyway.

> +	mov	x0, #CPU_STUCK_IN_KERNEL
> +	str	x0, [x1, #CPU_BOOT_STATUS]	// update the secondary_data.status
> +	/* flush the data to PoC */

Can you call __inval_cache_range instead of open-coding this?

> +	isb

Why?

> +	dc	civac, x1
> +	dsb	sy
> +	isb

Why?

> +1:
>  	wfe

Curious, but do you know why we don't have a wfi here too (like we do in
the C code)?

> -	b __no_granule_support
> +	b 1b
>  ENDPROC(__no_granule_support)
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 607d876..708f4b1 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -63,6 +63,8 @@
>   * where to place its SVC stack
>   */
>  struct secondary_data secondary_data;
> +/* Number of CPUs which aren't online, but looping in kernel text. */
> +u32 cpus_stuck_in_kernel;
>  
>  enum ipi_msg_type {
>  	IPI_RESCHEDULE,
> @@ -72,6 +74,16 @@ enum ipi_msg_type {
>  	IPI_IRQ_WORK,
>  };
>  
> +#ifdef CONFIG_HOTPLUG_CPU
> +static int op_cpu_kill(unsigned int cpu);
> +#else
> +static inline int op_cpu_kill(unsigned int cpu)
> +{
> +	return -ENOSYS;
> +}
> +#endif
> +
> +
>  /*
>   * Boot a secondary CPU, and assign it the specified idle task.
>   * This also gives us the initial stack to use for this CPU.
> @@ -86,6 +98,12 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle)
>  
>  static DECLARE_COMPLETION(cpu_running);
>  
> +void update_cpu_boot_status(unsigned long status)
> +{
> +	secondary_data.status = status;
> +	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
> +}
> +
>  int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  {
>  	int ret;
> @@ -95,7 +113,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  	 * page tables.
>  	 */
>  	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
> -	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
> +	update_cpu_boot_status(CPU_WAIT_RESULT);
>  
>  	/*
>  	 * Now bring the CPU into our world.
> @@ -117,7 +135,31 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
>  	}
>  
> +	mb();

Why? (and why not smp_mb(), if the barrier is actually needed?).

> +
>  	secondary_data.stack = NULL;
> +	if (ret && secondary_data.status) {
> +		switch(secondary_data.status) {
> +		default:
> +			pr_err("CPU%u: failed in unknown state : 0x%lx\n",
> +					cpu, secondary_data.status);
> +			break;
> +		case CPU_KILL_ME:
> +			if (op_cpu_kill(cpu)) {
> +				pr_crit("CPU%u: may not have shut down cleanly\n",
> +							cpu);
> +				cpus_stuck_in_kernel++;

Maybe restructure this so the failed kill case is a fallthrough to the
CPU_STUCK_IN_KERNEL case?

> +			} else
> +				pr_crit("CPU%u: died during early boot\n", cpu);

Missing braces.

> +			break;
> +		case CPU_STUCK_IN_KERNEL:
> +			pr_crit("CPU%u: is stuck in kernel\n", cpu);
> +			cpus_stuck_in_kernel++;
> +			break;
> +		case CPU_PANIC_KERNEL:
> +			panic("CPU%u detected unsupported configuration\n", cpu);

It would nice to show what the configuration difference was, but maybe
that's work for later.

> +		}
> +	}
>  
>  	return ret;
>  }
> @@ -185,6 +227,7 @@ asmlinkage void secondary_start_kernel(void)
>  	 */
>  	pr_info("CPU%u: Booted secondary processor [%08x]\n",
>  					 cpu, read_cpuid_id());
> +	update_cpu_boot_status(CPU_BOOT_SUCCESS);

Why do we need to continue with the cacheflushing at this point?

>  	set_cpu_online(cpu, true);
>  	complete(&cpu_running);
>  
> @@ -327,10 +370,13 @@ void cpu_die_early(void)
>  	set_cpu_present(cpu, 0);
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> +	update_cpu_boot_status(CPU_KILL_ME);
>  	/* Check if we can park ourselves */
>  	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
>  		cpu_ops[cpu]->cpu_die(cpu);
>  #endif
> +	update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
> +	__flush_dcache_area(&secondary_data, sizeof(secondary_data));

Extra flushing, but I don't see why you need it at all when you're
running in C on the CPU coming up.

Will



More information about the linux-arm-kernel mailing list