[PATCH v3] arm64: sdei: abort running SDEI handlers during crash

D Scott Phillips scott at os.amperecomputing.com
Thu Jun 22 13:35:14 PDT 2023


D Scott Phillips <scott at os.amperecomputing.com> writes:

> Interrupts are blocked in SDEI context, per the SDEI spec: "The client
> interrupts cannot preempt the event handler." If we crashed in the SDEI
> handler-running context (as with ACPI's AGDI) then we need to clean up the
> SDEI state before proceeding to the crash kernel so that the crash kernel
> can have working interrupts.
>
> Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME
> the handler, discarding the interrupted context.
>
> Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking")
> Signed-off-by: D Scott Phillips <scott at os.amperecomputing.com>

Ping on this one.

> ---
>  Changes since v2:
>  - Dropped the patch fiddling with the sdei conduit.
>  v2 Link: https://lore.kernel.org/linux-arm-kernel/20230329202519.6110-1-scott@os.amperecomputing.com/
>
>  Changes since v1:
>  - Store the active SDEI event being handled per-cpu, use the per-cpu active
>    handler information to know when to abort.
>  - Add prints before attempting to abort sdei handlers.
>  v1 Link: https://lore.kernel.org/linux-arm-kernel/20230204000851.3871-1-scott@os.amperecomputing.com/
>
>  arch/arm64/include/asm/sdei.h |  8 ++++++++
>  arch/arm64/kernel/entry.S     | 25 +++++++++++++++++++++++++
>  arch/arm64/kernel/sdei.c      |  3 +++
>  arch/arm64/kernel/smp.c       | 27 +++++++++++++++++++++++----
>  4 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
> index 4292d9bafb9d..dc2f038a61ef 100644
> --- a/arch/arm64/include/asm/sdei.h
> +++ b/arch/arm64/include/asm/sdei.h
> @@ -17,6 +17,11 @@
>  
>  #include <asm/virt.h>
>  
> +#ifdef CONFIG_ARM_SDE_INTERFACE
> +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
> +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
> +#endif
> +
>  extern unsigned long sdei_exit_mode;
>  
>  /* Software Delegated Exception entry point from firmware*/
> @@ -29,6 +34,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
>  						   unsigned long pc,
>  						   unsigned long pstate);
>  
> +/* Abort a running handler. Context is discarded. */
> +void sdei_handler_abort(void);
> +
>  /*
>   * The above entry point does the minimum to call C code. This function does
>   * anything else, before calling the driver.
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index ab2a6e33c052..e49f72b5fb63 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -1007,6 +1007,12 @@ SYM_CODE_START(__sdei_asm_handler)
>  	ldrb	w4, [x19, #SDEI_EVENT_PRIORITY]
>  #endif
>  
> +	cbnz	w4, 1f
> +	adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
> +	b	2f
> +1:	adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
> +2:	str	x19, [x5]
> +
>  #ifdef CONFIG_VMAP_STACK
>  	/*
>  	 * entry.S may have been using sp as a scratch register, find whether
> @@ -1072,6 +1078,13 @@ SYM_CODE_START(__sdei_asm_handler)
>  
>  	ldr_l	x2, sdei_exit_mode
>  
> +	ldrb	w3, [x4, #SDEI_EVENT_PRIORITY]
> +	cbnz	w3, 1f
> +	adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
> +	b	2f
> +1:	adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
> +2:	str	xzr, [x5]
> +
>  alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
>  	sdei_handler_exit exit_mode=x2
>  alternative_else_nop_endif
> @@ -1082,4 +1095,16 @@ alternative_else_nop_endif
>  #endif
>  SYM_CODE_END(__sdei_asm_handler)
>  NOKPROBE(__sdei_asm_handler)
> +
> +SYM_CODE_START(sdei_handler_abort)
> +	mov_q	x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
> +	adr	x1, 1f
> +	ldr_l	x2, sdei_exit_mode
> +	sdei_handler_exit exit_mode=x2
> +	// exit the handler and jump to the next instruction.
> +	// Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx.
> +1:	ret
> +SYM_CODE_END(sdei_handler_abort)
> +NOKPROBE(sdei_handler_abort)
> +
>  #endif /* CONFIG_ARM_SDE_INTERFACE */
> diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
> index 830be01af32d..255d12f881c2 100644
> --- a/arch/arm64/kernel/sdei.c
> +++ b/arch/arm64/kernel/sdei.c
> @@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
>  DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
>  #endif
>  
> +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
> +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
> +
>  static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
>  {
>  	unsigned long *p;
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 4e8327264255..ea1595b51590 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -1047,10 +1047,8 @@ void crash_smp_send_stop(void)
>  	 * If this cpu is the only one alive at this point in time, online or
>  	 * not, there are no stop messages to be sent around, so just back out.
>  	 */
> -	if (num_other_online_cpus() == 0) {
> -		sdei_mask_local_cpu();
> -		return;
> -	}
> +	if (num_other_online_cpus() == 0)
> +		goto skip_ipi;
>  
>  	cpumask_copy(&mask, cpu_online_mask);
>  	cpumask_clear_cpu(smp_processor_id(), &mask);
> @@ -1069,7 +1067,28 @@ void crash_smp_send_stop(void)
>  		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
>  			cpumask_pr_args(&mask));
>  
> +skip_ipi:
>  	sdei_mask_local_cpu();
> +
> +#ifdef CONFIG_ARM_SDE_INTERFACE
> +	/*
> +	 * If the crash happened in an SDEI event handler then we need to
> +	 * finish the handler with the firmware so that we can have working
> +	 * interrupts in the crash kernel.
> +	 */
> +	if (__this_cpu_read(sdei_active_critical_event)) {
> +		pr_warn("SMP: stopped CPUs from SDEI critical event handler "
> +			"context, attempting to finish handler.\n");
> +		sdei_handler_abort();
> +		__this_cpu_write(sdei_active_critical_event, NULL);
> +	}
> +	if (__this_cpu_read(sdei_active_normal_event)) {
> +		pr_warn("SMP: stopped CPUs from SDEI normal event handler "
> +			"context, attempting to finish handler.\n");
> +		sdei_handler_abort();
> +		__this_cpu_write(sdei_active_normal_event, NULL);
> +	}
> +#endif
>  }
>  
>  bool smp_crash_stop_failed(void)
> -- 
> 2.40.1
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



More information about the linux-arm-kernel mailing list