[PATCH] arm64: entry: always set GIC_PRIO_PSR_I_SET during entry

Marc Zyngier maz at kernel.org
Wed Apr 28 15:41:17 BST 2021


Hi Mark,

On Wed, 28 Apr 2021 12:15:55 +0100,
Mark Rutland <mark.rutland at arm.com> wrote:
> 
> Zenghui reports that booting a kernel with "irqchip.gicv3_pseudo_nmi=1"
> on the command line hits a warning during kernel entry, due to the way
> we manipulate the PMR.
> 
> Early in the entry sequence, we call lockdep_hardirqs_off() to inform
> lockdep that interrupts have been masked (as the HW sets DAIF wqhen
> entering an exception). Architecturally PMR_EL1 is not affected by
> exception entry, and we don't set GIC_PRIO_PSR_I_SET in the PMR early in
> the exception entry sequence, so early in exception entry the PMR can
> indicate that interrupts are unmasked even though they are masked by
> DAIF.
> 
> If DEBUG_LOCKDEP is selected, lockdep_hardirqs_off() will check that
> interrupts are masked, before we set GIC_PRIO_PSR_I_SET in any of the
> exception entry paths, and hence lockdep_hardirqs_off() will WARN() that
> something is amiss.
> 
> We can avoid this by consistently setting GIC_PRIO_PSR_I_SET during
> exception entry so that kernel code sees a consistent environment. We
> must also update local_daif_inherit() to undo this, as currently only
> touches DAIF. For other paths, local_daif_restore() will update both
> DAIF and the PMR. With this done, we can remove the existing special
> cases which set this later in the entry code.
> 
> We always use (GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET) for consistency with
> local_daif_save(), as this will warn if it ever encounters
> (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET), and never sets this itself. This
> matches the gic_prio_kentry_setup that we have to retain for
> ret_to_user.
> 
> The original splat from Zenghui's report was:
> 
> | DEBUG_LOCKS_WARN_ON(!irqs_disabled())
> | WARNING: CPU: 3 PID: 125 at kernel/locking/lockdep.c:4258 lockdep_hardirqs_off+0xd4/0xe8
> | Modules linked in:
> | CPU: 3 PID: 125 Comm: modprobe Tainted: G        W         5.12.0-rc8+ #463
> | Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
> | pstate: 604003c5 (nZCv DAIF +PAN -UAO -TCO BTYPE=--)
> | pc : lockdep_hardirqs_off+0xd4/0xe8
> | lr : lockdep_hardirqs_off+0xd4/0xe8
> | sp : ffff80002a39bad0
> | pmr_save: 000000e0
> | x29: ffff80002a39bad0 x28: ffff0000de214bc0
> | x27: ffff0000de1c0400 x26: 000000000049b328
> | x25: 0000000000406f30 x24: ffff0000de1c00a0
> | x23: 0000000020400005 x22: ffff8000105f747c
> | x21: 0000000096000044 x20: 0000000000498ef9
> | x19: ffff80002a39bc88 x18: ffffffffffffffff
> | x17: 0000000000000000 x16: ffff800011c61eb0
> | x15: ffff800011700a88 x14: 0720072007200720
> | x13: 0720072007200720 x12: 0720072007200720
> | x11: 0720072007200720 x10: 0720072007200720
> | x9 : ffff80002a39bad0 x8 : ffff80002a39bad0
> | x7 : ffff8000119f0800 x6 : c0000000ffff7fff
> | x5 : ffff8000119f07a8 x4 : 0000000000000001
> | x3 : 9bcdab23f2432800 x2 : ffff800011730538
> | x1 : 9bcdab23f2432800 x0 : 0000000000000000
> | Call trace:
> |  lockdep_hardirqs_off+0xd4/0xe8
> |  enter_from_kernel_mode.isra.5+0x7c/0xa8
> |  el1_abort+0x24/0x100
> |  el1_sync_handler+0x80/0xd0
> |  el1_sync+0x6c/0x100
> |  __arch_clear_user+0xc/0x90
> |  load_elf_binary+0x9fc/0x1450
> |  bprm_execve+0x404/0x880
> |  kernel_execve+0x180/0x188
> |  call_usermodehelper_exec_async+0xdc/0x158
> |  ret_from_fork+0x10/0x18
> 
> Fixes: 23529049c6842382 ("arm64: entry: fix non-NMI user<->kernel transitions")
> Fixes: 7cd1ea1010acbede ("arm64: entry: fix non-NMI kernel<->kernel transitions")
> Fixes: f0cd5ac1e4c53cb6 ("arm64: entry: fix NMI {user, kernel}->kernel transitions")
> Fixes: 2a9b3e6ac69a8bf1 ("arm64: entry: fix EL1 debug transitions")
> Link: https://lore.kernel.org/r/f4012761-026f-4e51-3a0c-7524e434e8b3@huawei.com
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Reported-by: Zenghui Yu <yuzenghui at huawei.com>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Will Deacon <will at kernel.org>
> ---
>  arch/arm64/include/asm/daifflags.h |  3 +++
>  arch/arm64/kernel/entry-common.c   | 17 -----------------
>  arch/arm64/kernel/entry.S          | 15 ++-------------
>  3 files changed, 5 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
> index 1c26d7baa67f..cfdde3a56805 100644
> --- a/arch/arm64/include/asm/daifflags.h
> +++ b/arch/arm64/include/asm/daifflags.h
> @@ -131,6 +131,9 @@ static inline void local_daif_inherit(struct pt_regs *regs)
>  	if (interrupts_enabled(regs))
>  		trace_hardirqs_on();
>  
> +	if (system_uses_irq_prio_masking())
> +		gic_write_pmr(regs->pmr_save);
> +
>  	/*
>  	 * We can't use local_daif_restore(regs->pstate) here as
>  	 * system_has_prio_mask_debugging() won't restore the I bit if it can
> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> index 9d3588450473..117412bae915 100644
> --- a/arch/arm64/kernel/entry-common.c
> +++ b/arch/arm64/kernel/entry-common.c
> @@ -226,14 +226,6 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
>  {
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	/*
> -	 * The CPU masked interrupts, and we are leaving them masked during
> -	 * do_debug_exception(). Update PMR as if we had called
> -	 * local_daif_mask().
> -	 */
> -	if (system_uses_irq_prio_masking())
> -		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
> -
>  	arm64_enter_el1_dbg(regs);
>  	if (!cortex_a76_erratum_1463225_debug_handler(regs))
>  		do_debug_exception(far, esr, regs);
> @@ -398,9 +390,6 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
>  	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	if (system_uses_irq_prio_masking())
> -		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
> -
>  	enter_from_user_mode();
>  	do_debug_exception(far, esr, regs);
>  	local_daif_restore(DAIF_PROCCTX_NOIRQ);
> @@ -408,9 +397,6 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
>  
>  static void noinstr el0_svc(struct pt_regs *regs)
>  {
> -	if (system_uses_irq_prio_masking())
> -		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
> -
>  	enter_from_user_mode();
>  	cortex_a76_erratum_1463225_svc_handler();
>  	do_el0_svc(regs);
> @@ -486,9 +472,6 @@ static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
>  
>  static void noinstr el0_svc_compat(struct pt_regs *regs)
>  {
> -	if (system_uses_irq_prio_masking())
> -		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
> -
>  	enter_from_user_mode();
>  	cortex_a76_erratum_1463225_svc_handler();
>  	do_el0_svc_compat(regs);
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 6acfc5e6b5e0..941bb52b5e21 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -292,6 +292,8 @@ alternative_else_nop_endif
>  alternative_if ARM64_HAS_IRQ_PRIO_MASKING
>  	mrs_s	x20, SYS_ICC_PMR_EL1
>  	str	x20, [sp, #S_PMR_SAVE]
> +	mov	x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
> +	msr_s	SYS_ICC_PMR_EL1, x20

I'm a bit worried about forcing PMR to IRQON at this stage. We could
have been in IRQOFF state and entering the kernel because of a
NMI. Don't we risk losing track of how we made it here? In the current
code, the ORR makes it plain that only the I bit has changed at this
stage.

Is there some other state tracking that make this complexity
irrelevant?

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.



More information about the linux-arm-kernel mailing list