[PATCH 04/10] entry: Split kernel mode logic from irqentry_{enter,exit}()

Jinjie Ruan ruanjinjie at huawei.com
Tue Apr 7 18:32:22 PDT 2026



On 2026/4/7 21:16, Mark Rutland wrote:
> The generic irqentry code has entry/exit functions specifically for
> exceptions taken from user mode, but doesn't have entry/exit functions
> specifically for exceptions taken from kernel mode.
> 
> It would be helpful to have separate entry/exit functions specifically
> for exceptions taken from kernel mode. This would make the structure of
> the entry code more consistent, and would make it easier for
> architectures to manage logic specific to exceptions taken from kernel
> mode.
> 
> Move the logic specific to kernel mode out of irqentry_enter() and
> irqentry_exit() into new irqentry_enter_from_kernel_mode() and
> irqentry_exit_to_kernel_mode() functions. These are marked
> __always_inline and placed in irq-entry-common.h, as with
> irqentry_enter_from_user_mode() and irqentry_exit_to_user_mode(), so
> that they can be inlined into architecture-specific wrappers. The
> existing out-of-line irqentry_enter() and irqentry_exit() functions are
> retained as callers of the new functions.
> 
> The lockdep assertion from irqentry_exit() is moved into
> irqentry_exit_to_user_mode() and irqentry_exit_to_kernel_mode(). This
> was previously missing from irqentry_exit_to_user_mode() when called
> directly, and any new lockdep assertion failure resulting from this
> change indicates a latent bug.
> 
> Aside from the lockdep change noted above, there should be no functional
> change as a result of this patch.

Reviewed-by: Jinjie Ruan <ruanjinjie at huawei.com>

> 
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Andy Lutomirski <luto at kernel.org>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Jinjie Ruan <ruanjinjie at huawei.com>
> Cc: Peter Zijlstra <peterz at infradead.org>
> Cc: Thomas Gleixner <tglx at kernel.org>
> Cc: Vladimir Murzin <vladimir.murzin at arm.com>
> Cc: Will Deacon <will at kernel.org>
> ---
>  include/linux/irq-entry-common.h | 103 +++++++++++++++++++++++++++++++
>  kernel/entry/common.c            | 103 +++----------------------------
>  2 files changed, 111 insertions(+), 95 deletions(-)
> 
> Thomas/Peter/Andy, as mentioned on IRC, I haven't created kerneldoc
> comments for these new functions because the existing comments don't
> seem all that consistent (e.g. for user mode vs kernel mode), and I
> suspect we want to rewrite them all in one go for wider consistency.
> 
> I'm happy to respin this, or to follow-up with that as per your
> preference.
> 
> Mark.
> 
> diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
> index d1e8591a59195..2206150e526d8 100644
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -304,6 +304,8 @@ static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
>   */
>  static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
>  {
> +	lockdep_assert_irqs_disabled();
> +
>  	instrumentation_begin();
>  	irqentry_exit_to_user_mode_prepare(regs);
>  	instrumentation_end();
> @@ -356,6 +358,107 @@ void dynamic_irqentry_exit_cond_resched(void);
>  #define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
>  #endif /* CONFIG_PREEMPT_DYNAMIC */
>  
> +static __always_inline irqentry_state_t irqentry_enter_from_kernel_mode(struct pt_regs *regs)
> +{
> +	irqentry_state_t ret = {
> +		.exit_rcu = false,
> +	};
> +
> +	/*
> +	 * If this entry hit the idle task invoke ct_irq_enter() whether
> +	 * RCU is watching or not.
> +	 *
> +	 * Interrupts can nest when the first interrupt invokes softirq
> +	 * processing on return which enables interrupts.
> +	 *
> +	 * Scheduler ticks in the idle task can mark quiescent state and
> +	 * terminate a grace period, if and only if the timer interrupt is
> +	 * not nested into another interrupt.
> +	 *
> +	 * Checking for rcu_is_watching() here would prevent the nesting
> +	 * interrupt to invoke ct_irq_enter(). If that nested interrupt is
> +	 * the tick then rcu_flavor_sched_clock_irq() would wrongfully
> +	 * assume that it is the first interrupt and eventually claim
> +	 * quiescent state and end grace periods prematurely.
> +	 *
> +	 * Unconditionally invoke ct_irq_enter() so RCU state stays
> +	 * consistent.
> +	 *
> +	 * TINY_RCU does not support EQS, so let the compiler eliminate
> +	 * this part when enabled.
> +	 */
> +	if (!IS_ENABLED(CONFIG_TINY_RCU) &&
> +	    (is_idle_task(current) || arch_in_rcu_eqs())) {
> +		/*
> +		 * If RCU is not watching then the same careful
> +		 * sequence vs. lockdep and tracing is required
> +		 * as in irqentry_enter_from_user_mode().
> +		 */
> +		lockdep_hardirqs_off(CALLER_ADDR0);
> +		ct_irq_enter();
> +		instrumentation_begin();
> +		kmsan_unpoison_entry_regs(regs);
> +		trace_hardirqs_off_finish();
> +		instrumentation_end();
> +
> +		ret.exit_rcu = true;
> +		return ret;
> +	}
> +
> +	/*
> +	 * If RCU is watching then RCU only wants to check whether it needs
> +	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
> +	 * already contains a warning when RCU is not watching, so no point
> +	 * in having another one here.
> +	 */
> +	lockdep_hardirqs_off(CALLER_ADDR0);
> +	instrumentation_begin();
> +	kmsan_unpoison_entry_regs(regs);
> +	rcu_irq_enter_check_tick();
> +	trace_hardirqs_off_finish();
> +	instrumentation_end();
> +
> +	return ret;
> +}
> +
> +static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs, irqentry_state_t state)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	if (!regs_irqs_disabled(regs)) {
> +		/*
> +		 * If RCU was not watching on entry this needs to be done
> +		 * carefully and needs the same ordering of lockdep/tracing
> +		 * and RCU as the return to user mode path.
> +		 */
> +		if (state.exit_rcu) {
> +			instrumentation_begin();
> +			/* Tell the tracer that IRET will enable interrupts */
> +			trace_hardirqs_on_prepare();
> +			lockdep_hardirqs_on_prepare();
> +			instrumentation_end();
> +			ct_irq_exit();
> +			lockdep_hardirqs_on(CALLER_ADDR0);
> +			return;
> +		}
> +
> +		instrumentation_begin();
> +		if (IS_ENABLED(CONFIG_PREEMPTION))
> +			irqentry_exit_cond_resched();
> +
> +		/* Covers both tracing and lockdep */
> +		trace_hardirqs_on();
> +		instrumentation_end();
> +	} else {
> +		/*
> +		 * IRQ flags state is correct already. Just tell RCU if it
> +		 * was not watching on entry.
> +		 */
> +		if (state.exit_rcu)
> +			ct_irq_exit();
> +	}
> +}
> +
>  /**
>   * irqentry_enter - Handle state tracking on ordinary interrupt entries
>   * @regs:	Pointer to pt_regs of interrupted context
> diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> index b5e05d87ba391..1034be02eae84 100644
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -105,70 +105,16 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
>  
>  noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
>  {
> -	irqentry_state_t ret = {
> -		.exit_rcu = false,
> -	};
> -
>  	if (user_mode(regs)) {
> -		irqentry_enter_from_user_mode(regs);
> -		return ret;
> -	}
> +		irqentry_state_t ret = {
> +			.exit_rcu = false,
> +		};
>  
> -	/*
> -	 * If this entry hit the idle task invoke ct_irq_enter() whether
> -	 * RCU is watching or not.
> -	 *
> -	 * Interrupts can nest when the first interrupt invokes softirq
> -	 * processing on return which enables interrupts.
> -	 *
> -	 * Scheduler ticks in the idle task can mark quiescent state and
> -	 * terminate a grace period, if and only if the timer interrupt is
> -	 * not nested into another interrupt.
> -	 *
> -	 * Checking for rcu_is_watching() here would prevent the nesting
> -	 * interrupt to invoke ct_irq_enter(). If that nested interrupt is
> -	 * the tick then rcu_flavor_sched_clock_irq() would wrongfully
> -	 * assume that it is the first interrupt and eventually claim
> -	 * quiescent state and end grace periods prematurely.
> -	 *
> -	 * Unconditionally invoke ct_irq_enter() so RCU state stays
> -	 * consistent.
> -	 *
> -	 * TINY_RCU does not support EQS, so let the compiler eliminate
> -	 * this part when enabled.
> -	 */
> -	if (!IS_ENABLED(CONFIG_TINY_RCU) &&
> -	    (is_idle_task(current) || arch_in_rcu_eqs())) {
> -		/*
> -		 * If RCU is not watching then the same careful
> -		 * sequence vs. lockdep and tracing is required
> -		 * as in irqentry_enter_from_user_mode().
> -		 */
> -		lockdep_hardirqs_off(CALLER_ADDR0);
> -		ct_irq_enter();
> -		instrumentation_begin();
> -		kmsan_unpoison_entry_regs(regs);
> -		trace_hardirqs_off_finish();
> -		instrumentation_end();
> -
> -		ret.exit_rcu = true;
> +		irqentry_enter_from_user_mode(regs);
>  		return ret;
>  	}
>  
> -	/*
> -	 * If RCU is watching then RCU only wants to check whether it needs
> -	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
> -	 * already contains a warning when RCU is not watching, so no point
> -	 * in having another one here.
> -	 */
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	instrumentation_begin();
> -	kmsan_unpoison_entry_regs(regs);
> -	rcu_irq_enter_check_tick();
> -	trace_hardirqs_off_finish();
> -	instrumentation_end();
> -
> -	return ret;
> +	return irqentry_enter_from_kernel_mode(regs);
>  }
>  
>  /**
> @@ -212,43 +158,10 @@ void dynamic_irqentry_exit_cond_resched(void)
>  
>  noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
>  {
> -	lockdep_assert_irqs_disabled();
> -
> -	/* Check whether this returns to user mode */
> -	if (user_mode(regs)) {
> +	if (user_mode(regs))
>  		irqentry_exit_to_user_mode(regs);
> -	} else if (!regs_irqs_disabled(regs)) {
> -		/*
> -		 * If RCU was not watching on entry this needs to be done
> -		 * carefully and needs the same ordering of lockdep/tracing
> -		 * and RCU as the return to user mode path.
> -		 */
> -		if (state.exit_rcu) {
> -			instrumentation_begin();
> -			/* Tell the tracer that IRET will enable interrupts */
> -			trace_hardirqs_on_prepare();
> -			lockdep_hardirqs_on_prepare();
> -			instrumentation_end();
> -			ct_irq_exit();
> -			lockdep_hardirqs_on(CALLER_ADDR0);
> -			return;
> -		}
> -
> -		instrumentation_begin();
> -		if (IS_ENABLED(CONFIG_PREEMPTION))
> -			irqentry_exit_cond_resched();
> -
> -		/* Covers both tracing and lockdep */
> -		trace_hardirqs_on();
> -		instrumentation_end();
> -	} else {
> -		/*
> -		 * IRQ flags state is correct already. Just tell RCU if it
> -		 * was not watching on entry.
> -		 */
> -		if (state.exit_rcu)
> -			ct_irq_exit();
> -	}
> +	else
> +		irqentry_exit_to_kernel_mode(regs, state);
>  }
>  
>  irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)



More information about the linux-arm-kernel mailing list