Re: [PATCH v2 3/4] Provide the always inline version of some functions
H. Peter Anvin
hpa at zytor.com
Sat Nov 8 14:14:44 PST 2025
On November 8, 2025 9:23:45 AM PST, Xie Yuanbin <qq570070308 at gmail.com> wrote:
>On critical hot code paths, inlining functions can improve performance.
>However, current compilers provide no way to request that a function be
>inlined at a specific call site.
>
>Add an always-inline version of some functions, so that it can be chosen
>when they are called on hot paths.
>
>Signed-off-by: Xie Yuanbin <qq570070308 at gmail.com>
>Cc: Thomas Gleixner <tglx at linutronix.de>
>Cc: Rik van Riel <riel at surriel.com>
>Cc: Segher Boessenkool <segher at kernel.crashing.org>
>Cc: David Hildenbrand <david at redhat.com>
>Cc: Peter Zijlstra <peterz at infradead.org>
>---
> arch/arm/include/asm/mmu_context.h | 12 +++++++-
> arch/s390/include/asm/mmu_context.h | 12 +++++++-
> arch/sparc/include/asm/mmu_context_64.h | 12 +++++++-
> kernel/sched/core.c | 38 ++++++++++++++++++++++---
> 4 files changed, 67 insertions(+), 7 deletions(-)
>
>diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
>index db2cb06aa8cf..e77b271570c1 100644
>--- a/arch/arm/include/asm/mmu_context.h
>+++ b/arch/arm/include/asm/mmu_context.h
>@@ -80,7 +80,12 @@ static inline void check_and_switch_context(struct mm_struct *mm,
> #ifndef MODULE
> #define finish_arch_post_lock_switch \
> finish_arch_post_lock_switch
>-static inline void finish_arch_post_lock_switch(void)
>+/*
>+ * finish_arch_post_lock_switch_ainline - the always inline version of
>+ * finish_arch_post_lock_switch, used for performance sensitive paths.
>+ * If unsure, use finish_arch_post_lock_switch instead.
>+ */
>+static __always_inline void finish_arch_post_lock_switch_ainline(void)
> {
> struct mm_struct *mm = current->mm;
>
>@@ -99,6 +104,11 @@ static inline void finish_arch_post_lock_switch(void)
> preempt_enable_no_resched();
> }
> }
>+
>+static inline void finish_arch_post_lock_switch(void)
>+{
>+ finish_arch_post_lock_switch_ainline();
>+}
> #endif /* !MODULE */
>
> #endif /* CONFIG_MMU */
>diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
>index d9b8501bc93d..577062834906 100644
>--- a/arch/s390/include/asm/mmu_context.h
>+++ b/arch/s390/include/asm/mmu_context.h
>@@ -97,7 +97,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> }
>
> #define finish_arch_post_lock_switch finish_arch_post_lock_switch
>-static inline void finish_arch_post_lock_switch(void)
>+/*
>+ * finish_arch_post_lock_switch_ainline - the always inline version of
>+ * finish_arch_post_lock_switch, used for performance sensitive paths.
>+ * If unsure, use finish_arch_post_lock_switch instead.
>+ */
>+static __always_inline void finish_arch_post_lock_switch_ainline(void)
> {
> struct task_struct *tsk = current;
> struct mm_struct *mm = tsk->mm;
>@@ -120,6 +125,11 @@ static inline void finish_arch_post_lock_switch(void)
> local_irq_restore(flags);
> }
>
>+static inline void finish_arch_post_lock_switch(void)
>+{
>+ finish_arch_post_lock_switch_ainline();
>+}
>+
> #define activate_mm activate_mm
> static inline void activate_mm(struct mm_struct *prev,
> struct mm_struct *next)
>diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
>index 78bbacc14d2d..ca7019080574 100644
>--- a/arch/sparc/include/asm/mmu_context_64.h
>+++ b/arch/sparc/include/asm/mmu_context_64.h
>@@ -160,7 +160,12 @@ static inline void arch_start_context_switch(struct task_struct *prev)
> }
>
> #define finish_arch_post_lock_switch finish_arch_post_lock_switch
>-static inline void finish_arch_post_lock_switch(void)
>+/*
>+ * finish_arch_post_lock_switch_ainline - the always inline version of
>+ * finish_arch_post_lock_switch, used for performance sensitive paths.
>+ * If unsure, use finish_arch_post_lock_switch instead.
>+ */
>+static __always_inline void finish_arch_post_lock_switch_ainline(void)
> {
> /* Restore the state of MCDPER register for the new process
> * just switched to.
>@@ -185,6 +190,11 @@ static inline void finish_arch_post_lock_switch(void)
> }
> }
>
>+static inline void finish_arch_post_lock_switch(void)
>+{
>+ finish_arch_post_lock_switch_ainline();
>+}
>+
> #define mm_untag_mask mm_untag_mask
> static inline unsigned long mm_untag_mask(struct mm_struct *mm)
> {
>diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>index 0e50ef3d819a..c50e672e22c4 100644
>--- a/kernel/sched/core.c
>+++ b/kernel/sched/core.c
>@@ -4884,7 +4884,13 @@ static inline void finish_task(struct task_struct *prev)
> smp_store_release(&prev->on_cpu, 0);
> }
>
>-static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
>+/*
>+ * do_balance_callbacks_ainline - the always inline version of
>+ * do_balance_callbacks, used for performance sensitive paths.
>+ * If unsure, use do_balance_callbacks instead.
>+ */
>+static __always_inline void do_balance_callbacks_ainline(struct rq *rq,
>+ struct balance_callback *head)
> {
> void (*func)(struct rq *rq);
> struct balance_callback *next;
>@@ -4901,6 +4907,11 @@ static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
> }
> }
>
>+static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
>+{
>+ do_balance_callbacks_ainline(rq, head);
>+}
>+
> static void balance_push(struct rq *rq);
>
> /*
>@@ -4949,11 +4960,21 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq)
> return __splice_balance_callbacks(rq, true);
> }
>
>-static void __balance_callbacks(struct rq *rq)
>+/*
>+ * __balance_callbacks_ainline - the always inline version of
>+ * __balance_callbacks, used for performance sensitive paths.
>+ * If unsure, use __balance_callbacks instead.
>+ */
>+static __always_inline void __balance_callbacks_ainline(struct rq *rq)
> {
> do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
> }
>
>+static void __balance_callbacks(struct rq *rq)
>+{
>+ __balance_callbacks_ainline(rq);
>+}
>+
> void balance_callbacks(struct rq *rq, struct balance_callback *head)
> {
> unsigned long flags;
>@@ -5003,7 +5024,8 @@ static inline void finish_lock_switch(struct rq *rq)
> #endif
>
> #ifndef finish_arch_post_lock_switch
>-# define finish_arch_post_lock_switch() do { } while (0)
>+# define finish_arch_post_lock_switch() do { } while (0)
>+# define finish_arch_post_lock_switch_ainline() do { } while (0)
> #endif
>
> static inline void kmap_local_sched_out(void)
>@@ -5050,6 +5072,9 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
>
> /**
> * finish_task_switch - clean up after a task-switch
>+ * finish_task_switch_ainline - the always inline version of this function,
>+ * used for performance sensitive paths
>+ *
> * @prev: the thread we just switched away from.
> *
> * finish_task_switch must be called after the context switch, paired
>@@ -5067,7 +5092,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
> * past. 'prev == current' is still correct but we need to recalculate this_rq
> * because prev may have moved to another CPU.
> */
>-static struct rq *finish_task_switch(struct task_struct *prev)
>+static __always_inline struct rq *finish_task_switch_ainline(struct task_struct *prev)
> __releases(rq->lock)
> {
> struct rq *rq = this_rq();
>@@ -5159,6 +5184,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
> return rq;
> }
>
>+static struct rq *finish_task_switch(struct task_struct *prev)
>+{
>+ return finish_task_switch_ainline(prev);
>+}
>+
> /**
> * schedule_tail - first thing a freshly forked thread must call.
> * @prev: the thread we just switched away from.
There is, in fact, a way: you have to have an always_inline version, and wrap it in a noinline version.
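
Roughly, the pattern looks like this (a minimal userspace sketch with
illustrative names, using the plain GCC/Clang attributes that sit behind
the kernel's __always_inline and noinline annotations):

#include <stdio.h>

/* The real body: forced inline into whoever calls it directly. */
static inline __attribute__((always_inline)) int do_work_ainline(int x)
{
	return x * 2 + 1;
}

/* Out-of-line wrapper: one shared copy for ordinary call sites. */
static __attribute__((noinline)) int do_work(int x)
{
	return do_work_ainline(x);
}

int main(void)
{
	printf("%d\n", do_work_ainline(21)); /* hot path: body inlined here */
	printf("%d\n", do_work(21));         /* cold path: shared copy */
	return 0;
}

Hot paths call do_work_ainline() and get the body expanded in place;
everything else goes through do_work() and shares a single out-of-line
copy, which is the same split the patch above introduces.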