[PATCH 11/11] arm64: percpu: use local percpu for this_cpu_*() APIs

Yang Shi yang at os.amperecomputing.com
Wed Apr 29 10:04:39 PDT 2026


Use the local percpu address for the this_cpu_*() APIs.  Because the
percpu variable is mapped to the same virtual address on every CPU, its
address can be calculated using __per_cpu_local_off, which has the same
value for all CPUs.  So preempt_disable/preempt_enable is not needed
anymore.  This optimization can improve the performance of this_cpu_*()
operations.

A kernel build test on AmpereOne (160 cores) with the default Fedora
kernel config in a memcg showed roughly a 13% - 15% improvement in sys
time.

Signed-off-by: Yang Shi <yang at os.amperecomputing.com>
---
 arch/arm64/include/asm/percpu.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index b57b2bb00967..15db56f981de 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -12,6 +12,7 @@
 #include <asm/stack_pointer.h>
 #include <asm/sysreg.h>
 
+extern unsigned long __per_cpu_local_off;
 static inline void set_my_cpu_offset(unsigned long off)
 {
 	asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
@@ -153,19 +154,21 @@ PERCPU_RET_OP(add, add, ldadd)
  * disabled.
  */
 
+#define local_cpu_ptr(ptr)						\
+({									\
+	__verify_pcpu_ptr(ptr);						\
+	SHIFT_PERCPU_PTR(ptr, __per_cpu_local_off);			\
+})
+
 #define _pcp_protect(op, pcp, ...)					\
 ({									\
-	preempt_disable_notrace();					\
-	op(raw_cpu_ptr(&(pcp)), __VA_ARGS__);				\
-	preempt_enable_notrace();					\
+	op(local_cpu_ptr(&(pcp)), __VA_ARGS__);				\
 })
 
 #define _pcp_protect_return(op, pcp, args...)				\
 ({									\
 	typeof(pcp) __retval;						\
-	preempt_disable_notrace();					\
-	__retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args);	\
-	preempt_enable_notrace();					\
+	__retval = (typeof(pcp))op(local_cpu_ptr(&(pcp)), ##args);	\
 	__retval;							\
 })
 
@@ -251,7 +254,7 @@ PERCPU_RET_OP(add, add, ldadd)
 	old__ = o;							\
 	new__ = n;							\
 	preempt_disable_notrace();					\
-	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ptr__ = local_cpu_ptr(&(pcp));					\
 	ret__ = cmpxchg128_local((void *)ptr__, old__, new__);		\
 	preempt_enable_notrace();					\
 	ret__;								\
-- 
2.47.0




More information about the linux-arm-kernel mailing list