[PATCH RFC] RISC-V: Fix a register to store the percpu offset
Clément Léger
cleger at rivosinc.com
Mon Jul 7 00:55:19 PDT 2025
On 04/07/2025 10:45, Yunhui Cui wrote:
> The following data was collected from tests conducted on the
> Spacemit(R) X60 using the fixed register method:
>
> No modifications:
> 6.77, 6.791, 6.792, 6.826, 6.784, 6.839, 6.776, 6.733, 6.795, 6.763
> Average: 6.786839305
>
> ffix-x27:
> 7.106, 7.035, 7.362, 7.141, 7.096, 7.182, 7.109, 7.126, 7.186, 7.12
> Average: 7.145826539
>
> ffix-x27 + x27(s11) used for offset optimization:
> 7.059, 6.951, 6.961, 6.985, 6.93, 6.964, 6.977, 6.907, 6.983, 6.944
> Average: 6.965993173
Hi Yunhui,
Out of curiosity, did you try using a register different from x27?
Thanks,
Clément
>
> Analysis:
> The fixed register method reduced performance by 5.29%.
> The per-CPU offset optimization improved performance by 2.52%.
>
> Issues with the fixed register method (beyond code size):
> Performance degradation due to the loss of one general-purpose register.
> Each handle_exception() call requires loading the per-CPU offset into the
> fixed register.
>
> Signed-off-by: Yunhui Cui <cuiyunhui at bytedance.com>
> ---
> Makefile | 4 ++--
> arch/riscv/include/asm/percpu.h | 22 ++++++++++++++++++++++
> arch/riscv/kernel/asm-offsets.c | 1 +
> arch/riscv/kernel/entry.S | 11 +++++++++--
> arch/riscv/kernel/smpboot.c | 3 +++
> 5 files changed, 37 insertions(+), 4 deletions(-)
> create mode 100644 arch/riscv/include/asm/percpu.h
>
> diff --git a/Makefile b/Makefile
> index b7d5f2f0def0..e291f865adc4 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1026,8 +1026,8 @@ include $(addprefix $(srctree)/, $(include-y))
>
> # Add user supplied CPPFLAGS, AFLAGS, CFLAGS and RUSTFLAGS as the last assignments
> KBUILD_CPPFLAGS += $(KCPPFLAGS)
> -KBUILD_AFLAGS += $(KAFLAGS)
> -KBUILD_CFLAGS += $(KCFLAGS)
> +KBUILD_AFLAGS += $(KAFLAGS) -ffixed-x27
> +KBUILD_CFLAGS += $(KCFLAGS) -ffixed-x27
> KBUILD_RUSTFLAGS += $(KRUSTFLAGS)
>
> KBUILD_LDFLAGS_MODULE += --build-id=sha1
> diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
> new file mode 100644
> index 000000000000..5d6b109cfab7
> --- /dev/null
> +++ b/arch/riscv/include/asm/percpu.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#ifndef __ASM_PERCPU_H
> +#define __ASM_PERCPU_H
> +
> +static inline void set_my_cpu_offset(unsigned long off)
> +{
> + asm volatile("addi s11, %0, 0" :: "r" (off));
> +}
> +
> +static inline unsigned long __kern_my_cpu_offset(void)
> +{
> + unsigned long off;
> + asm ("mv %0, s11" :"=r" (off) :);
> + return off;
> +}
> +
> +#define __my_cpu_offset __kern_my_cpu_offset()
> +
> +#include <asm-generic/percpu.h>
> +
> +#endif
> diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
> index a03129f40c46..0ce96f30bf32 100644
> --- a/arch/riscv/kernel/asm-offsets.c
> +++ b/arch/riscv/kernel/asm-offsets.c
> @@ -35,6 +35,7 @@ void asm_offsets(void)
> OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
> OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
> OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
> + OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
> OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
> OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
> OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 9d1a305d5508..529d6576265e 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -77,6 +77,13 @@ SYM_CODE_START(handle_exception)
> */
> csrw CSR_SCRATCH, x0
>
> + /* load __per_cpu_offset[cpu] to s11*/
> + REG_L t6, TASK_TI_CPU(tp)
> + slli t6, t6, 3
> + la s11, __per_cpu_offset
> + add s11, s11, t6
> + REG_L s11, 0(s11)
> +
> /* Load the global pointer */
> load_global_pointer
>
> @@ -298,7 +305,7 @@ SYM_FUNC_START(__switch_to)
> REG_S s8, TASK_THREAD_S8_RA(a3)
> REG_S s9, TASK_THREAD_S9_RA(a3)
> REG_S s10, TASK_THREAD_S10_RA(a3)
> - REG_S s11, TASK_THREAD_S11_RA(a3)
> + /* REG_S s11, TASK_THREAD_S11_RA(a3) */
> /* Save the kernel shadow call stack pointer */
> scs_save_current
> /* Restore context from next->thread */
> @@ -315,7 +322,7 @@ SYM_FUNC_START(__switch_to)
> REG_L s8, TASK_THREAD_S8_RA(a4)
> REG_L s9, TASK_THREAD_S9_RA(a4)
> REG_L s10, TASK_THREAD_S10_RA(a4)
> - REG_L s11, TASK_THREAD_S11_RA(a4)
> + /* REG_L s11, TASK_THREAD_S11_RA(a4) */
> /* The offset of thread_info in task_struct is zero. */
> move tp, a1
> /* Switch to the next shadow call stack */
> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
> index fb6ab7f8bfbd..6fa12cc84523 100644
> --- a/arch/riscv/kernel/smpboot.c
> +++ b/arch/riscv/kernel/smpboot.c
> @@ -43,6 +43,7 @@ static DECLARE_COMPLETION(cpu_running);
>
> void __init smp_prepare_boot_cpu(void)
> {
> + set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
> }
>
> void __init smp_prepare_cpus(unsigned int max_cpus)
> @@ -240,6 +241,8 @@ asmlinkage __visible void smp_callin(void)
> mmgrab(mm);
> current->active_mm = mm;
>
> + set_my_cpu_offset(per_cpu_offset(curr_cpuid));
> +
> store_cpu_topology(curr_cpuid);
> notify_cpu_starting(curr_cpuid);
>
More information about the linux-riscv
mailing list