[PATCH v2] ARM: implement optimized percpu variable access
Nicolas Pitre
nico@fluxnic.net
Thu Nov 29 10:45:50 EST 2012
On Thu, 29 Nov 2012, Rob Herring wrote:
> From: Rob Herring <rob.herring@calxeda.com>
>
> Use the previously unused TPIDRPRW register to store percpu offsets.
> TPIDRPRW is only accessible in PL1, so it can only be used in the kernel.
>
> This replaces two loads with an mrc instruction for each percpu variable
> access. With hackbench, the performance improvement is 1.4% on Cortex-A9
> (highbank). Taking an average of 30 runs of "hackbench -l 1000" yields:
>
> Before: 6.2191
> After: 6.1348
>
> Will Deacon reported a similar delta on v6 with 11MPCore.
>
> The asm "memory" constraints are needed here to ensure the percpu offset
> gets reloaded. Testing by Will found that the reload would not happen in
> __schedule(), which is a bit of a special case: preemption is disabled,
> but execution can still move between cores.
Strictly speaking, you should say "memory clobber" and not "memory
constraint" in this case.
>
> Signed-off-by: Rob Herring <rob.herring@calxeda.com>
> Acked-by: Will Deacon <will.deacon@arm.com>
With the above, and with the call to cpu_init() moved after the call to
cpu_switch_mm(mm->pgd, mm) to address Will's concerns (personally I'd put
it right after local_flush_tlb_all())...
Acked-by: Nicolas Pitre <nico@linaro.org>
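For clarity, the resulting order in secondary_start_kernel() would be
roughly this (an untested sketch of my suggestion, not the patch as
posted):

        cpu_switch_mm(mm->pgd, mm);     /* leave the identity mapping */
        local_flush_tlb_all();

        cpu_init();     /* sets TPIDRPRW, so percpu access works from here */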
> ---
> Changes in v2:
> - Add asm "memory" constraint
> - Only enable on v6K and v7 and avoid enabling for v6 SMP_ON_UP
> - Fix missing initialization of TPIDRPRW for resume path
> - Move cpu_init to beginning of secondary_start_kernel to ensure percpu
> variables can be accessed as early as possible.
>
> arch/arm/include/asm/Kbuild | 1 -
> arch/arm/include/asm/percpu.h | 45 +++++++++++++++++++++++++++++++++++++++++
> arch/arm/kernel/setup.c | 6 ++++++
> arch/arm/kernel/smp.c | 4 +++-
> 4 files changed, 54 insertions(+), 2 deletions(-)
> create mode 100644 arch/arm/include/asm/percpu.h
>
> diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
> index f70ae17..2ffdaac 100644
> --- a/arch/arm/include/asm/Kbuild
> +++ b/arch/arm/include/asm/Kbuild
> @@ -16,7 +16,6 @@ generic-y += local64.h
> generic-y += msgbuf.h
> generic-y += param.h
> generic-y += parport.h
> -generic-y += percpu.h
> generic-y += poll.h
> generic-y += resource.h
> generic-y += sections.h
> diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
> new file mode 100644
> index 0000000..968c0a1
> --- /dev/null
> +++ b/arch/arm/include/asm/percpu.h
> @@ -0,0 +1,45 @@
> +/*
> + * Copyright 2012 Calxeda, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef _ASM_ARM_PERCPU_H_
> +#define _ASM_ARM_PERCPU_H_
> +
> +/*
> + * Same as asm-generic/percpu.h, except that we store the per cpu offset
> + * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
> + */
> +#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
> +static inline void set_my_cpu_offset(unsigned long off)
> +{
> + /* Set TPIDRPRW */
> + asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
> +}
> +
> +static inline unsigned long __my_cpu_offset(void)
> +{
> + unsigned long off;
> + /* Read TPIDRPRW */
> + asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory");
> + return off;
> +}
> +#define __my_cpu_offset __my_cpu_offset()
> +#else
> +#define set_my_cpu_offset(x) do {} while(0)
> +
> +#endif /* CONFIG_SMP && !CONFIG_CPU_V6 */
> +
> +#include <asm-generic/percpu.h>
> +
> +#endif /* _ASM_ARM_PERCPU_H_ */
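For reference, here is where the two saved loads come from (a rough
sketch based on the asm-generic/percpu.h fallback and ARM's
raw_smp_processor_id(), not part of this patch):

        /* generic path: two dependent loads per percpu access */
        off = __per_cpu_offset[current_thread_info()->cpu];

        /* this patch: a single coprocessor register read, no memory traffic */
        asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory");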
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index da1d1aa..b2909ce 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -383,6 +383,12 @@ void cpu_init(void)
> BUG();
> }
>
> + /*
> + * This only works on resume and secondary cores. For booting on the
> + * boot cpu, smp_prepare_boot_cpu is called after percpu area setup.
> + */
> + set_my_cpu_offset(per_cpu_offset(cpu));
> +
> cpu_proc_init();
>
> /*
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index fbc8b26..aadcca7 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -296,6 +296,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
> struct mm_struct *mm = &init_mm;
> unsigned int cpu;
>
> + cpu_init();
> +
> /*
> * The identity mapping is uncached (strongly ordered), so
> * switch away from it before attempting any exclusive accesses.
> @@ -315,7 +317,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
>
> printk("CPU%u: Booted secondary processor\n", cpu);
>
> - cpu_init();
> preempt_disable();
> trace_hardirqs_off();
>
> @@ -371,6 +372,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
>
> void __init smp_prepare_boot_cpu(void)
> {
> + set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
> }
>
> void __init smp_prepare_cpus(unsigned int max_cpus)
> --
> 1.7.10.4
>