[PATCH 1/2] ARM: vexpress/TC2: basic PM support
Lorenzo Pieralisi
lorenzo.pieralisi at arm.com
Fri Jun 7 10:26:45 EDT 2013
On Fri, Jun 07, 2013 at 07:39:11AM +0100, Nicolas Pitre wrote:
> This is the MCPM backend for the Virtual Express A15x2 A7x3 CoreTile
> aka TC2. This provides cluster management for SMP secondary boot and
> CPU hotplug.
>
> Signed-off-by: Nicolas Pitre <nico at linaro.org>
> ---
> arch/arm/mach-vexpress/Kconfig | 9 ++
> arch/arm/mach-vexpress/Makefile | 1 +
> arch/arm/mach-vexpress/tc2_pm.c | 243 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 253 insertions(+)
> create mode 100644 arch/arm/mach-vexpress/tc2_pm.c
>
> diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
> index b8bbabec63..e7a825d7df 100644
> --- a/arch/arm/mach-vexpress/Kconfig
> +++ b/arch/arm/mach-vexpress/Kconfig
> @@ -66,4 +66,13 @@ config ARCH_VEXPRESS_DCSCB
> This is needed to provide CPU and cluster power management
> on RTSM implementing big.LITTLE.
>
> +config ARCH_VEXPRESS_TC2
> + bool "Versatile Express TC2 power management"
> + depends on MCPM
> + select VEXPRESS_SPC
> + select ARM_CCI
> + help
> + Support for CPU and cluster power management on Versatile Express
> + with a TC2 (A15x2 A7x3) big.LITTLE core tile.
> +
> endmenu
> diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
> index 48ba89a814..b1cf227fa5 100644
> --- a/arch/arm/mach-vexpress/Makefile
> +++ b/arch/arm/mach-vexpress/Makefile
> @@ -7,5 +7,6 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
> obj-y := v2m.o
> obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o
> obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o
> +obj-$(CONFIG_ARCH_VEXPRESS_TC2) += tc2_pm.o
> obj-$(CONFIG_SMP) += platsmp.o
> obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
> diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
> new file mode 100644
> index 0000000000..a3ea524372
> --- /dev/null
> +++ b/arch/arm/mach-vexpress/tc2_pm.c
> @@ -0,0 +1,243 @@
> +/*
> + * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support
> + *
> + * Created by: Nicolas Pitre, October 2012
> + * Copyright: (C) 2012-2013 Linaro Limited
> + *
> + * Some portions of this file were originally written by Achin Gupta
> + * Copyright: (C) 2012 ARM Limited
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/spinlock.h>
> +#include <linux/errno.h>
> +
> +#include <asm/mcpm.h>
> +#include <asm/proc-fns.h>
> +#include <asm/cacheflush.h>
> +#include <asm/cputype.h>
> +#include <asm/cp15.h>
> +
> +#include <mach/motherboard.h>
Is the include above needed ?
> +#include <linux/vexpress.h>
> +#include <linux/arm-cci.h>
> +
> +/*
> + * We can't use regular spinlocks. In the switcher case, it is possible
> + * for an outbound CPU to call power_down() after its inbound counterpart
> + * is already live using the same logical CPU number which trips lockdep
> + * debugging.
> + */
> +static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> +
> +static int tc2_pm_use_count[3][2];
> +
> +static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster)
> +{
> + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> + if (cluster >= 2 || cpu >= vexpress_spc_get_nb_cpus(cluster))
> + return -EINVAL;
We could stash the result of vexpress_spc_get_nb_cpus(); it never changes.
> + /*
> + * Since this is called with IRQs enabled, and no arch_spin_lock_irq
> + * variant exists, we need to disable IRQs manually here.
> + */
> + local_irq_disable();
> + arch_spin_lock(&tc2_pm_lock);
> +
> + if (!tc2_pm_use_count[0][cluster] &&
> + !tc2_pm_use_count[1][cluster] &&
> + !tc2_pm_use_count[2][cluster])
> + vexpress_spc_powerdown_enable(cluster, 0);
> +
> + tc2_pm_use_count[cpu][cluster]++;
> + if (tc2_pm_use_count[cpu][cluster] == 1) {
> + vexpress_spc_write_resume_reg(cluster, cpu,
> + virt_to_phys(mcpm_entry_point));
> + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
> + } else if (tc2_pm_use_count[cpu][cluster] != 2) {
> + /*
> + * The only possible values are:
> + * 0 = CPU down
> + * 1 = CPU (still) up
> + * 2 = CPU requested to be up before it had a chance
> + * to actually make itself down.
> + * Any other value is a bug.
> + */
> + BUG();
> + }
> +
> + arch_spin_unlock(&tc2_pm_lock);
> + local_irq_enable();
> +
> + return 0;
> +}
> +
> +static void tc2_pm_power_down(void)
> +{
> + unsigned int mpidr, cpu, cluster;
> + bool last_man = false, skip_wfi = false;
> +
> + mpidr = read_cpuid_mpidr();
> + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> + BUG_ON(cluster >= 2 || cpu >= vexpress_spc_get_nb_cpus(cluster));
Ditto, see above.
> + __mcpm_cpu_going_down(cpu, cluster);
> +
> + arch_spin_lock(&tc2_pm_lock);
> + BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> + tc2_pm_use_count[cpu][cluster]--;
> + if (tc2_pm_use_count[cpu][cluster] == 0) {
> + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
> + if (!tc2_pm_use_count[0][cluster] &&
> + !tc2_pm_use_count[1][cluster] &&
> + !tc2_pm_use_count[2][cluster]) {
> + vexpress_spc_powerdown_enable(cluster, 1);
> + vexpress_spc_set_global_wakeup_intr(1);
> + last_man = true;
> + }
> + } else if (tc2_pm_use_count[cpu][cluster] == 1) {
> + /*
> + * A power_up request went ahead of us.
> + * Even if we do not want to shut this CPU down,
> + * the caller expects a certain state as if the WFI
> + * was aborted. So let's continue with cache cleaning.
> + */
> + skip_wfi = true;
> + } else
> + BUG();
> +
> + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
> + arch_spin_unlock(&tc2_pm_lock);
> +
> + set_cr(get_cr() & ~CR_C);
We must disable L2 prefetching on A15 before cleaning L2.
> + flush_cache_all();
> + asm volatile ("clrex");
> + set_auxcr(get_auxcr() & ~(1 << 6));
I think we should add comments here to avoid copy'n'paste mayhem. The
code above is safe on CPUs like A15/A7 (I know this back-end can only
run on those processors), which still hit in the cache with the C bit in
SCTLR cleared; it would explode on processors (e.g. A9) that do not hit
in the cache with the C bit cleared. I am wondering if it is better to
write inline asm and jump straight to v7 cache functions that do not
need stack push/pop.
> + cci_disable_port_by_cpu(mpidr);
> +
> + /*
> + * Ensure that both C & I bits are disabled in the SCTLR
> + * before disabling ACE snoops. This ensures that no
> + * coherency traffic will originate from this cpu after
> + * ACE snoops are turned off.
> + */
> + cpu_proc_fin();
Mmm, the C bit is already cleared, so why clear the I bit (and the A
bit)? I do not think cpu_proc_fin() is needed, and I am really keen on
getting the power down procedure right from the start, to avoid
copy'n'paste induced errors.
> +
> + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
> + } else {
> + /*
> + * If last man then undo any setup done previously.
> + */
> + if (last_man) {
> + vexpress_spc_powerdown_enable(cluster, 0);
> + vexpress_spc_set_global_wakeup_intr(0);
> + }
> +
> + arch_spin_unlock(&tc2_pm_lock);
> +
> + set_cr(get_cr() & ~CR_C);
> + flush_cache_louis();
> + asm volatile ("clrex");
> + set_auxcr(get_auxcr() & ~(1 << 6));
> + }
> +
> + __mcpm_cpu_down(cpu, cluster);
> +
> + /* Now we are prepared for power-down, do it: */
> + if (!skip_wfi)
> + wfi();
> +
> + /* Not dead at this point? Let our caller cope. */
This function should disable the GIC CPU interface, but I guess you will
add the code when CPUidle is merged.
> +static void tc2_pm_powered_up(void)
> +{
> + unsigned int mpidr, cpu, cluster;
> + unsigned long flags;
> +
> + mpidr = read_cpuid_mpidr();
> + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> + BUG_ON(cluster >= 2 || cpu >= vexpress_spc_get_nb_cpus(cluster));
> +
> + local_irq_save(flags);
> + arch_spin_lock(&tc2_pm_lock);
> +
> + if (!tc2_pm_use_count[0][cluster] &&
> + !tc2_pm_use_count[1][cluster] &&
> + !tc2_pm_use_count[2][cluster]) {
> + vexpress_spc_powerdown_enable(cluster, 0);
> + vexpress_spc_set_global_wakeup_intr(0);
> + }
> +
> + if (!tc2_pm_use_count[cpu][cluster])
> + tc2_pm_use_count[cpu][cluster] = 1;
> +
> + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 0);
> + vexpress_spc_write_resume_reg(cluster, cpu, 0);
> +
> + arch_spin_unlock(&tc2_pm_lock);
> + local_irq_restore(flags);
> +}
> +
> +static const struct mcpm_platform_ops tc2_pm_power_ops = {
> + .power_up = tc2_pm_power_up,
> + .power_down = tc2_pm_power_down,
> + .powered_up = tc2_pm_powered_up,
> +};
> +
> +static void __init tc2_pm_usage_count_init(void)
> +{
> + unsigned int mpidr, cpu, cluster;
> +
> + mpidr = read_cpuid_mpidr();
> + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> + BUG_ON(cpu >= 3 || cluster >= 2);
> + tc2_pm_use_count[cpu][cluster] = 1;
> +}
> +
> +/*
> + * Enable cluster-level coherency, in preparation for turning on the MMU.
> + */
> +static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
> +{
> + asm volatile (" \n"
> +" cmp r0, #1 \n"
> +" beq cci_enable_port_for_self \n"
> +" bx lr ");
> +}
We could write a function like the above (stackless and inline) for the
sequence:
1- clear C bit
2- flush cache all/louis
3- exit coherency
Again, the current implementation is right on TC2, but we should not let
people think that it is correct for all v7 processors.
Lorenzo
More information about the linux-arm-kernel
mailing list