[PATCH V5 1/2] topology: Allow multiple entities to provide sched_freq_tick() callback
Ionela Voinescu
ionela.voinescu at arm.com
Tue Mar 9 15:11:57 GMT 2021
On Monday 01 Mar 2021 at 12:21:17 (+0530), Viresh Kumar wrote:
> This patch attempts to make it generic enough so other parts of the
> kernel can also provide their own implementation of scale_freq_tick()
> callback, which is called by the scheduler periodically to update the
> per-cpu freq_scale variable.
>
> The implementations now need to provide 'struct scale_freq_data' for the
> CPUs for which they have hardware counters available, and a callback
> gets registered for each possible CPU in a per-cpu variable.
>
> The arch specific (or ARM AMU) counters are updated to adapt to this and
> they take the highest priority if they are available, i.e. they will be
> used instead of CPPC based counters for example.
>
> The special code to rebuild the sched domains, in case the invariance
> status changes for the system, is moved out of arm64 specific code and
> is added to arch_topology.c.
>
> Note that this also defines SCALE_FREQ_SOURCE_CPUFREQ but doesn't use it;
> it is added to show that cpufreq also acts as a source of
> information for FIE and will be used by default if no other counters are
> supported for a platform.
>
> Reviewed-by: Ionela Voinescu <ionela.voinescu at arm.com>
> Tested-by: Ionela Voinescu <ionela.voinescu at arm.com>
> Signed-off-by: Viresh Kumar <viresh.kumar at linaro.org>
> ---
> arch/arm64/include/asm/topology.h | 10 +--
> arch/arm64/kernel/topology.c | 105 +++++++++++-------------------
> drivers/base/arch_topology.c | 85 ++++++++++++++++++++++--
> include/linux/arch_topology.h | 14 +++-
> 4 files changed, 134 insertions(+), 80 deletions(-)
>
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> index 3b8dca4eb08d..ec2db3419c41 100644
> --- a/arch/arm64/include/asm/topology.h
> +++ b/arch/arm64/include/asm/topology.h
> @@ -17,17 +17,9 @@ int pcibus_to_node(struct pci_bus *bus);
> #include <linux/arch_topology.h>
>
> void update_freq_counters_refs(void);
> -void topology_scale_freq_tick(void);
> -
> -#ifdef CONFIG_ARM64_AMU_EXTN
> -/*
> - * Replace task scheduler's default counter-based
> - * frequency-invariance scale factor setting.
> - */
> -#define arch_scale_freq_tick topology_scale_freq_tick
> -#endif /* CONFIG_ARM64_AMU_EXTN */
>
> /* Replace task scheduler's default frequency-invariant accounting */
> +#define arch_scale_freq_tick topology_scale_freq_tick
> #define arch_set_freq_scale topology_set_freq_scale
> #define arch_scale_freq_capacity topology_get_freq_scale
> #define arch_scale_freq_invariant topology_scale_freq_invariant
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index e08a4126453a..47fca7376c93 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -199,12 +199,47 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
> return 0;
> }
>
> -static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
> -#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
> +static void amu_scale_freq_tick(void)
> +{
> + u64 prev_core_cnt, prev_const_cnt;
> + u64 core_cnt, const_cnt, scale;
> +
> + prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
> + prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
> +
> + update_freq_counters_refs();
> +
> + const_cnt = this_cpu_read(arch_const_cycles_prev);
> + core_cnt = this_cpu_read(arch_core_cycles_prev);
> +
> + if (unlikely(core_cnt <= prev_core_cnt ||
> + const_cnt <= prev_const_cnt))
> + return;
> +
> + /*
> + * /\core arch_max_freq_scale
> + * scale = ------- * --------------------
> + * /\const SCHED_CAPACITY_SCALE
> + *
> + * See validate_cpu_freq_invariance_counters() for details on
> + * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
> + */
> + scale = core_cnt - prev_core_cnt;
> + scale *= this_cpu_read(arch_max_freq_scale);
> + scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
> + const_cnt - prev_const_cnt);
> +
> + scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
> + this_cpu_write(freq_scale, (unsigned long)scale);
> +}
> +
> +static struct scale_freq_data amu_sfd = {
> + .source = SCALE_FREQ_SOURCE_ARCH,
> + .set_freq_scale = amu_scale_freq_tick,
> +};
>
> static void amu_fie_setup(const struct cpumask *cpus)
> {
> - bool invariant;
> int cpu;
>
> /* We are already set since the last insmod of cpufreq driver */
> @@ -221,25 +256,10 @@ static void amu_fie_setup(const struct cpumask *cpus)
>
> cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
>
> - invariant = topology_scale_freq_invariant();
> -
> - /* We aren't fully invariant yet */
> - if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
> - return;
> -
> - static_branch_enable(&amu_fie_key);
> + topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);
>
> pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
> cpumask_pr_args(cpus));
> -
> - /*
> - * Task scheduler behavior depends on frequency invariance support,
> - * either cpufreq or counter driven. If the support status changes as
> - * a result of counter initialisation and use, retrigger the build of
> - * scheduling domains to ensure the information is propagated properly.
> - */
> - if (!invariant)
> - rebuild_sched_domains_energy();
> }
>
> static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
> @@ -283,53 +303,6 @@ static int __init init_amu_fie(void)
> }
> core_initcall(init_amu_fie);
>
> -bool arch_freq_counters_available(const struct cpumask *cpus)
> -{
> - return amu_freq_invariant() &&
> - cpumask_subset(cpus, amu_fie_cpus);
> -}
> -
> -void topology_scale_freq_tick(void)
> -{
> - u64 prev_core_cnt, prev_const_cnt;
> - u64 core_cnt, const_cnt, scale;
> - int cpu = smp_processor_id();
> -
> - if (!amu_freq_invariant())
> - return;
> -
> - if (!cpumask_test_cpu(cpu, amu_fie_cpus))
> - return;
> -
> - prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
> - prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
> -
> - update_freq_counters_refs();
> -
> - const_cnt = this_cpu_read(arch_const_cycles_prev);
> - core_cnt = this_cpu_read(arch_core_cycles_prev);
> -
> - if (unlikely(core_cnt <= prev_core_cnt ||
> - const_cnt <= prev_const_cnt))
> - return;
> -
> - /*
> - * /\core arch_max_freq_scale
> - * scale = ------- * --------------------
> - * /\const SCHED_CAPACITY_SCALE
> - *
> - * See validate_cpu_freq_invariance_counters() for details on
> - * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
> - */
> - scale = core_cnt - prev_core_cnt;
> - scale *= this_cpu_read(arch_max_freq_scale);
> - scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
> - const_cnt - prev_const_cnt);
> -
> - scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
> - this_cpu_write(freq_scale, (unsigned long)scale);
> -}
> -
> #ifdef CONFIG_ACPI_CPPC_LIB
> #include <acpi/cppc_acpi.h>
>
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index de8587cc119e..8f62dbf93f67 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -21,17 +21,94 @@
> #include <linux/sched.h>
> #include <linux/smp.h>
>
> +static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
> +static struct cpumask scale_freq_counters_mask;
> +static bool scale_freq_invariant;
> +
> +static bool supports_scale_freq_counters(const struct cpumask *cpus)
> +{
> + return cpumask_subset(cpus, &scale_freq_counters_mask);
> +}
> +
> bool topology_scale_freq_invariant(void)
> {
> return cpufreq_supports_freq_invariance() ||
> - arch_freq_counters_available(cpu_online_mask);
> + supports_scale_freq_counters(cpu_online_mask);
> }
>
> -__weak bool arch_freq_counters_available(const struct cpumask *cpus)
> +static void update_scale_freq_invariant(bool status)
> {
> - return false;
> + if (scale_freq_invariant == status)
> + return;
> +
> + /*
> + * Task scheduler behavior depends on frequency invariance support,
> + * either cpufreq or counter driven. If the support status changes as
> + * a result of counter initialisation and use, retrigger the build of
> + * scheduling domains to ensure the information is propagated properly.
> + */
> + if (topology_scale_freq_invariant() == status) {
> + scale_freq_invariant = status;
> + rebuild_sched_domains_energy();
> + }
> }
> +
> +void topology_set_scale_freq_source(struct scale_freq_data *data,
> + const struct cpumask *cpus)
> +{
> + struct scale_freq_data *sfd;
> + int cpu;
> +
> + /*
> + * Avoid calling rebuild_sched_domains() unnecessarily if FIE is
> + * supported by cpufreq.
> + */
> + if (cpumask_empty(&scale_freq_counters_mask))
> + scale_freq_invariant = topology_scale_freq_invariant();
> +
> + for_each_cpu(cpu, cpus) {
> + sfd = per_cpu(sft_data, cpu);
> +
> + /* Use ARCH provided counters whenever possible */
> + if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
> + per_cpu(sft_data, cpu) = data;
> + cpumask_set_cpu(cpu, &scale_freq_counters_mask);
> + }
> + }
> +
> + update_scale_freq_invariant(true);
> +}
> +EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
> +
> +void topology_clear_scale_freq_source(enum scale_freq_source source,
> + const struct cpumask *cpus)
> +{
> + struct scale_freq_data *sfd;
> + int cpu;
> +
> + for_each_cpu(cpu, cpus) {
> + sfd = per_cpu(sft_data, cpu);
> +
> + if (sfd && sfd->source == source) {
> + per_cpu(sft_data, cpu) = NULL;
> + cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
> + }
> + }
> +
> + update_scale_freq_invariant(false);
> +}
> +EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
> +
> +void topology_scale_freq_tick(void)
> +{
> + struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
> +
> + if (sfd)
> + sfd->set_freq_scale();
> +}
> +
> DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
> +EXPORT_SYMBOL_GPL(freq_scale);
>
> void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
> unsigned long max_freq)
> @@ -47,7 +124,7 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
> * want to update the scale factor with information from CPUFREQ.
> * Instead the scale factor will be updated from arch_scale_freq_tick.
> */
> - if (arch_freq_counters_available(cpus))
> + if (supports_scale_freq_counters(cpus))
> return;
>
> scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
> diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
> index 0f6cd6b73a61..3bcfba5c21a7 100644
> --- a/include/linux/arch_topology.h
> +++ b/include/linux/arch_topology.h
> @@ -34,7 +34,19 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
> unsigned long max_freq);
> bool topology_scale_freq_invariant(void);
>
> -bool arch_freq_counters_available(const struct cpumask *cpus);
> +enum scale_freq_source {
> + SCALE_FREQ_SOURCE_CPUFREQ = 0,
> + SCALE_FREQ_SOURCE_ARCH,
> +};
> +
> +struct scale_freq_data {
> + enum scale_freq_source source;
> + void (*set_freq_scale)(void);
> +};
> +
> +void topology_scale_freq_tick(void);
> +void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus);
> +void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus);
Nit: can you split these lines?
Ionela.
>
> DECLARE_PER_CPU(unsigned long, thermal_pressure);
>
> --
> 2.25.0.rc1.19.g042ed3e048af
>
More information about the linux-arm-kernel
mailing list