[PATCH V5 1/2] topology: Allow multiple entities to provide sched_freq_tick() callback

Ionela Voinescu ionela.voinescu at arm.com
Tue Mar 9 15:11:57 GMT 2021


On Monday 01 Mar 2021 at 12:21:17 (+0530), Viresh Kumar wrote:
> This patch attempts to make the topology_scale_freq_tick() machinery
> generic enough so that other parts of the kernel can also provide their
> own implementation of the scale_freq_tick() callback, which is called by
> the scheduler periodically to update the per-cpu freq_scale variable.
> 
> The implementations now need to provide a 'struct scale_freq_data' for
> the CPUs for which they have hardware counters available, and the
> callback gets registered in a per-cpu variable for each of those CPUs.
> 
> The arch-specific (ARM AMU) counter code is updated to adapt to this,
> and those counters take the highest priority if they are available, i.e.
> they will be used instead of, for example, CPPC based counters.
> 
> The special code to rebuild the sched domains, in case the invariance
> status of the system changes, is moved out of arm64-specific code and
> into arch_topology.c.
> 
> Note that this also defines SCALE_FREQ_SOURCE_CPUFREQ but doesn't use
> it yet; it is added to show that cpufreq also acts as a source of
> information for FIE and will be used by default if no other counters are
> supported for a platform.
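
For reference, a minimal sketch of how a provider outside the arch code
might plug into this API. The SCALE_FREQ_SOURCE_FOO and foo_* names below
are hypothetical and not part of this patch; only struct scale_freq_data
and the topology_{set,clear}_scale_freq_source() calls come from it:

	#include <linux/arch_topology.h>
	#include <linux/cpumask.h>

	static void foo_scale_freq_tick(void)
	{
		/* Read this CPU's delivered/reference counters and update freq_scale. */
	}

	static struct scale_freq_data foo_sfd = {
		.source = SCALE_FREQ_SOURCE_FOO,	/* assumed enum entry */
		.set_freq_scale = foo_scale_freq_tick,
	};

	static int __init foo_fie_init(void)
	{
		/* Register for the CPUs whose counters this provider can read. */
		topology_set_scale_freq_source(&foo_sfd, cpu_possible_mask);
		return 0;
	}

	static void foo_fie_exit(void)
	{
		/*
		 * Drop only this provider's callbacks; CPUs that were taken over
		 * by an ARCH source are skipped because their source doesn't match.
		 */
		topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_FOO, cpu_possible_mask);
	}
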
> 
> Reviewed-by: Ionela Voinescu <ionela.voinescu at arm.com>
> Tested-by: Ionela Voinescu <ionela.voinescu at arm.com>
> Signed-off-by: Viresh Kumar <viresh.kumar at linaro.org>
> ---
>  arch/arm64/include/asm/topology.h |  10 +--
>  arch/arm64/kernel/topology.c      | 105 +++++++++++-------------------
>  drivers/base/arch_topology.c      |  85 ++++++++++++++++++++++--
>  include/linux/arch_topology.h     |  14 +++-
>  4 files changed, 134 insertions(+), 80 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> index 3b8dca4eb08d..ec2db3419c41 100644
> --- a/arch/arm64/include/asm/topology.h
> +++ b/arch/arm64/include/asm/topology.h
> @@ -17,17 +17,9 @@ int pcibus_to_node(struct pci_bus *bus);
>  #include <linux/arch_topology.h>
>  
>  void update_freq_counters_refs(void);
> -void topology_scale_freq_tick(void);
> -
> -#ifdef CONFIG_ARM64_AMU_EXTN
> -/*
> - * Replace task scheduler's default counter-based
> - * frequency-invariance scale factor setting.
> - */
> -#define arch_scale_freq_tick topology_scale_freq_tick
> -#endif /* CONFIG_ARM64_AMU_EXTN */
>  
>  /* Replace task scheduler's default frequency-invariant accounting */
> +#define arch_scale_freq_tick topology_scale_freq_tick
>  #define arch_set_freq_scale topology_set_freq_scale
>  #define arch_scale_freq_capacity topology_get_freq_scale
>  #define arch_scale_freq_invariant topology_scale_freq_invariant
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index e08a4126453a..47fca7376c93 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -199,12 +199,47 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
>  	return 0;
>  }
>  
> -static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
> -#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
> +static void amu_scale_freq_tick(void)
> +{
> +	u64 prev_core_cnt, prev_const_cnt;
> +	u64 core_cnt, const_cnt, scale;
> +
> +	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
> +	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
> +
> +	update_freq_counters_refs();
> +
> +	const_cnt = this_cpu_read(arch_const_cycles_prev);
> +	core_cnt = this_cpu_read(arch_core_cycles_prev);
> +
> +	if (unlikely(core_cnt <= prev_core_cnt ||
> +		     const_cnt <= prev_const_cnt))
> +		return;
> +
> +	/*
> +	 *	    /\core    arch_max_freq_scale
> +	 * scale =  ------- * --------------------
> +	 *	    /\const   SCHED_CAPACITY_SCALE
> +	 *
> +	 * See validate_cpu_freq_invariance_counters() for details on
> +	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
> +	 */
> +	scale = core_cnt - prev_core_cnt;
> +	scale *= this_cpu_read(arch_max_freq_scale);
> +	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
> +			  const_cnt - prev_const_cnt);
> +
> +	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
> +	this_cpu_write(freq_scale, (unsigned long)scale);
> +}
> +
> +static struct scale_freq_data amu_sfd = {
> +	.source = SCALE_FREQ_SOURCE_ARCH,
> +	.set_freq_scale = amu_scale_freq_tick,
> +};
>  
>  static void amu_fie_setup(const struct cpumask *cpus)
>  {
> -	bool invariant;
>  	int cpu;
>  
>  	/* We are already set since the last insmod of cpufreq driver */
> @@ -221,25 +256,10 @@ static void amu_fie_setup(const struct cpumask *cpus)
>  
>  	cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
>  
> -	invariant = topology_scale_freq_invariant();
> -
> -	/* We aren't fully invariant yet */
> -	if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
> -		return;
> -
> -	static_branch_enable(&amu_fie_key);
> +	topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);
>  
>  	pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
>  		 cpumask_pr_args(cpus));
> -
> -	/*
> -	 * Task scheduler behavior depends on frequency invariance support,
> -	 * either cpufreq or counter driven. If the support status changes as
> -	 * a result of counter initialisation and use, retrigger the build of
> -	 * scheduling domains to ensure the information is propagated properly.
> -	 */
> -	if (!invariant)
> -		rebuild_sched_domains_energy();
>  }
>  
>  static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
> @@ -283,53 +303,6 @@ static int __init init_amu_fie(void)
>  }
>  core_initcall(init_amu_fie);
>  
> -bool arch_freq_counters_available(const struct cpumask *cpus)
> -{
> -	return amu_freq_invariant() &&
> -	       cpumask_subset(cpus, amu_fie_cpus);
> -}
> -
> -void topology_scale_freq_tick(void)
> -{
> -	u64 prev_core_cnt, prev_const_cnt;
> -	u64 core_cnt, const_cnt, scale;
> -	int cpu = smp_processor_id();
> -
> -	if (!amu_freq_invariant())
> -		return;
> -
> -	if (!cpumask_test_cpu(cpu, amu_fie_cpus))
> -		return;
> -
> -	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
> -	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
> -
> -	update_freq_counters_refs();
> -
> -	const_cnt = this_cpu_read(arch_const_cycles_prev);
> -	core_cnt = this_cpu_read(arch_core_cycles_prev);
> -
> -	if (unlikely(core_cnt <= prev_core_cnt ||
> -		     const_cnt <= prev_const_cnt))
> -		return;
> -
> -	/*
> -	 *	    /\core    arch_max_freq_scale
> -	 * scale =  ------- * --------------------
> -	 *	    /\const   SCHED_CAPACITY_SCALE
> -	 *
> -	 * See validate_cpu_freq_invariance_counters() for details on
> -	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
> -	 */
> -	scale = core_cnt - prev_core_cnt;
> -	scale *= this_cpu_read(arch_max_freq_scale);
> -	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
> -			  const_cnt - prev_const_cnt);
> -
> -	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
> -	this_cpu_write(freq_scale, (unsigned long)scale);
> -}
> -
>  #ifdef CONFIG_ACPI_CPPC_LIB
>  #include <acpi/cppc_acpi.h>
>  
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index de8587cc119e..8f62dbf93f67 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -21,17 +21,94 @@
>  #include <linux/sched.h>
>  #include <linux/smp.h>
>  
> +static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
> +static struct cpumask scale_freq_counters_mask;
> +static bool scale_freq_invariant;
> +
> +static bool supports_scale_freq_counters(const struct cpumask *cpus)
> +{
> +	return cpumask_subset(cpus, &scale_freq_counters_mask);
> +}
> +
>  bool topology_scale_freq_invariant(void)
>  {
>  	return cpufreq_supports_freq_invariance() ||
> -	       arch_freq_counters_available(cpu_online_mask);
> +	       supports_scale_freq_counters(cpu_online_mask);
>  }
>  
> -__weak bool arch_freq_counters_available(const struct cpumask *cpus)
> +static void update_scale_freq_invariant(bool status)
>  {
> -	return false;
> +	if (scale_freq_invariant == status)
> +		return;
> +
> +	/*
> +	 * Task scheduler behavior depends on frequency invariance support,
> +	 * either cpufreq or counter driven. If the support status changes as
> +	 * a result of counter initialisation and use, retrigger the build of
> +	 * scheduling domains to ensure the information is propagated properly.
> +	 */
> +	if (topology_scale_freq_invariant() == status) {
> +		scale_freq_invariant = status;
> +		rebuild_sched_domains_energy();
> +	}
>  }
> +
> +void topology_set_scale_freq_source(struct scale_freq_data *data,
> +				    const struct cpumask *cpus)
> +{
> +	struct scale_freq_data *sfd;
> +	int cpu;
> +
> +	/*
> +	 * Avoid calling rebuild_sched_domains() unnecessarily if FIE is
> +	 * supported by cpufreq.
> +	 */
> +	if (cpumask_empty(&scale_freq_counters_mask))
> +		scale_freq_invariant = topology_scale_freq_invariant();
> +
> +	for_each_cpu(cpu, cpus) {
> +		sfd = per_cpu(sft_data, cpu);
> +
> +		/* Use ARCH provided counters whenever possible */
> +		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
> +			per_cpu(sft_data, cpu) = data;
> +			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
> +		}
> +	}
> +
> +	update_scale_freq_invariant(true);
> +}
> +EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
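
One property of the priority handling above that may be worth spelling
out: registration order does not matter for the "use ARCH counters
whenever possible" rule. If a lower priority provider registers first and
the ARCH (AMU) source registers later for the same CPUs, the ARCH callback
replaces it, and a subsequent re-registration of the other provider is a
no-op on those CPUs. Roughly (cppc_sfd is purely illustrative here, it is
not added by this patch; amu_sfd and amu_fie_cpus are from the arm64 code
above):

	topology_set_scale_freq_source(&cppc_sfd, cpu_possible_mask);	/* CPPC installed everywhere */
	topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);	/* ARCH replaces CPPC on these CPUs */
	topology_set_scale_freq_source(&cppc_sfd, cpu_possible_mask);	/* no-op where ARCH is installed */
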
> +
> +void topology_clear_scale_freq_source(enum scale_freq_source source,
> +				      const struct cpumask *cpus)
> +{
> +	struct scale_freq_data *sfd;
> +	int cpu;
> +
> +	for_each_cpu(cpu, cpus) {
> +		sfd = per_cpu(sft_data, cpu);
> +
> +		if (sfd && sfd->source == source) {
> +			per_cpu(sft_data, cpu) = NULL;
> +			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
> +		}
> +	}
> +
> +	update_scale_freq_invariant(false);
> +}
> +EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
> +
> +void topology_scale_freq_tick(void)
> +{
> +	struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
> +
> +	if (sfd)
> +		sfd->set_freq_scale();
> +}
> +
>  DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
> +EXPORT_SYMBOL_GPL(freq_scale);
>  
>  void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
>  			     unsigned long max_freq)
> @@ -47,7 +124,7 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
>  	 * want to update the scale factor with information from CPUFREQ.
>  	 * Instead the scale factor will be updated from arch_scale_freq_tick.
>  	 */
> -	if (arch_freq_counters_available(cpus))
> +	if (supports_scale_freq_counters(cpus))
>  		return;
>  
>  	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
> diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
> index 0f6cd6b73a61..3bcfba5c21a7 100644
> --- a/include/linux/arch_topology.h
> +++ b/include/linux/arch_topology.h
> @@ -34,7 +34,19 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
>  			     unsigned long max_freq);
>  bool topology_scale_freq_invariant(void);
>  
> -bool arch_freq_counters_available(const struct cpumask *cpus);
> +enum scale_freq_source {
> +	SCALE_FREQ_SOURCE_CPUFREQ = 0,
> +	SCALE_FREQ_SOURCE_ARCH,
> +};
> +
> +struct scale_freq_data {
> +	enum scale_freq_source source;
> +	void (*set_freq_scale)(void);
> +};
> +
> +void topology_scale_freq_tick(void);
> +void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus);
> +void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus);

Nit: can you split these lines?

Ionela.

>  
>  DECLARE_PER_CPU(unsigned long, thermal_pressure);
>  
> -- 
> 2.25.0.rc1.19.g042ed3e048af
> 


