[PATCH v3 2/2] cpufreq: CPPC: Register EM based on efficiency class information

Shaokun Zhang zhangshaokun at hisilicon.com
Sun May 29 23:42:44 PDT 2022


Hi,

There is a build warning (turned into an error by -Werror) on the arm64
platform when CONFIG_ENERGY_MODEL is not set:
 drivers/cpufreq/cppc_cpufreq.c:550:12: error: ‘cppc_get_cpu_cost’ defined but not used
[-Werror=unused-function]
   550 | static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
       |            ^~~~~~~~~~~~~~~~~
 drivers/cpufreq/cppc_cpufreq.c:481:12: error: ‘cppc_get_cpu_power’ defined but not used
[-Werror=unused-function]
   481 | static int cppc_get_cpu_power(struct device *cpu_dev,
       |            ^~~~~~~~~~~~~~~~~~

Thanks,
Shaokun

On 2022/4/25 20:38, Pierre Gondois wrote:
> From: Pierre Gondois <Pierre.Gondois at arm.com>
> 
> Performance states and energy consumption values are not advertised
> in ACPI. In the GicC structure of the MADT table, the "Processor
> Power Efficiency Class" field (called efficiency class from now on)
> describes the relative energy efficiency of CPUs.
> 
> To leverage the EM and EAS, the CPPC driver creates a set of
> artificial performance states and registers them in the Energy Model
> (EM), such as:
> - Every 20 capacity units, a performance state is created.
> - The energy cost of each performance state gradually increases.
> No power value is generated as only the cost is used in the EM.
> 
> During task placement, a task can raise the frequency of its whole
> performance domain (pd). This can make EAS place a task on a pd with
> CPUs that are individually less energy efficient.
> As cost values are artificial, and in order to place tasks on CPUs
> with the lower efficiency class, a gap in cost values is generated
> for adjacent efficiency classes.
> E.g.:
> - efficiency class = 0, capacity is in [0-1024], so cost values
>   are in [0: 51] (one performance state every 20 capacity units)
> - efficiency class = 1, capacity is in [0-1024], cost values
>   are in [1*gap+0: 1*gap+51].
> 
> The value of the cost gap is chosen to absorb the energy of 4 CPUs
> at their maximum capacity. This means that between:
> 1- a pd of 4 CPUs, each of them being used at almost their full
>    capacity. Their efficiency class is N.
> 2- a CPU using almost none of its capacity. Its efficiency class is
>    N+1
> EAS will choose the first option.
> 
> This patch also populates the (struct cpufreq_driver).register_em
> callback if valid efficiency_class ACPI values are provided.
> 
> Signed-off-by: Pierre Gondois <Pierre.Gondois at arm.com>
> ---
>  drivers/cpufreq/cppc_cpufreq.c | 144 +++++++++++++++++++++++++++++++++
>  1 file changed, 144 insertions(+)
> 
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index 3cd05651707d..3eaa23d1aaf5 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -421,6 +421,134 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
>  }
>  
>  static DEFINE_PER_CPU(unsigned int, efficiency_class);
> +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
> +
> +/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
> +#define CPPC_EM_CAP_STEP	(20)
> +/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
> +#define CPPC_EM_COST_STEP	(1)
> +/* Add a cost gap corresponding to the energy of 4 CPUs. */
> +#define CPPC_EM_COST_GAP	(4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
> +				/ CPPC_EM_CAP_STEP)
> +
> +static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
> +{
> +	struct cppc_perf_caps *perf_caps;
> +	unsigned int min_cap, max_cap;
> +	struct cppc_cpudata *cpu_data;
> +	int cpu = policy->cpu;
> +
> +	cpu_data = policy->driver_data;
> +	perf_caps = &cpu_data->perf_caps;
> +	max_cap = arch_scale_cpu_capacity(cpu);
> +	min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
> +	if ((min_cap == 0) || (max_cap < min_cap))
> +		return 0;
> +	return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
> +}
> +
> +/*
> + * The cost is defined as:
> + *   cost = power * max_frequency / frequency
> + */
> +static inline unsigned long compute_cost(int cpu, int step)
> +{
> +	return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
> +			step * CPPC_EM_COST_STEP;
> +}
> +
> +static int cppc_get_cpu_power(struct device *cpu_dev,
> +		unsigned long *power, unsigned long *KHz)
> +{
> +	unsigned long perf_step, perf_prev, perf, perf_check;
> +	unsigned int min_step, max_step, step, step_check;
> +	unsigned long prev_freq = *KHz;
> +	unsigned int min_cap, max_cap;
> +	struct cpufreq_policy *policy;
> +
> +	struct cppc_perf_caps *perf_caps;
> +	struct cppc_cpudata *cpu_data;
> +
> +	policy = cpufreq_cpu_get_raw(cpu_dev->id);
> +	cpu_data = policy->driver_data;
> +	perf_caps = &cpu_data->perf_caps;
> +	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
> +	min_cap = div_u64(max_cap * perf_caps->lowest_perf,
> +			perf_caps->highest_perf);
> +
> +	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
> +	min_step = min_cap / CPPC_EM_CAP_STEP;
> +	max_step = max_cap / CPPC_EM_CAP_STEP;
> +
> +	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
> +	step = perf_prev / perf_step;
> +
> +	if (step > max_step)
> +		return -EINVAL;
> +
> +	if (min_step == max_step) {
> +		step = max_step;
> +		perf = perf_caps->highest_perf;
> +	} else if (step < min_step) {
> +		step = min_step;
> +		perf = perf_caps->lowest_perf;
> +	} else {
> +		step++;
> +		if (step == max_step)
> +			perf = perf_caps->highest_perf;
> +		else
> +			perf = step * perf_step;
> +	}
> +
> +	*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
> +	perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
> +	step_check = perf_check / perf_step;
> +
> +	/*
> +	 * To avoid bad integer approximation, check that new frequency value
> +	 * increased and that the new frequency will be converted to the
> +	 * desired step value.
> +	 */
> +	while ((*KHz == prev_freq) || (step_check != step)) {
> +		perf++;
> +		*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
> +		perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
> +		step_check = perf_check / perf_step;
> +	}
> +
> +	/*
> +	 * With an artificial EM, only the cost value is used. Still the power
> +	 * is populated such as 0 < power < EM_MAX_POWER. This allows to add
> +	 * more sense to the artificial performance states.
> +	 */
> +	*power = compute_cost(cpu_dev->id, step);
> +
> +	return 0;
> +}
> +
> +static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
> +		unsigned long *cost)
> +{
> +	unsigned long perf_step, perf_prev;
> +	struct cppc_perf_caps *perf_caps;
> +	struct cpufreq_policy *policy;
> +	struct cppc_cpudata *cpu_data;
> +	unsigned int max_cap;
> +	int step;
> +
> +	policy = cpufreq_cpu_get_raw(cpu_dev->id);
> +	cpu_data = policy->driver_data;
> +	perf_caps = &cpu_data->perf_caps;
> +	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
> +
> +	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
> +	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
> +	step = perf_prev / perf_step;
> +
> +	*cost = compute_cost(cpu_dev->id, step);
> +
> +	return 0;
> +}
>  
>  static int populate_efficiency_class(void)
>  {
> @@ -453,10 +581,23 @@ static int populate_efficiency_class(void)
>  		}
>  		index++;
>  	}
> +	cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
>  
>  	return 0;
>  }
>  
> +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
> +{
> +	struct cppc_cpudata *cpu_data;
> +	struct em_data_callback em_cb =
> +		EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
> +
> +	cpu_data = policy->driver_data;
> +	em_dev_register_perf_domain(get_cpu_device(policy->cpu),
> +			get_perf_level_count(policy), &em_cb,
> +			cpu_data->shared_cpu_map, 0);
> +}
> +
>  #else
>  
>  static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
> @@ -467,6 +608,9 @@ static int populate_efficiency_class(void)
>  {
>  	return 0;
>  }
> +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
> +{
> +}
>  #endif
>  
>  
> 



More information about the linux-arm-kernel mailing list