[PATCH v3 2/2] cpufreq: CPPC: Register EM based on efficiency class information
Shaokun Zhang
zhangshaokun at hisilicon.com
Sun May 29 23:42:44 PDT 2022
Hi,
There is a build warning (promoted to an error by -Werror) on the arm64 platform when CONFIG_ENERGY_MODEL is not set:
drivers/cpufreq/cppc_cpufreq.c:550:12: error: ‘cppc_get_cpu_cost’ defined but not used
[-Werror=unused-function]
550 | static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
| ^~~~~~~~~~~~~~~~~
drivers/cpufreq/cppc_cpufreq.c:481:12: error: ‘cppc_get_cpu_power’ defined but not used
[-Werror=unused-function]
481 | static int cppc_get_cpu_power(struct device *cpu_dev,
| ^~~~~~~~~~~~~~~~~~
Thanks,
Shaokun
On 2022/4/25 20:38, Pierre Gondois wrote:
> From: Pierre Gondois <Pierre.Gondois at arm.com>
>
> Performance states and energy consumption values are not advertised
> in ACPI. In the GicC structure of the MADT table, the "Processor
> Power Efficiency Class" field (called efficiency class from now on)
> allows describing the relative energy efficiency of CPUs.
>
> To leverage the EM and EAS, the CPPC driver creates a set of
> artificial performance states and registers them in the Energy Model
> (EM), such that:
> - Every 20 capacity units, a performance state is created.
> - The energy cost of each performance state gradually increases.
> No power value is generated as only the cost is used in the EM.
>
> During task placement, a task can raise the frequency of its whole
> pd. This can make EAS place a task on a pd with CPUs that are
> individually less energy efficient.
> As cost values are artificial, and to place tasks on CPUs with the
> lower efficiency class, a gap in cost values is generated for adjacent
> efficiency classes.
> E.g.:
> - efficiency class = 0, capacity is in [0-1024], so cost values
> are in [0: 51] (one performance state every 20 capacity units)
> - efficiency class = 1, capacity is in [0-1024], cost values
> are in [1*gap+0: 1*gap+51].
>
> The value of the cost gap is chosen to absorb the energy of 4 CPUs
> at their maximum capacity. This means that between:
> 1- a pd of 4 CPUs, each of them being used at almost their full
> capacity. Their efficiency class is N.
> 2- a CPU using almost none of its capacity. Its efficiency class is
> N+1
> EAS will choose the first option.
>
> This patch also populates the (struct cpufreq_driver).register_em
> callback if valid efficiency_class ACPI values are provided.
>
> Signed-off-by: Pierre Gondois <Pierre.Gondois at arm.com>
> ---
> drivers/cpufreq/cppc_cpufreq.c | 144 +++++++++++++++++++++++++++++++++
> 1 file changed, 144 insertions(+)
>
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index 3cd05651707d..3eaa23d1aaf5 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -421,6 +421,134 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
> }
>
> static DEFINE_PER_CPU(unsigned int, efficiency_class);
> +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
> +
> +/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity units. */
> +#define CPPC_EM_CAP_STEP (20)
> +/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
> +#define CPPC_EM_COST_STEP (1)
> +/* Add a cost gap corresponding to the energy of 4 CPUs. */
> +#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
> + / CPPC_EM_CAP_STEP)
> +
> +static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
> +{
> + struct cppc_perf_caps *perf_caps;
> + unsigned int min_cap, max_cap;
> + struct cppc_cpudata *cpu_data;
> + int cpu = policy->cpu;
> +
> + cpu_data = policy->driver_data;
> + perf_caps = &cpu_data->perf_caps;
> + max_cap = arch_scale_cpu_capacity(cpu);
> + min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
> + if ((min_cap == 0) || (max_cap < min_cap))
> + return 0;
> + return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
> +}
> +
> +/*
> + * The cost is defined as:
> + * cost = power * max_frequency / frequency
> + */
> +static inline unsigned long compute_cost(int cpu, int step)
> +{
> + return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
> + step * CPPC_EM_COST_STEP;
> +}
> +
> +static int cppc_get_cpu_power(struct device *cpu_dev,
> + unsigned long *power, unsigned long *KHz)
> +{
> + unsigned long perf_step, perf_prev, perf, perf_check;
> + unsigned int min_step, max_step, step, step_check;
> + unsigned long prev_freq = *KHz;
> + unsigned int min_cap, max_cap;
> + struct cpufreq_policy *policy;
> +
> + struct cppc_perf_caps *perf_caps;
> + struct cppc_cpudata *cpu_data;
> +
> + policy = cpufreq_cpu_get_raw(cpu_dev->id);
> + cpu_data = policy->driver_data;
> + perf_caps = &cpu_data->perf_caps;
> + max_cap = arch_scale_cpu_capacity(cpu_dev->id);
> + min_cap = div_u64(max_cap * perf_caps->lowest_perf,
> + perf_caps->highest_perf);
> +
> + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
> + min_step = min_cap / CPPC_EM_CAP_STEP;
> + max_step = max_cap / CPPC_EM_CAP_STEP;
> +
> + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
> + step = perf_prev / perf_step;
> +
> + if (step > max_step)
> + return -EINVAL;
> +
> + if (min_step == max_step) {
> + step = max_step;
> + perf = perf_caps->highest_perf;
> + } else if (step < min_step) {
> + step = min_step;
> + perf = perf_caps->lowest_perf;
> + } else {
> + step++;
> + if (step == max_step)
> + perf = perf_caps->highest_perf;
> + else
> + perf = step * perf_step;
> + }
> +
> + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
> + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
> + step_check = perf_check / perf_step;
> +
> + /*
> + * To avoid bad integer approximation, check that new frequency value
> + * increased and that the new frequency will be converted to the
> + * desired step value.
> + */
> + while ((*KHz == prev_freq) || (step_check != step)) {
> + perf++;
> + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
> + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
> + step_check = perf_check / perf_step;
> + }
> +
> + /*
> + * With an artificial EM, only the cost value is used. Still, the power
> + * is populated such that 0 < power < EM_MAX_POWER. This makes the
> + * artificial performance states more meaningful.
> + */
> + *power = compute_cost(cpu_dev->id, step);
> +
> + return 0;
> +}
> +
> +static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
> + unsigned long *cost)
> +{
> + unsigned long perf_step, perf_prev;
> + struct cppc_perf_caps *perf_caps;
> + struct cpufreq_policy *policy;
> + struct cppc_cpudata *cpu_data;
> + unsigned int max_cap;
> + int step;
> +
> + policy = cpufreq_cpu_get_raw(cpu_dev->id);
> + cpu_data = policy->driver_data;
> + perf_caps = &cpu_data->perf_caps;
> + max_cap = arch_scale_cpu_capacity(cpu_dev->id);
> +
> + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
> + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
> + step = perf_prev / perf_step;
> +
> + *cost = compute_cost(cpu_dev->id, step);
> +
> + return 0;
> +}
>
> static int populate_efficiency_class(void)
> {
> @@ -453,10 +581,23 @@ static int populate_efficiency_class(void)
> }
> index++;
> }
> + cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
>
> return 0;
> }
>
> +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
> +{
> + struct cppc_cpudata *cpu_data;
> + struct em_data_callback em_cb =
> + EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
> +
> + cpu_data = policy->driver_data;
> + em_dev_register_perf_domain(get_cpu_device(policy->cpu),
> + get_perf_level_count(policy), &em_cb,
> + cpu_data->shared_cpu_map, 0);
> +}
> +
> #else
>
> static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
> @@ -467,6 +608,9 @@ static int populate_efficiency_class(void)
> {
> return 0;
> }
> +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
> +{
> +}
> #endif
>
>
>
More information about the linux-arm-kernel
mailing list