[PATCH v1 1/3] arm64: topology: Add arch_freq_get_on_cpu() support
lihuisong (C)
lihuisong at huawei.com
Wed Mar 6 19:02:34 PST 2024
Hi Vanshidhar,
On 2024/3/1 0:25, Vanshidhar Konda wrote:
> AMU counters are used by the Frequency Invariance Engine (FIE) to
> estimate the CPU utilization during each tick. The delta of the AMU
> counters between two ticks can also be used to estimate the average CPU
> frequency of each core over the tick duration. Measure the AMU counters
> during each tick, compute the deltas and store them. When the frequency
> of the core is queried, use the stored deltas to determine the frequency.
>
> arch_freq_get_on_cpu() is used on x86 systems to estimate the frequency
> of each CPU. It can be wired up on arm64 for the same functionality.
>
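For reference, my understanding of the estimate (a rough formula, not
spelled out in the patch, assuming the constant cycles counter ticks at
the fixed system counter frequency, CNTFRQ_EL0):

	avg_freq ~= delta_core_cnt / delta_const_cnt * CNTFRQ_EL0

so the two deltas have to be sampled consistently with each other,
which the seqcount below provides.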
> Signed-off-by: Vanshidhar Konda <vanshikonda at os.amperecomputing.com>
> ---
> arch/arm64/kernel/topology.c | 114 +++++++++++++++++++++++++++++------
> 1 file changed, 96 insertions(+), 18 deletions(-)
>
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index 1a2c72f3e7f8..db8d14525cf4 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -17,6 +17,8 @@
> #include <linux/cpufreq.h>
> #include <linux/init.h>
> #include <linux/percpu.h>
> +#include <linux/sched/isolation.h>
> +#include <linux/seqlock_types.h>
>
> #include <asm/cpu.h>
> #include <asm/cputype.h>
> @@ -82,20 +84,54 @@ int __init parse_acpi_topology(void)
> #undef pr_fmt
> #define pr_fmt(fmt) "AMU: " fmt
>
> +struct amu_counters {
> + seqcount_t seq;
> + unsigned long last_update;
> + u64 core_cnt;
> + u64 const_cnt;
> + u64 delta_core_cnt;
> + u64 delta_const_cnt;
> +};
> +
> /*
> * Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until
> * the CPU capacity and its associated frequency have been correctly
> * initialized.
> */
> -static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
> -static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
> -static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
> +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) =
> + 1UL << (2 * SCHED_CAPACITY_SHIFT);
> +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_counters, cpu_samples) = {
> + .seq = SEQCNT_ZERO(cpu_samples.seq)
> +};
> static cpumask_var_t amu_fie_cpus;
>
> void update_freq_counters_refs(void)
> {
> - this_cpu_write(arch_core_cycles_prev, read_corecnt());
> - this_cpu_write(arch_const_cycles_prev, read_constcnt());
> + struct amu_counters *cpu_sample = this_cpu_ptr(&cpu_samples);
> + u64 core_cnt, const_cnt, delta_core_cnt, delta_const_cnt;
> +
> + const_cnt = read_constcnt();
> + core_cnt = read_corecnt();
> +
> + if (unlikely(core_cnt < cpu_sample->core_cnt) ||
> + unlikely(const_cnt < cpu_sample->const_cnt)) {
The two counter registers might wrap around, so this check does not
always hold, right?
If we do not account for the wrap case, the warning below should be
removed.
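If the AMU counters are the architectural 64-bit registers, plain
unsigned subtraction already yields the correct delta across a single
wrap, so the comparison could simply be dropped (a rough sketch,
untested):

	/*
	 * u64 arithmetic is modulo 2^64, so one wrap between two
	 * ticks is still handled correctly without any comparison.
	 */
	delta_core_cnt = core_cnt - cpu_sample->core_cnt;
	delta_const_cnt = const_cnt - cpu_sample->const_cnt;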
> + WARN(1, "AMU counter values should be monotonic.\n");
> + cpu_sample->delta_const_cnt = 0;
> + cpu_sample->delta_core_cnt = 0;
> + return;
> + }
> +
> + delta_core_cnt = core_cnt - cpu_sample->core_cnt;
> + delta_const_cnt = const_cnt - cpu_sample->const_cnt;
> +
> + cpu_sample->core_cnt = core_cnt;
> + cpu_sample->const_cnt = const_cnt;
> +
> + raw_write_seqcount_begin(&cpu_sample->seq);
> + cpu_sample->last_update = jiffies;
> + cpu_sample->delta_const_cnt = delta_const_cnt;
> + cpu_sample->delta_core_cnt = delta_core_cnt;
> + raw_write_seqcount_end(&cpu_sample->seq);
> }
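BTW, I assume the reader side (elided here) pairs with this roughly as
below (a sketch only; the helper name is mine, not from the patch):

	static unsigned long amu_read_sample(int cpu, u64 *delta_core,
					     u64 *delta_const)
	{
		struct amu_counters *cpu_sample = per_cpu_ptr(&cpu_samples, cpu);
		unsigned long last_update;
		unsigned int seq;

		/* Retry until the writer on the remote CPU is not mid-update. */
		do {
			seq = raw_read_seqcount_begin(&cpu_sample->seq);
			last_update = cpu_sample->last_update;
			*delta_core = cpu_sample->delta_core_cnt;
			*delta_const = cpu_sample->delta_const_cnt;
		} while (read_seqcount_retry(&cpu_sample->seq, seq));

		return last_update;
	}

If that matches, the seqcount placement above looks fine to me: the
core_cnt/const_cnt fields updated outside the write-side critical
section are only touched by the local tick.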
<...>