[PATCH v1 01/23] perf tools: Warn if no user requested CPUs match PMU's CPUs

Liang, Kan <kan.liang at linux.intel.com>
Mon May 22 04:33:34 PDT 2023



On 2023-05-22 12:52 a.m., Ian Rogers wrote:
> On Sun, May 21, 2023 at 12:04 PM Liang, Kan <kan.liang at linux.intel.com> wrote:
>>
>>
>>
>> On 2023-05-17 10:57 a.m., Ian Rogers wrote:
>>> In commit 1d3351e631fc ("perf tools: Enable on a list of CPUs for hybrid")
>>> perf on hybrid will warn if a user-requested CPU doesn't match the PMU of
>>> the given event, but only for hybrid PMUs. Make the logic generic for all
>>> PMUs and remove the hybrid logic.
>>
>> The logic may not work for all PMUs, especially for uncore.
>>
>> For uncore, the user should be able to read counters from any CPU of a
>> socket/die, not just the CPU from the cpumask.
>> Please see d6a2f9035bfc ("perf/core: Introduce PMU_EV_CAP_READ_ACTIVE_PKG").
>>
>> Thanks,
>> Kan
> 
> Hi Kan,
> 
> the code specifically handles this; for uncore PMUs the check is
> that the CPU is any online CPU:
> 
> """
> ... Uncore PMUs list a CPU in sysfs, but this may be overwritten by a
> + *     user requested CPU and so any online CPU is applicable. Core PMUs handle
> + *     events on the CPUs in their list and otherwise the event isn't supported.
> ...
> +               to_test = pmu && pmu->is_uncore ? cpu_map__online() : evsel__cpus(pos);
> ...
> """
> 
> in the commit message this is also called out:
> 
> """
> For example on a 16 (0-15) CPU system:
> ```
> $ perf stat -e imc_free_running/data_read/,cycles -C 16 true
> WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_1' for event 'imc_free_running/data_read/'
> WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_0' for event 'imc_free_running/data_read/'
> WARNING: Requested CPU(s) '16' not supported by PMU 'cpu' for event 'cycles'
> ...
> """
CPU 16 is invalid (only 0 to 15 are valid), and so there are warnings
for the uncore 'imc_free_running/data_read/' events.
>

Ah, I thought CPU 16 was from another socket. For an invalid/offline
CPU, a warning should be OK.
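
For reference, a minimal sketch of the kernel-side capability I was referring
to (illustrative only, not taken from this patch or from any particular
driver): uncore drivers can mark their events with PERF_EV_CAP_READ_ACTIVE_PKG
so that a read can be serviced from any online CPU in the same package, which
is why a warning only really makes sense for an invalid/offline CPU.

```
/*
 * Illustrative sketch, assuming the usual event_init flow of an uncore
 * driver.  The function name and the elided validation are hypothetical;
 * the capability flag is the one added by d6a2f9035bfc ("perf/core:
 * Introduce PMU_EV_CAP_READ_ACTIVE_PKG").
 */
#include <linux/perf_event.h>

static int example_uncore_event_init(struct perf_event *event)
{
	/* ... usual PMU/event validation elided ... */

	/*
	 * Let perf_event_read() satisfy a read from any CPU in the package
	 * the event is active on, rather than always cross-calling the CPU
	 * named in the PMU's sysfs cpumask.
	 */
	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;

	return 0;
}
```

With that in place, the tool side only needs to reject CPUs that are not
online at all, which matches the cpu_map__online() check Ian quoted above.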

Thanks,
Kan

> Thanks,
> Ian
> 
>>>
>>> Warn if a CPU is requested that is offline for uncore events. Warn if
>>> a CPU is requested for a core PMU, but the CPU isn't within the cpu
>>> map of that PMU.
>>>
>>> For example on a 16 (0-15) CPU system:
>>> ```
>>> $ perf stat -e imc_free_running/data_read/,cycles -C 16 true
>>> WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_1' for event 'imc_free_running/data_read/'
>>> WARNING: Requested CPU(s) '16' not supported by PMU 'uncore_imc_free_running_0' for event 'imc_free_running/data_read/'
>>> WARNING: Requested CPU(s) '16' not supported by PMU 'cpu' for event 'cycles'
>>>
>>>  Performance counter stats for 'CPU(s) 16':
>>>
>>>    <not supported> MiB  imc_free_running/data_read/
>>>    <not supported>      cycles
>>>
>>>        0.000570094 seconds time elapsed
>>> ```
>>>
>>> Signed-off-by: Ian Rogers <irogers at google.com>
>>> ---
>>>  tools/perf/builtin-record.c     |  6 +--
>>>  tools/perf/builtin-stat.c       |  5 +--
>>>  tools/perf/util/cpumap.h        |  2 +-
>>>  tools/perf/util/evlist-hybrid.c | 74 ---------------------------------
>>>  tools/perf/util/evlist-hybrid.h |  1 -
>>>  tools/perf/util/evlist.c        | 44 ++++++++++++++++++++
>>>  tools/perf/util/evlist.h        |  2 +
>>>  tools/perf/util/pmu.c           | 33 ---------------
>>>  tools/perf/util/pmu.h           |  4 --
>>>  9 files changed, 49 insertions(+), 122 deletions(-)
>>>
>>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>>> index ec0f2d5f189f..9d212236c75a 100644
>>> --- a/tools/perf/builtin-record.c
>>> +++ b/tools/perf/builtin-record.c
>>> @@ -4198,11 +4198,7 @@ int cmd_record(int argc, const char **argv)
>>>       /* Enable ignoring missing threads when -u/-p option is defined. */
>>>       rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
>>>
>>> -     if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
>>> -             pr_err("failed to use cpu list %s\n",
>>> -                    rec->opts.target.cpu_list);
>>> -             goto out;
>>> -     }
>>> +     evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
>>>
>>>       rec->opts.target.hybrid = perf_pmu__has_hybrid();
>>>
>>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>>> index bc45cee3f77c..612467216306 100644
>>> --- a/tools/perf/builtin-stat.c
>>> +++ b/tools/perf/builtin-stat.c
>>> @@ -2462,10 +2462,7 @@ int cmd_stat(int argc, const char **argv)
>>>               }
>>>       }
>>>
>>> -     if (evlist__fix_hybrid_cpus(evsel_list, target.cpu_list)) {
>>> -             pr_err("failed to use cpu list %s\n", target.cpu_list);
>>> -             goto out;
>>> -     }
>>> +     evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);
>>>
>>>       target.hybrid = perf_pmu__has_hybrid();
>>>       if (evlist__create_maps(evsel_list, &target) < 0) {
>>> diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
>>> index e3426541e0aa..c1de993c083f 100644
>>> --- a/tools/perf/util/cpumap.h
>>> +++ b/tools/perf/util/cpumap.h
>>> @@ -59,7 +59,7 @@ struct perf_cpu cpu__max_present_cpu(void);
>>>  /**
>>>   * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1.
>>>   */
>>> -static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
>>> +static inline bool cpu_map__is_dummy(const struct perf_cpu_map *cpus)
>>>  {
>>>       return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1;
>>>  }
>>> diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c
>>> index 57f02beef023..db3f5fbdebe1 100644
>>> --- a/tools/perf/util/evlist-hybrid.c
>>> +++ b/tools/perf/util/evlist-hybrid.c
>>> @@ -86,77 +86,3 @@ bool evlist__has_hybrid(struct evlist *evlist)
>>>
>>>       return false;
>>>  }
>>> -
>>> -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list)
>>> -{
>>> -     struct perf_cpu_map *cpus;
>>> -     struct evsel *evsel, *tmp;
>>> -     struct perf_pmu *pmu;
>>> -     int ret, unmatched_count = 0, events_nr = 0;
>>> -
>>> -     if (!perf_pmu__has_hybrid() || !cpu_list)
>>> -             return 0;
>>> -
>>> -     cpus = perf_cpu_map__new(cpu_list);
>>> -     if (!cpus)
>>> -             return -1;
>>> -
>>> -     /*
>>> -      * The evsels are created with hybrid pmu's cpus. But now we
>>> -      * need to check and adjust the cpus of evsel by cpu_list because
>>> -      * cpu_list may cause conflicts with cpus of evsel. For example,
>>> -      * cpus of evsel is cpu0-7, but the cpu_list is cpu6-8, we need
>>> -      * to adjust the cpus of evsel to cpu6-7. And then propatate maps
>>> -      * in evlist__create_maps().
>>> -      */
>>> -     evlist__for_each_entry_safe(evlist, tmp, evsel) {
>>> -             struct perf_cpu_map *matched_cpus, *unmatched_cpus;
>>> -             char buf1[128], buf2[128];
>>> -
>>> -             pmu = perf_pmu__find_hybrid_pmu(evsel->pmu_name);
>>> -             if (!pmu)
>>> -                     continue;
>>> -
>>> -             ret = perf_pmu__cpus_match(pmu, cpus, &matched_cpus,
>>> -                                        &unmatched_cpus);
>>> -             if (ret)
>>> -                     goto out;
>>> -
>>> -             events_nr++;
>>> -
>>> -             if (perf_cpu_map__nr(matched_cpus) > 0 &&
>>> -                 (perf_cpu_map__nr(unmatched_cpus) > 0 ||
>>> -                  perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) ||
>>> -                  perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) {
>>> -                     perf_cpu_map__put(evsel->core.cpus);
>>> -                     perf_cpu_map__put(evsel->core.own_cpus);
>>> -                     evsel->core.cpus = perf_cpu_map__get(matched_cpus);
>>> -                     evsel->core.own_cpus = perf_cpu_map__get(matched_cpus);
>>> -
>>> -                     if (perf_cpu_map__nr(unmatched_cpus) > 0) {
>>> -                             cpu_map__snprint(matched_cpus, buf1, sizeof(buf1));
>>> -                             pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n",
>>> -                                        buf1, pmu->name, evsel->name);
>>> -                     }
>>> -             }
>>> -
>>> -             if (perf_cpu_map__nr(matched_cpus) == 0) {
>>> -                     evlist__remove(evlist, evsel);
>>> -                     evsel__delete(evsel);
>>> -
>>> -                     cpu_map__snprint(cpus, buf1, sizeof(buf1));
>>> -                     cpu_map__snprint(pmu->cpus, buf2, sizeof(buf2));
>>> -                     pr_warning("WARNING: %s isn't a '%s', please use a CPU list in the '%s' range (%s)\n",
>>> -                                buf1, pmu->name, pmu->name, buf2);
>>> -                     unmatched_count++;
>>> -             }
>>> -
>>> -             perf_cpu_map__put(matched_cpus);
>>> -             perf_cpu_map__put(unmatched_cpus);
>>> -     }
>>> -     if (events_nr)
>>> -             ret = (unmatched_count == events_nr) ? -1 : 0;
>>> -out:
>>> -     perf_cpu_map__put(cpus);
>>> -     return ret;
>>> -}
>>> diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h
>>> index aacdb1b0f948..19f74b4c340a 100644
>>> --- a/tools/perf/util/evlist-hybrid.h
>>> +++ b/tools/perf/util/evlist-hybrid.h
>>> @@ -10,6 +10,5 @@
>>>  int evlist__add_default_hybrid(struct evlist *evlist, bool precise);
>>>  void evlist__warn_hybrid_group(struct evlist *evlist);
>>>  bool evlist__has_hybrid(struct evlist *evlist);
>>> -int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list);
>>>
>>>  #endif /* __PERF_EVLIST_HYBRID_H */
>>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
>>> index a0504316b06f..5d0d99127a90 100644
>>> --- a/tools/perf/util/evlist.c
>>> +++ b/tools/perf/util/evlist.c
>>> @@ -2465,3 +2465,47 @@ void evlist__check_mem_load_aux(struct evlist *evlist)
>>>               }
>>>       }
>>>  }
>>> +
>>> +/**
>>> + * evlist__warn_user_requested_cpus() - Check each evsel against requested CPUs
>>> + *     and warn if the user CPU list is inapplicable for the event's PMUs
>>> + *     CPUs. Uncore PMUs list a CPU in sysfs, but this may be overwritten by a
>>> + *     user requested CPU and so any online CPU is applicable. Core PMUs handle
>>> + *     events on the CPUs in their list and otherwise the event isn't supported.
>>> + * @evlist: The list of events being checked.
>>> + * @cpu_list: The user provided list of CPUs.
>>> + */
>>> +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list)
>>> +{
>>> +     struct perf_cpu_map *user_requested_cpus;
>>> +     struct evsel *pos;
>>> +
>>> +     if (!cpu_list)
>>> +             return;
>>> +
>>> +     user_requested_cpus = perf_cpu_map__new(cpu_list);
>>> +     if (!user_requested_cpus)
>>> +             return;
>>> +
>>> +     evlist__for_each_entry(evlist, pos) {
>>> +             const struct perf_cpu_map *to_test;
>>> +             struct perf_cpu cpu;
>>> +             int idx;
>>> +             bool warn = true;
>>> +             const struct perf_pmu *pmu = evsel__find_pmu(pos);
>>> +
>>> +             to_test = pmu && pmu->is_uncore ? cpu_map__online() : evsel__cpus(pos);
>>> +
>>> +             perf_cpu_map__for_each_cpu(cpu, idx, to_test) {
>>> +                     if (perf_cpu_map__has(user_requested_cpus, cpu)) {
>>> +                             warn = false;
>>> +                             break;
>>> +                     }
>>> +             }
>>> +             if (warn) {
>>> +                     pr_warning("WARNING: Requested CPU(s) '%s' not supported by PMU '%s' for event '%s'\n",
>>> +                             cpu_list, pmu ? pmu->name : "cpu", evsel__name(pos));
>>> +             }
>>> +     }
>>> +     perf_cpu_map__put(user_requested_cpus);
>>> +}
>>> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
>>> index e7e5540cc970..5e7ff44f3043 100644
>>> --- a/tools/perf/util/evlist.h
>>> +++ b/tools/perf/util/evlist.h
>>> @@ -447,4 +447,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
>>>
>>>  int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
>>>  void evlist__check_mem_load_aux(struct evlist *evlist);
>>> +void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
>>> +
>>>  #endif /* __PERF_EVLIST_H */
>>> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
>>> index f4f0afbc391c..1e0be23d4dd7 100644
>>> --- a/tools/perf/util/pmu.c
>>> +++ b/tools/perf/util/pmu.c
>>> @@ -2038,39 +2038,6 @@ int perf_pmu__match(char *pattern, char *name, char *tok)
>>>       return 0;
>>>  }
>>>
>>> -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
>>> -                      struct perf_cpu_map **mcpus_ptr,
>>> -                      struct perf_cpu_map **ucpus_ptr)
>>> -{
>>> -     struct perf_cpu_map *pmu_cpus = pmu->cpus;
>>> -     struct perf_cpu_map *matched_cpus, *unmatched_cpus;
>>> -     struct perf_cpu cpu;
>>> -     int i, matched_nr = 0, unmatched_nr = 0;
>>> -
>>> -     matched_cpus = perf_cpu_map__default_new();
>>> -     if (!matched_cpus)
>>> -             return -1;
>>> -
>>> -     unmatched_cpus = perf_cpu_map__default_new();
>>> -     if (!unmatched_cpus) {
>>> -             perf_cpu_map__put(matched_cpus);
>>> -             return -1;
>>> -     }
>>> -
>>> -     perf_cpu_map__for_each_cpu(cpu, i, cpus) {
>>> -             if (!perf_cpu_map__has(pmu_cpus, cpu))
>>> -                     RC_CHK_ACCESS(unmatched_cpus)->map[unmatched_nr++] = cpu;
>>> -             else
>>> -                     RC_CHK_ACCESS(matched_cpus)->map[matched_nr++] = cpu;
>>> -     }
>>> -
>>> -     perf_cpu_map__set_nr(unmatched_cpus, unmatched_nr);
>>> -     perf_cpu_map__set_nr(matched_cpus, matched_nr);
>>> -     *mcpus_ptr = matched_cpus;
>>> -     *ucpus_ptr = unmatched_cpus;
>>> -     return 0;
>>> -}
>>> -
>>>  double __weak perf_pmu__cpu_slots_per_cycle(void)
>>>  {
>>>       return NAN;
>>> diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
>>> index 0e0cb6283594..49033bb134f3 100644
>>> --- a/tools/perf/util/pmu.h
>>> +++ b/tools/perf/util/pmu.h
>>> @@ -257,10 +257,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
>>>  bool perf_pmu__has_hybrid(void);
>>>  int perf_pmu__match(char *pattern, char *name, char *tok);
>>>
>>> -int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus,
>>> -                      struct perf_cpu_map **mcpus_ptr,
>>> -                      struct perf_cpu_map **ucpus_ptr);
>>> -
>>>  char *pmu_find_real_name(const char *name);
>>>  char *pmu_find_alias_name(const char *name);
>>>  double perf_pmu__cpu_slots_per_cycle(void);


