[PATCH 1/3] perf vendor events arm64: fix swapped MetricGroup for Tegra410 L1 prefetcher metrics
Saurav Sachidanand
sauravsc at amazon.com
Mon May 11 14:08:32 PDT 2026
The L1D prefetcher metrics (l1d_cache_rw_miss_ratio, l1d_demand_accesses,
etc.) were incorrectly assigned to MetricGroup "L1I_Prefetcher_Effectiveness"
and vice versa. This causes 'perf stat -M L1D_Prefetcher_Effectiveness'
to display L1I metrics, which is misleading.
Swap the MetricGroup assignments so L1D metrics are in the L1D group and
L1I metrics are in the L1I group.
Fixes: 86ff690f45cc ("perf vendor events arm64: Add Tegra410 Olympus PMU events")
Signed-off-by: Saurav Sachidanand <sauravsc at amazon.com>
---
.../arch/arm64/nvidia/t410/metrics.json | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/tools/perf/pmu-events/arch/arm64/nvidia/t410/metrics.json b/tools/perf/pmu-events/arch/arm64/nvidia/t410/metrics.json
index b825ede03f544..59474ccb7417f 100644
--- a/tools/perf/pmu-events/arch/arm64/nvidia/t410/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/nvidia/t410/metrics.json
@@ -346,42 +346,42 @@
"MetricExpr": "l1d_demand_misses / l1d_demand_accesses",
"BriefDescription": "This metric measures the ratio of L1 D-cache Read accesses missed to the total number of L1 D-cache accesses. This gives an indication of the effectiveness of the L1 D-cache for demand Load or Store traffic.",
"ScaleUnit": "1per cache access",
- "MetricGroup": "L1I_Prefetcher_Effectiveness"
+ "MetricGroup": "L1D_Prefetcher_Effectiveness"
},
{
"MetricName": "l1d_demand_accesses",
"MetricExpr": "L1D_CACHE_RW",
"BriefDescription": "This metric measures the count of L1 D-cache accesses incurred on Load or Store by the instruction stream of the program.",
"ScaleUnit": "1count",
- "MetricGroup": "L1I_Prefetcher_Effectiveness"
+ "MetricGroup": "L1D_Prefetcher_Effectiveness"
},
{
"MetricName": "l1d_demand_misses",
"MetricExpr": "L1D_CACHE_REFILL_RW",
"BriefDescription": "This metric measures the count of L1 D-cache misses incurred on a Load or Store by the instruction stream of the program.",
"ScaleUnit": "1count",
- "MetricGroup": "L1I_Prefetcher_Effectiveness"
+ "MetricGroup": "L1D_Prefetcher_Effectiveness"
},
{
"MetricName": "l1d_prf_accuracy",
"MetricExpr": "100 * (l1d_useful_prf / l1d_refilled_prf)",
"BriefDescription": "This metric measures the fraction of prefetched memory addresses that are used by the instruction stream.",
"ScaleUnit": "1percent of prefetch",
- "MetricGroup": "L1I_Prefetcher_Effectiveness"
+ "MetricGroup": "L1D_Prefetcher_Effectiveness"
},
{
"MetricName": "l1d_prf_coverage",
"MetricExpr": "100 * (l1d_useful_prf / (l1d_demand_misses + l1d_refilled_prf))",
"BriefDescription": "This metric measures the baseline demand cache misses which the prefetcher brings into the cache.",
"ScaleUnit": "1percent of cache access",
- "MetricGroup": "L1I_Prefetcher_Effectiveness"
+ "MetricGroup": "L1D_Prefetcher_Effectiveness"
},
{
"MetricName": "l1d_refilled_prf",
"MetricExpr": "L1D_CACHE_REFILL_HWPRF + L1D_CACHE_REFILL_PRFM + L1D_LFB_HIT_RW_FHWPRF + L1D_LFB_HIT_RW_FPRFM",
"BriefDescription": "This metric measures the count of cache lines refilled by L1 data prefetcher (hardware prefetches or software preload) into L1 D-cache.",
"ScaleUnit": "1count",
- "MetricGroup": "L1I_Prefetcher_Effectiveness"
+ "MetricGroup": "L1D_Prefetcher_Effectiveness"
},
{
"MetricName": "l1d_tlb_miss_ratio",
@@ -402,7 +402,7 @@
"MetricExpr": "L1D_CACHE_HIT_RW_FPRF + L1D_LFB_HIT_RW_FHWPRF + L1D_LFB_HIT_RW_FPRFM",
"BriefDescription": "This metric measures the count of cache lines refilled by L1 data prefetcher (hardware prefetches or software preload) into L1 D-cache which are further used by Load or Store from the instruction stream of the program.",
"ScaleUnit": "1count",
- "MetricGroup": "L1I_Prefetcher_Effectiveness"
+ "MetricGroup": "L1D_Prefetcher_Effectiveness"
},
{
"MetricName": "l1i_cache_miss_ratio",
@@ -423,42 +423,42 @@
"MetricExpr": "l1i_demand_misses / l1i_demand_accesses",
"BriefDescription": "This metric measures the ratio of L1 I-cache Read accesses missed to the total number of L1 I-cache accesses. This gives an indication of the effectiveness of the L1 I-cache for demand instruction fetch traffic. Note that cache accesses in this cache are demand instruction fetch.",
"ScaleUnit": "1per cache access",
- "MetricGroup": "L1D_Prefetcher_Effectiveness"
+ "MetricGroup": "L1I_Prefetcher_Effectiveness"
},
{
"MetricName": "l1i_demand_accesses",
"MetricExpr": "L1I_CACHE_RD",
"BriefDescription": "This metric measures the count of L1 I-cache accesses caused by an instruction fetch by the instruction stream of the program.",
"ScaleUnit": "1count",
- "MetricGroup": "L1D_Prefetcher_Effectiveness"
+ "MetricGroup": "L1I_Prefetcher_Effectiveness"
},
{
"MetricName": "l1i_demand_misses",
"MetricExpr": "L1I_CACHE_REFILL_RD",
"BriefDescription": "This metric measures the count of L1 I-cache misses caused by an instruction fetch by the instruction stream of the program.",
"ScaleUnit": "1count",
- "MetricGroup": "L1D_Prefetcher_Effectiveness"
+ "MetricGroup": "L1I_Prefetcher_Effectiveness"
},
{
"MetricName": "l1i_prf_accuracy",
"MetricExpr": "100 * (l1i_useful_prf / l1i_refilled_prf)",
"BriefDescription": "This metric measures the fraction of prefetched memory addresses that are used by the instruction stream.",
"ScaleUnit": "1percent of prefetch",
- "MetricGroup": "L1D_Prefetcher_Effectiveness"
+ "MetricGroup": "L1I_Prefetcher_Effectiveness"
},
{
"MetricName": "l1i_prf_coverage",
"MetricExpr": "100 * (l1i_useful_prf / (l1i_demand_misses + l1i_refilled_prf))",
"BriefDescription": "This metric measures the baseline demand cache misses which the prefetcher brings into the cache.",
"ScaleUnit": "1percent of cache access",
- "MetricGroup": "L1D_Prefetcher_Effectiveness"
+ "MetricGroup": "L1I_Prefetcher_Effectiveness"
},
{
"MetricName": "l1i_refilled_prf",
"MetricExpr": "L1I_CACHE_REFILL_HWPRF + L1I_CACHE_REFILL_PRFM",
"BriefDescription": "This metric measures the count of cache lines refilled by L1 instruction prefetcher (hardware prefetches or software preload) into L1 I-cache.",
"ScaleUnit": "1count",
- "MetricGroup": "L1D_Prefetcher_Effectiveness"
+ "MetricGroup": "L1I_Prefetcher_Effectiveness"
},
{
"MetricName": "l1i_tlb_miss_ratio",
@@ -479,7 +479,7 @@
"MetricExpr": "L1I_CACHE_HIT_RD_FPRF",
"BriefDescription": "This metric measures the count of cache lines refilled by L1 instruction prefetcher (hardware prefetches or software preload) into L1 I-cache which are further used by instruction stream of the program.",
"ScaleUnit": "1count",
- "MetricGroup": "L1D_Prefetcher_Effectiveness"
+ "MetricGroup": "L1I_Prefetcher_Effectiveness"
},
{
"MetricName": "l2_cache_miss_ratio",
--
2.47.3
More information about the linux-arm-kernel
mailing list