[PATCH 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU
Geetha sowjanya
gakula at marvell.com
Sun Jun 7 05:50:59 PDT 2026
From: Tanmay Jagdale <tanmay at marvell.com>
The TAD PMU exposes counters that can be filtered by MPAM partition ID
(PARTID) for a subset of the allocation and hit events.
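Add a 16-bit partid format attribute (config1:0-15) and route counter
programming through variant-specific ops, so that CN10K gains MPAM-aware
counter programming while Odyssey keeps its reduced event set and does
not advertise partid in sysfs. The PARTID filter is applied only when
partid is non-zero and the selected event is in the range 0x1a-0x20;
otherwise the counter is programmed without filtering.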
Example:
perf stat -e tad/tad_alloc_any,partid=0x12/ -- <program>
Signed-off-by: Tanmay Jagdale <tanmay at marvell.com>
Signed-off-by: Geetha sowjanya <gakula at marvell.com>
---
drivers/perf/marvell_cn10k_tad_pmu.c | 151 ++++++++++++++++++++-------
1 file changed, 112 insertions(+), 39 deletions(-)
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 51ccb0befa05..7a3b659c999a 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "tad_pmu: " fmt
#include <linux/io.h>
+#include <linux/bits.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/cpuhotplug.h>
@@ -14,11 +15,13 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
-#define TAD_PFC_OFFSET 0x800
-#define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3))
#define TAD_PRF_OFFSET 0x900
-#define TAD_PRF(counter) (TAD_PRF_OFFSET | (counter << 3))
+#define TAD_PFC_OFFSET 0x800
+#define TAD_PFC(base, counter) ((base) | ((u64)(counter) << 3))
+#define TAD_PRF(base, counter) ((base) | ((u64)(counter) << 3))
#define TAD_PRF_CNTSEL_MASK 0xFF
+#define TAD_PRF_MATCH_PARTID BIT(8)
+#define TAD_PRF_PARTID_NS BIT(10)
#define TAD_MAX_COUNTERS 8
#define to_tad_pmu(p) (container_of(p, struct tad_pmu, pmu))
@@ -27,30 +30,92 @@ struct tad_region {
void __iomem *base;
};
+enum mrvl_tad_pmu_version {
+ TAD_PMU_V1 = 1,
+ TAD_PMU_V2,
+};
+
+struct tad_pmu_data {
+ int id;
+ u64 tad_prf_offset;
+ u64 tad_pfc_offset;
+};
+
struct tad_pmu {
struct pmu pmu;
struct tad_region *regions;
u32 region_cnt;
unsigned int cpu;
+ const struct tad_pmu_ops *ops;
+ const struct tad_pmu_data *pdata;
struct hlist_node node;
struct perf_event *events[TAD_MAX_COUNTERS];
DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS);
};
-enum mrvl_tad_pmu_version {
- TAD_PMU_V1 = 1,
- TAD_PMU_V2,
-};
-
-struct tad_pmu_data {
- int id;
+struct tad_pmu_ops {
+ void (*start_counter)(struct tad_pmu *pmu, struct perf_event *event);
};
static int tad_pmu_cpuhp_state;
+static void tad_pmu_start_counter(struct tad_pmu *pmu,
+ struct perf_event *event)
+{
+ const struct tad_pmu_data *pdata = pmu->pdata;
+ struct hw_perf_event *hwc = &event->hw;
+ u32 event_idx = event->attr.config;
+ u32 counter_idx = hwc->idx;
+ u64 partid_filter = 0;
+ u64 reg_val;
+ u32 partid;
+ int i;
+
+ partid = (u32)(event->attr.config1 & GENMASK(15, 0));
+
+ for (i = 0; i < pmu->region_cnt; i++)
+ writeq_relaxed(0, pmu->regions[i].base +
+ TAD_PFC(pdata->tad_pfc_offset, counter_idx));
+
+ if (partid && event_idx > 0x19 && event_idx < 0x21) {
+ partid_filter = TAD_PRF_MATCH_PARTID | TAD_PRF_PARTID_NS |
+ ((u64)partid << 11);
+ }
+
+
+ for (i = 0; i < pmu->region_cnt; i++) {
+ reg_val = event_idx & 0xFF;
+ reg_val |= partid_filter;
+ writeq_relaxed(reg_val, pmu->regions[i].base +
+ TAD_PRF(pdata->tad_prf_offset, counter_idx));
+ }
+}
+
+static void tad_pmu_v2_start_counter(struct tad_pmu *pmu,
+ struct perf_event *event)
+{
+ const struct tad_pmu_data *pdata = pmu->pdata;
+ struct hw_perf_event *hwc = &event->hw;
+ u32 event_idx = event->attr.config;
+ u32 counter_idx = hwc->idx;
+ u64 reg_val;
+ int i;
+
+ for (i = 0; i < pmu->region_cnt; i++)
+ writeq_relaxed(0, pmu->regions[i].base +
+ TAD_PFC(pdata->tad_pfc_offset, counter_idx));
+
+ for (i = 0; i < pmu->region_cnt; i++) {
+ reg_val = event_idx & 0xFF;
+ writeq_relaxed(reg_val, pmu->regions[i].base +
+ TAD_PRF(pdata->tad_prf_offset, counter_idx));
+ }
+}
+
static void tad_pmu_event_counter_read(struct perf_event *event)
{
struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+ const struct tad_pmu_data *pdata = tad_pmu->pdata;
struct hw_perf_event *hwc = &event->hw;
u32 counter_idx = hwc->idx;
u64 prev, new;
@@ -60,7 +125,7 @@ static void tad_pmu_event_counter_read(struct perf_event *event)
prev = local64_read(&hwc->prev_count);
for (i = 0, new = 0; i < tad_pmu->region_cnt; i++)
new += readq(tad_pmu->regions[i].base +
- TAD_PFC(counter_idx));
+ TAD_PFC(pdata->tad_pfc_offset, counter_idx));
} while (local64_cmpxchg(&hwc->prev_count, prev, new) != prev);
local64_add(new - prev, &event->count);
@@ -69,16 +134,14 @@ static void tad_pmu_event_counter_read(struct perf_event *event)
static void tad_pmu_event_counter_stop(struct perf_event *event, int flags)
{
struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+ const struct tad_pmu_data *pdata = tad_pmu->pdata;
struct hw_perf_event *hwc = &event->hw;
u32 counter_idx = hwc->idx;
int i;
- /* TAD()_PFC() stop counting on the write
- * which sets TAD()_PRF()[CNTSEL] == 0
- */
for (i = 0; i < tad_pmu->region_cnt; i++) {
writeq_relaxed(0, tad_pmu->regions[i].base +
- TAD_PRF(counter_idx));
+ TAD_PRF(pdata->tad_prf_offset, counter_idx));
}
tad_pmu_event_counter_read(event);
@@ -89,26 +152,10 @@ static void tad_pmu_event_counter_start(struct perf_event *event, int flags)
{
struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- u32 event_idx = event->attr.config;
- u32 counter_idx = hwc->idx;
- u64 reg_val;
- int i;
hwc->state = 0;
- /* Typically TAD_PFC() are zeroed to start counting */
- for (i = 0; i < tad_pmu->region_cnt; i++)
- writeq_relaxed(0, tad_pmu->regions[i].base +
- TAD_PFC(counter_idx));
-
- /* TAD()_PFC() start counting on the write
- * which sets TAD()_PRF()[CNTSEL] != 0
- */
- for (i = 0; i < tad_pmu->region_cnt; i++) {
- reg_val = event_idx & 0xFF;
- writeq_relaxed(reg_val, tad_pmu->regions[i].base +
- TAD_PRF(counter_idx));
- }
+ tad_pmu->ops->start_counter(tad_pmu, event);
}
static void tad_pmu_event_counter_del(struct perf_event *event, int flags)
@@ -128,7 +175,6 @@ static int tad_pmu_event_counter_add(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx;
- /* Get a free counter for this event */
idx = find_first_zero_bit(tad_pmu->counters_map, TAD_MAX_COUNTERS);
if (idx == TAD_MAX_COUNTERS)
return -EAGAIN;
@@ -232,7 +278,7 @@ static struct attribute *ody_tad_pmu_event_attrs[] = {
TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
- TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF),
+ TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xff),
NULL
};
@@ -242,9 +288,11 @@ static const struct attribute_group ody_tad_pmu_events_attr_group = {
};
PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(partid, "config1:0-15");
static struct attribute *tad_pmu_format_attrs[] = {
&format_attr_event.attr,
+ &format_attr_partid.attr,
NULL
};
@@ -253,6 +301,16 @@ static struct attribute_group tad_pmu_format_attr_group = {
.attrs = tad_pmu_format_attrs,
};
+static struct attribute *ody_tad_pmu_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL
+};
+
+static struct attribute_group ody_tad_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = ody_tad_pmu_format_attrs,
+};
+
static ssize_t tad_pmu_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -281,11 +339,19 @@ static const struct attribute_group *tad_pmu_attr_groups[] = {
static const struct attribute_group *ody_tad_pmu_attr_groups[] = {
&ody_tad_pmu_events_attr_group,
- &tad_pmu_format_attr_group,
+ &ody_tad_pmu_format_attr_group,
&tad_pmu_cpumask_attr_group,
NULL
};
+static const struct tad_pmu_ops tad_pmu_ops = {
+ .start_counter = tad_pmu_start_counter,
+};
+
+static const struct tad_pmu_ops tad_pmu_v2_ops = {
+ .start_counter = tad_pmu_v2_start_counter,
+};
+
static int tad_pmu_probe(struct platform_device *pdev)
{
const struct tad_pmu_data *dev_data;
@@ -312,6 +378,7 @@ static int tad_pmu_probe(struct platform_device *pdev)
return -ENODEV;
}
version = dev_data->id;
+ tad_pmu->pdata = dev_data;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
@@ -344,7 +411,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
if (!regions)
return -ENOMEM;
- /* ioremap the distributed TAD pmu regions */
for (i = 0; i < tad_cnt && res->start < res->end; i++) {
regions[i].base = devm_ioremap(&pdev->dev,
res->start,
@@ -374,14 +440,16 @@ static int tad_pmu_probe(struct platform_device *pdev)
.read = tad_pmu_event_counter_read,
};
- if (version == TAD_PMU_V1)
+ if (version == TAD_PMU_V1) {
tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
- else
+ tad_pmu->ops = &tad_pmu_ops;
+ } else {
tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
+ tad_pmu->ops = &tad_pmu_v2_ops;
+ }
tad_pmu->cpu = raw_smp_processor_id();
- /* Register pmu instance for cpu hotplug */
ret = cpuhp_state_add_instance_nocalls(tad_pmu_cpuhp_state,
&tad_pmu->node);
if (ret) {
@@ -410,12 +478,17 @@ static void tad_pmu_remove(struct platform_device *pdev)
#if defined(CONFIG_OF) || defined(CONFIG_ACPI)
static const struct tad_pmu_data tad_pmu_data = {
.id = TAD_PMU_V1,
+ .tad_prf_offset = TAD_PRF_OFFSET,
+ .tad_pfc_offset = TAD_PFC_OFFSET,
};
+
#endif
#ifdef CONFIG_ACPI
static const struct tad_pmu_data tad_pmu_v2_data = {
.id = TAD_PMU_V2,
+ .tad_prf_offset = TAD_PRF_OFFSET,
+ .tad_pfc_offset = TAD_PFC_OFFSET,
};
#endif
--
2.25.1
More information about the linux-arm-kernel
mailing list