[PATCH v3 4/9] drivers/perf: hisi: Add new functions for L3C PMU
Shaokun Zhang
zhangshaokun at hisilicon.com
Mon Feb 8 02:20:27 EST 2021
On HiSilicon Hip09 platform, some new functions are enhanced on L3C PMU:
* tt_req: it is the abbreviation of tracetag request and allows user to
count only read/write/atomic operations. tt_req is 3-bit and details are
listed in the hisi-pmu document.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5
* tt_core: it is the abbreviation of tracetag core and allows user to
filter by core/thread within the cluster, it is a 8-bit bitmap that each
bit represents the corresponding core/thread in this L3C.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5
* datasrc_cfg: it is the abbreviation of data source configuration and
allows user to check where the data comes from, such as: from local DDR,
cross-die DDR or cross-socket DDR. Its is 5-bit and represents different
data source in the SoC.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0xe/ sleep 5
* datasrc_skt: it is the abbreviation of data source from another socket
and is used in the multi-chips, if user wants to check the cross-socket
datat source, it shall be added in perf command. Only one bit is used to
control this.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0x10,datasrc_skt=1/ sleep 5
Cc: Mark Rutland <mark.rutland at arm.com>
Cc: Will Deacon <will at kernel.org>
Cc: John Garry <john.garry at huawei.com>
Cc: Jonathan Cameron <Jonathan.Cameron at huawei.com>
Reviewed-by: John Garry <john.garry at huawei.com>
Co-developed-by: Qi Liu <liuqi115 at huawei.com>
Signed-off-by: Qi Liu <liuqi115 at huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun at hisilicon.com>
---
drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 259 +++++++++++++++++++++++++--
drivers/perf/hisilicon/hisi_uncore_pmu.c | 8 +-
drivers/perf/hisilicon/hisi_uncore_pmu.h | 11 ++
3 files changed, 258 insertions(+), 20 deletions(-)
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 2b26386480dd..bf9f7772cac9 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -23,12 +23,17 @@
#define L3C_INT_MASK 0x0800
#define L3C_INT_STATUS 0x0808
#define L3C_INT_CLEAR 0x080c
+#define L3C_CORE_CTRL 0x1b04
+#define L3C_TRACETAG_CTRL 0x1b20
+#define L3C_DATSRC_TYPE 0x1b48
+#define L3C_DATSRC_CTRL 0x1bf0
#define L3C_EVENT_CTRL 0x1c00
#define L3C_VERSION 0x1cf0
#define L3C_EVENT_TYPE0 0x1d00
/*
- * Each counter is 48-bits and [48:63] are reserved
- * which are Read-As-Zero and Writes-Ignored.
+ * If the HW version only supports a 48-bit counter, then
+ * bits [63:48] are reserved, which are Read-As-Zero and
+ * Writes-Ignored.
*/
#define L3C_CNTR0_LOWER 0x1e00
@@ -36,8 +41,186 @@
#define L3C_NR_COUNTERS 0x8
#define L3C_PERF_CTRL_EN 0x10000
+#define L3C_TRACETAG_EN BIT(31)
+#define L3C_TRACETAG_REQ_SHIFT 7
+#define L3C_TRACETAG_MARK_EN BIT(0)
+#define L3C_TRACETAG_REQ_EN (L3C_TRACETAG_MARK_EN | BIT(2))
+#define L3C_TRACETAG_CORE_EN (L3C_TRACETAG_MARK_EN | BIT(3))
+#define L3C_CORE_EN BIT(20)
+#define L3C_COER_NONE 0x0
+#define L3C_DATSRC_MASK 0xFF
+#define L3C_DATSRC_SKT_EN BIT(23)
+#define L3C_DATSRC_NONE 0x0
#define L3C_EVTYPE_NONE 0xff
#define L3C_V1_NR_EVENTS 0x59
+#define L3C_V2_NR_EVENTS 0xFF
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+
+static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ u32 tt_req = hisi_get_tt_req(event);
+
+ if (tt_req) {
+ u32 val;
+
+ /* Set request-type for tracetag */
+ val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
+ val |= L3C_TRACETAG_REQ_EN;
+ writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+ /* Enable request-tracetag statistics */
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val |= L3C_TRACETAG_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+}
+
+static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ u32 tt_req = hisi_get_tt_req(event);
+
+ if (tt_req) {
+ u32 val;
+
+ /* Clear request-type */
+ val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
+ val &= ~L3C_TRACETAG_REQ_EN;
+ writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+ /* Disable request-tracetag statistics */
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val &= ~L3C_TRACETAG_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+ }
+}
+
+static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u32 reg, reg_idx, shift, val;
+ int idx = hwc->idx;
+
+ /*
+ * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1).
+ * There are 2 datasource ctrl register for the 8 hardware counters.
+ * Datasrc is 8-bits and for the former 4 hardware counters,
+ * L3C_DATSRC_TYPE0 is chosen. For the latter 4 hardware counters,
+ * L3C_DATSRC_TYPE1 is chosen.
+ */
+ reg = L3C_DATSRC_TYPE + (idx / 4) * 4;
+ reg_idx = idx % 4;
+ shift = 8 * reg_idx;
+
+ val = readl(l3c_pmu->base + reg);
+ val &= ~(L3C_DATSRC_MASK << shift);
+ val |= ds_cfg << shift;
+ writel(val, l3c_pmu->base + reg);
+}
+
+static void hisi_l3c_pmu_config_ds(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ u32 ds_cfg = hisi_get_datasrc_cfg(event);
+ u32 ds_skt = hisi_get_datasrc_skt(event);
+
+ if (ds_cfg)
+ hisi_l3c_pmu_write_ds(event, ds_cfg);
+
+ if (ds_skt) {
+ u32 val;
+
+ val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val |= L3C_DATSRC_SKT_EN;
+ writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ }
+}
+
+static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ u32 ds_cfg = hisi_get_datasrc_cfg(event);
+ u32 ds_skt = hisi_get_datasrc_skt(event);
+
+ if (ds_cfg)
+ hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE);
+
+ if (ds_skt) {
+ u32 val;
+
+ val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+ val &= ~L3C_DATSRC_SKT_EN;
+ writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+ }
+}
+
+static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ u32 core = hisi_get_tt_core(event);
+
+ if (core) {
+ u32 val;
+
+ /* Config and enable core information */
+ writel(core, l3c_pmu->base + L3C_CORE_CTRL);
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val |= L3C_CORE_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+ /* Enable core-tracetag statistics */
+ val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val |= L3C_TRACETAG_CORE_EN;
+ writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ }
+}
+
+static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
+{
+ struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+ u32 core = hisi_get_tt_core(event);
+
+ if (core) {
+ u32 val;
+
+ /* Clear core information */
+ writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val &= ~L3C_CORE_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+ /* Disable core-tracetag statistics */
+ val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+ val &= ~L3C_TRACETAG_CORE_EN;
+ writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+ }
+}
+
+static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
+{
+ if (event->attr.config1 != 0x0) {
+ hisi_l3c_pmu_config_req_tracetag(event);
+ hisi_l3c_pmu_config_core_tracetag(event);
+ hisi_l3c_pmu_config_ds(event);
+ }
+}
+
+static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
+{
+ if (event->attr.config1 != 0x0) {
+ hisi_l3c_pmu_clear_ds(event);
+ hisi_l3c_pmu_clear_core_tracetag(event);
+ hisi_l3c_pmu_clear_req_tracetag(event);
+ }
+}
/*
* Select the counter register offset using the counter index
@@ -50,14 +233,12 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc)
{
- /* Read 64-bits and the upper 16 bits are RAZ */
return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
}
static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc, u64 val)
{
- /* Write 64-bits and the upper 16 bits are WI */
writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
}
@@ -166,23 +347,14 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
{ "HISI0213", },
- {},
+ { "HISI0214", },
+ {}
};
MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
- unsigned long long id;
- acpi_status status;
-
- status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
- "_UID", NULL, &id);
- if (ACPI_FAILURE(status))
- return -EINVAL;
-
- l3c_pmu->index_id = id;
-
/*
* Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
* SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
@@ -220,6 +392,20 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
.attrs = hisi_l3c_pmu_v1_format_attr,
};
+static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+ HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+ HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
+ HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
+ NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
+ .name = "format",
+ .attrs = hisi_l3c_pmu_v2_format_attr,
+};
+
static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00),
HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01),
@@ -242,6 +428,19 @@ static const struct attribute_group hisi_l3c_pmu_v1_events_group = {
.attrs = hisi_l3c_pmu_v1_events_attr,
};
+static struct attribute *hisi_l3c_pmu_v2_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(l3c_hit, 0x48),
+ HISI_PMU_EVENT_ATTR(cycles, 0x7f),
+ HISI_PMU_EVENT_ATTR(l3c_ref, 0xb8),
+ HISI_PMU_EVENT_ATTR(dat_access, 0xb9),
+ NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
+ .name = "events",
+ .attrs = hisi_l3c_pmu_v2_events_attr,
+};
+
static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
@@ -273,6 +472,14 @@ static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
NULL,
};
+static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
+ &hisi_l3c_pmu_v2_format_group,
+ &hisi_l3c_pmu_v2_events_group,
+ &hisi_l3c_pmu_cpumask_attr_group,
+ &hisi_l3c_pmu_identifier_group,
+ NULL
+};
+
static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.write_evtype = hisi_l3c_pmu_write_evtype,
.get_event_idx = hisi_uncore_pmu_get_event_idx,
@@ -286,6 +493,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.read_counter = hisi_l3c_pmu_read_counter,
.get_int_status = hisi_l3c_pmu_get_int_status,
.clear_int_status = hisi_l3c_pmu_clear_int_status,
+ .enable_filter = hisi_l3c_pmu_enable_filter,
+ .disable_filter = hisi_l3c_pmu_disable_filter,
};
static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
@@ -301,12 +510,20 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
+ if (l3c_pmu->identifier >= HISI_PMU_V2) {
+ l3c_pmu->counter_bits = 64;
+ l3c_pmu->check_event = L3C_V2_NR_EVENTS;
+ l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
+ } else {
+ l3c_pmu->counter_bits = 48;
+ l3c_pmu->check_event = L3C_V1_NR_EVENTS;
+ l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
+ }
+
l3c_pmu->num_counters = L3C_NR_COUNTERS;
- l3c_pmu->counter_bits = 48;
l3c_pmu->ops = &hisi_uncore_l3c_ops;
l3c_pmu->dev = &pdev->dev;
l3c_pmu->on_cpu = -1;
- l3c_pmu->check_event = L3C_V1_NR_EVENTS;
return 0;
}
@@ -334,8 +551,12 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
return ret;
}
+ /*
+ * CCL_ID is used to identify the L3C in the same SCCL which was
+ * used _UID by mistake.
+ */
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
- l3c_pmu->sccl_id, l3c_pmu->index_id);
+ l3c_pmu->sccl_id, l3c_pmu->ccl_id);
l3c_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
@@ -348,7 +569,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
.start = hisi_uncore_pmu_start,
.stop = hisi_uncore_pmu_stop,
.read = hisi_uncore_pmu_read,
- .attr_groups = hisi_l3c_pmu_v1_attr_groups,
+ .attr_groups = l3c_pmu->pmu_events.attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 29e2f94a190d..e20e4c9b278a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -21,7 +21,7 @@
#include "hisi_uncore_pmu.h"
#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
-#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
+#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
/*
* PMU format attributes
@@ -245,6 +245,9 @@ static void hisi_uncore_pmu_enable_event(struct perf_event *event)
hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
HISI_GET_EVENTID(event));
+ if (hisi_pmu->ops->enable_filter)
+ hisi_pmu->ops->enable_filter(event);
+
hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
}
@@ -257,6 +260,9 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event)
struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (hisi_pmu->ops->disable_filter)
+ hisi_pmu->ops->disable_filter(event);
+
hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 933020c99e3e..1591dbc6a119 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -11,6 +11,7 @@
#ifndef __HISI_UNCORE_PMU_H__
#define __HISI_UNCORE_PMU_H__
+#include <linux/bitfield.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/kernel.h>
@@ -22,6 +23,7 @@
#undef pr_fmt
#define pr_fmt(fmt) "hisi_pmu: " fmt
+#define HISI_PMU_V2 0x30
#define HISI_MAX_COUNTERS 0x10
#define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
@@ -35,6 +37,12 @@
#define HISI_PMU_EVENT_ATTR(_name, _config) \
HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
+#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo) \
+ static inline u32 hisi_get_##name(struct perf_event *event) \
+ { \
+ return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \
+ }
+
struct hisi_pmu;
struct hisi_uncore_ops {
@@ -50,11 +58,14 @@ struct hisi_uncore_ops {
void (*stop_counters)(struct hisi_pmu *);
u32 (*get_int_status)(struct hisi_pmu *hisi_pmu);
void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx);
+ void (*enable_filter)(struct perf_event *event);
+ void (*disable_filter)(struct perf_event *event);
};
struct hisi_pmu_hwevents {
struct perf_event *hw_events[HISI_MAX_COUNTERS];
DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
+ const struct attribute_group **attr_groups;
};
/* Generic pmu struct for different pmu types */
--
2.7.4
More information about the linux-arm-kernel
mailing list