[PATCH 2/3] perf: Add driver for Arm NI-700 interconnect PMU
Will Deacon
will at kernel.org
Mon Jul 1 06:35:14 PDT 2024
On Thu, Apr 25, 2024 at 01:29:53PM +0100, Robin Murphy wrote:
> The Arm NI-700 Network-on-Chip Interconnect has a relatively
> straightforward design with a hierarchy of voltage, power, and clock
> domains, where each clock domain then contains a number of interface
> units and a PMU which can monitor events thereon. As such, it begets a
> relatively straightforward driver to interface those PMUs with perf.
>
> Even more so than with arm-cmn, users will require detailed knowledge of
> the wider system topology in order to meaningfully analyse anything,
> since the interconnect itself cannot know what lies beyond the boundary
> of each inscrutably-numbered interface. Given that, for now they are
> also expected to refer to the NI-700 documentation for the relevant
> event IDs to provide as well. An identifier is implemented so we can
> come back and add jevents if anyone really wants to.
>
> Signed-off-by: Robin Murphy <robin.murphy at arm.com>
> ---
> drivers/perf/Kconfig | 7 +
> drivers/perf/Makefile | 1 +
> drivers/perf/arm-ni.c | 767 ++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 775 insertions(+)
> create mode 100644 drivers/perf/arm-ni.c
Please can you add some documentation?
> +struct arm_ni {
> + struct device *dev;
> + void __iomem *base;
> + enum ni_part part;
> + int id;
> + int num_cds;
> + struct arm_ni_cd cds[];
> +};
Can you use that fancy new __counted_by thing here?
> +
> +#define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id])
> +#define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu)
> +
> +#define cd_for_each_unit(cd, u) \
> + for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++)
> +
> +static int arm_ni_hp_state;
> +
> +struct arm_ni_event_attr {
> + struct device_attribute attr;
> + enum ni_node_type type;
> +};
> +
> +#define NI_EVENT_ATTR(_name, _type) \
> + (&((struct arm_ni_event_attr[]) {{ \
> + .attr = __ATTR(_name, 0444, arm_ni_event_show, NULL), \
> + .type = _type, \
> + }})[0].attr.attr)
> +
> +static ssize_t arm_ni_event_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct arm_ni_event_attr *eattr = container_of(attr, typeof(*eattr), attr);
> +
> + if (eattr->type == NI_PMU)
> + return sysfs_emit(buf, "type=0x%x\n", eattr->type);
> +
> + return sysfs_emit(buf, "type=0x%x,eventid=?,nodeid=?\n", eattr->type);
I'm struggling to see what this is doing. Please can you explain it a
bit? For example, does the perf tool parse those "=?" entries, and why
is it useful to print out only the NI_PMU value for the other events?
> +}
> +
> +static umode_t arm_ni_event_attr_is_visible(struct kobject *kobj,
> + struct attribute *attr, int unused)
> +{
> + struct device *dev = kobj_to_dev(kobj);
> + struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev));
> + struct arm_ni_event_attr *eattr;
> +
> + eattr = container_of(attr, typeof(*eattr), attr.attr);
> +
> + cd_for_each_unit(cd, unit) {
> + if (unit->type == eattr->type && unit->ns)
> + return attr->mode;
> + }
> +
> + return 0;
> +}
> +
> +static struct attribute *arm_ni_event_attrs[] = {
> + NI_EVENT_ATTR(asni, NI_ASNI),
> + NI_EVENT_ATTR(amni, NI_AMNI),
> + NI_EVENT_ATTR(cycles, NI_PMU),
> + NI_EVENT_ATTR(hsni, NI_HSNI),
> + NI_EVENT_ATTR(hmni, NI_HMNI),
> + NI_EVENT_ATTR(pmni, NI_PMNI),
> + NULL
> +};
[...]
> +static bool arm_ni_validate_event(struct perf_event *this,
> + struct perf_event *that, int *count)
> +{
> + if (is_software_event(this))
> + return true;
> + if (this->pmu != that->pmu)
> + return false;
> + return --count[NI_EVENT_TYPE(this) == NI_PMU] >= 0;
This is pretty horrible to read :/
I don't know the details of this PMU, so is the count array saying that
you can have NI_NUM_COUNTERS events with the NI_PMU type, but only one
event of any other type?
> +}
> +
> +static int arm_ni_validate_group(struct perf_event *event)
> +{
> + struct perf_event *sibling, *leader;
> + int count[2] = {NI_NUM_COUNTERS, 1};
> +
> + arm_ni_validate_event(event, event, count);
> +
> + leader = event->group_leader;
> + if (leader != event && !arm_ni_validate_event(leader, event, count))
> + return - EINVAL;
> +
> + for_each_sibling_event(sibling, leader) {
> + if (!arm_ni_validate_event(sibling, event, count))
> + return - EINVAL;
> + }
> + return 0;
> +}
> +
> +static int arm_ni_event_init(struct perf_event *event)
> +{
> + struct arm_ni_cd *cd = pmu_to_cd(event->pmu);
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + if (is_sampling_event(event))
> + return -EINVAL;
> +
> + event->cpu = cd->cpu;
> + if (NI_EVENT_TYPE(event) == NI_PMU)
> + return arm_ni_validate_group(event);
> +
> + cd_for_each_unit(cd, unit) {
> + if (unit->type == NI_EVENT_TYPE(event) &&
> + unit->id == NI_EVENT_NODEID(event) && unit->ns) {
> + event->hw.config_base = (unsigned long)unit;
> + return arm_ni_validate_group(event);
> + }
> + }
> + return -EINVAL;
> +}
> +
> +static u64 arm_ni_read_ccnt(struct arm_ni_cd *cd)
> +{
> + u64 l, u_old, u_new;
> +
> + u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U);
> + do {
> + u_old = u_new;
> + l = readl_relaxed(cd->pmu_base + NI_PMCCNTR_L);
> + u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U);
> + } while (u_new != u_old);
> +
> + return (u_new << 32) | l;
> +}
oh man, they really don't have 64-bit registers on this thing? If we
have to loop like this, can we add a timeout just in case the hardware
goes wonky? It's like mixing together the worst parts of iopoll.h and
io-64-nonatomic-lo-hi.h.
There are a few other PMU drivers that look like they could share that
code if you added it as a generic helper.
Will
More information about the linux-arm-kernel
mailing list