[PATCH v2] Added perf functionality to mmdc driver

Zhengyu Shen zhengyu.shen at nxp.com
Mon Aug 15 15:30:35 PDT 2016


MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64 and
LPDDR2 two-channel x16/x32 memory types. MMDC is configurable, high performance,
and optimized. MMDC is present on i.MX6 Quad and i.MX6 QuadPlus devices, but
this driver only supports i.MX6 Quad at the moment. MMDC provides registers
for performance counters, which are read via this driver to help debug memory
throughput and similar issues.

$ perf stat -a -e mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/ dd if=/dev/zero of=/dev/null bs=1M count=5000
Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':

         898021787      mmdc/busy-cycles/
          14819600      mmdc/read-accesses/
            471.30 MB   mmdc/read-bytes/
        2815419216      mmdc/total-cycles/
          13367354      mmdc/write-accesses/
            427.76 MB   mmdc/write-bytes/

       5.334757334 seconds time elapsed

Signed-off-by: Zhengyu Shen <zhengyu.shen at nxp.com>
---
Changes from v1 to v2:
	Added cpumask and migration handling support to driver
	Validated event during event_init
	Added code to properly stop counters
	Used perf_invalid_context instead of perf_sw_context
	Added hrtimer to poll for overflow 
	Added better description
	Added support for multiple mmdcs

 arch/arm/mach-imx/mmdc.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 361 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index db9621c..372b59c 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011,2016 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -16,6 +16,10 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 
 #include "common.h"
 
@@ -27,14 +31,341 @@
 #define BM_MMDC_MDMISC_DDR_TYPE	0x18
 #define BP_MMDC_MDMISC_DDR_TYPE	0x3
 
+/* Event IDs carried in perf_event_attr.config; one per hardware counter. */
+#define TOTAL_CYCLES		1
+#define BUSY_CYCLES		2
+#define READ_ACCESSES		3
+#define WRITE_ACCESSES		4
+#define READ_BYTES		5
+#define WRITE_BYTES		6
+
+/* Values written to MMDC_MADPCR0: enable, reset, freeze, overflow. */
+#define DBG_DIS			0
+#define DBG_EN			(1 << 0)
+#define DBG_RST			(1 << 1)
+#define PRF_FRZ			(1 << 2)
+#define CYC_OVF			(1 << 3)
+
+/* Register offsets from the MMDC base: one control register, six counters. */
+#define MMDC_MADPCR0		0x410
+#define MMDC_MADPSR0		0x418
+#define MMDC_MADPSR1		0x41C
+#define MMDC_MADPSR2		0x420
+#define MMDC_MADPSR3		0x424
+#define MMDC_MADPSR4		0x428
+#define MMDC_MADPSR5		0x42C
+
+/* Size of the per-PMU event table; matches the number of event IDs above. */
+#define MMDC_NUM_COUNTERS	6
+
+/* Recover the enclosing struct mmdc_pmu from a pointer to its pmu member. */
+#define to_mmdc_pmu(p) (container_of(p, struct mmdc_pmu, pmu))
+
+/* Hands out the instance number returned by mmdc_pmu_init(). */
+static DEFINE_IDA(mmdc_ida);
+
 static int ddr_type;
 
+/*
+ * Named event aliases for this PMU. Every alias selects its counter
+ * through the "event" field declared by PMU_FORMAT_ATTR(event, ...); the two
+ * byte counters also publish a unit string and a scale factor.
+ */
+PMU_EVENT_ATTR_STRING(total-cycles, mmdc_total_cycles, "event=0x01");
+PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_busy_cycles, "event=0x02");
+PMU_EVENT_ATTR_STRING(read-accesses, mmdc_read_accesses, "event=0x03");
+PMU_EVENT_ATTR_STRING(write-accesses, mmdc_write_accesses, "event=0x04");
+PMU_EVENT_ATTR_STRING(read-bytes, mmdc_read_bytes, "event=0x05");
+PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_read_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_read_bytes_scale, "0.000001");
+PMU_EVENT_ATTR_STRING(write-bytes, mmdc_write_bytes, "event=0x06");
+PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_write_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_write_bytes_scale, "0.000001");
+
+/* State for one MMDC instance exposed as a perf PMU. */
+struct mmdc_pmu {
+	/* Embedded perf PMU; to_mmdc_pmu() maps it back to this struct. */
+	struct pmu pmu;
+	/* Register window passed in by the probe code. */
+	void __iomem *mmdc_base;
+	/* CPU that events of this PMU are bound to. */
+	cpumask_t cpu;
+	/* Hotplug notifier; its callback is mmdc_cpu_notifier(). */
+	struct notifier_block cpu_nb;
+	/* Periodic timer that polls the counters. */
+	struct hrtimer hrtimer;
+	/* NOTE(review): never assigned or read in this patch. */
+	unsigned int irq;
+	/* Device used for log messages. */
+	struct device *dev;
+	/* Events being counted, stored at index (event ID - 1). */
+	struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
+};
+
+/*
+ * Counter polling interval in microseconds, exposed as the module parameter
+ * "pmu_poll_period_us" (world-readable, owner-writable).
+ */
+static unsigned int mmdc_poll_period_us = 1000000;
+module_param_named(pmu_poll_period_us, mmdc_poll_period_us, uint,
+		   S_IRUGO | S_IWUSR);
+
+/* Return the current polling interval as a ktime_t. */
+static ktime_t mmdc_timer_period(void)
+{
+	/* Widen before multiplying so the us -> ns conversion cannot wrap. */
+	u64 period_ns = (u64)mmdc_poll_period_us * 1000;
+
+	return ns_to_ktime(period_ns);
+}
+
+/*
+ * sysfs show callback for the "cpumask" attribute: formats the PMU's CPU
+ * mask into @buf and returns the result of cpumap_print_to_pagebuf().
+ *
+ * NOTE(review): assumes the drvdata of @dev points at the struct mmdc_pmu
+ * -- confirm against how the PMU device's drvdata is set.
+ */
+static ssize_t mmdc_cpumask_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct mmdc_pmu *mmdc = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, &mmdc->cpu);
+}
+
+/* Read-only "cpumask" attribute backed by mmdc_cpumask_show(). */
+static DEVICE_ATTR(cpumask, S_IRUGO, mmdc_cpumask_show, NULL);
+
+static struct attribute *mmdc_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+/* Unnamed group: "cpumask" sits directly in the PMU's sysfs directory. */
+static struct attribute_group mmdc_cpumask_attr_group = {
+	.attrs = mmdc_cpumask_attrs,
+};
+
+/* Event aliases, plus unit/scale entries for the two byte counters. */
+static struct attribute *mmdc_events_attrs[] = {
+	/* cycle counters */
+	&mmdc_total_cycles.attr.attr,
+	&mmdc_busy_cycles.attr.attr,
+	/* access counters */
+	&mmdc_read_accesses.attr.attr,
+	&mmdc_write_accesses.attr.attr,
+	/* byte counters */
+	&mmdc_read_bytes.attr.attr,
+	&mmdc_read_bytes_unit.attr.attr,
+	&mmdc_read_bytes_scale.attr.attr,
+	&mmdc_write_bytes.attr.attr,
+	&mmdc_write_bytes_unit.attr.attr,
+	&mmdc_write_bytes_scale.attr.attr,
+	NULL,
+};
+
+/* Published as the "events" subdirectory of the PMU's sysfs node. */
+static struct attribute_group mmdc_events_attr_group = {
+	.attrs	= mmdc_events_attrs,
+	.name	= "events",
+};
+
+/* The only format field: "event" spans all of perf_event_attr.config. */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *mmdc_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+/* Published as the "format" subdirectory of the PMU's sysfs node. */
+static struct attribute_group mmdc_format_attr_group = {
+	.attrs	= mmdc_format_attrs,
+	.name	= "format",
+};
+
+/* Every sysfs group of the PMU; assigned to pmu.attr_groups at init time. */
+static const struct attribute_group *attr_groups[] = {
+	&mmdc_events_attr_group,
+	&mmdc_format_attr_group,
+	&mmdc_cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * Read the raw 32-bit hardware counter that backs an event ID.
+ *
+ * @pmu_mmdc: PMU instance whose register window is read
+ * @cfg:      event ID, TOTAL_CYCLES through WRITE_BYTES
+ * @prev_val: not used by this function
+ *
+ * Returns the register contents, or (u32)-1 if @cfg is not a known event ID.
+ */
+static u32 mmdc_read_counter(struct mmdc_pmu *pmu_mmdc, int cfg, u64 prev_val)
+{
+	/* Register offset for each event ID; slot 0 is unused. */
+	static const unsigned int counter_offset[] = {
+		[TOTAL_CYCLES]		= MMDC_MADPSR0,
+		[BUSY_CYCLES]		= MMDC_MADPSR1,
+		[READ_ACCESSES]		= MMDC_MADPSR2,
+		[WRITE_ACCESSES]	= MMDC_MADPSR3,
+		[READ_BYTES]		= MMDC_MADPSR4,
+		[WRITE_BYTES]		= MMDC_MADPSR5,
+	};
+
+	if (cfg < TOTAL_CYCLES || cfg > WRITE_BYTES)
+		return -1;
+
+	return readl_relaxed(pmu_mmdc->mmdc_base + counter_offset[cfg]);
+}
+
+/*
+ * Restart profiling on the MMDC that owns @event: write CYC_OVF | DBG_RST to
+ * the control register, then write DBG_EN.
+ */
+static void mmdc_enable_profiling(struct perf_event *event)
+{
+	void __iomem *ctrl = to_mmdc_pmu(event->pmu)->mmdc_base + MMDC_MADPCR0;
+
+	writel_relaxed(CYC_OVF | DBG_RST, ctrl);
+	writel_relaxed(DBG_EN, ctrl);
+}
+
+/*
+ * CPU hotplug callback. When the CPU recorded in pmu_mmdc->cpu is about to
+ * go down, move the perf context to another online CPU and record that CPU
+ * instead. Always returns NOTIFY_OK.
+ */
+static int mmdc_cpu_notifier(struct notifier_block *nb,
+			     unsigned long action, void *hcpu)
+{
+	struct mmdc_pmu *pmu_mmdc = container_of(nb, struct mmdc_pmu, cpu_nb);
+	unsigned int dying = (long)hcpu; /* for (long) see kernel/cpu.c */
+	unsigned int next;
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_DOWN_PREPARE)
+		return NOTIFY_OK;
+
+	/* Only act if the departing CPU is the one this PMU counts on. */
+	if (!cpumask_test_and_clear_cpu(dying, &pmu_mmdc->cpu))
+		return NOTIFY_OK;
+
+	/* No other online CPU: leave the mask empty. */
+	next = cpumask_any_but(cpu_online_mask, dying);
+	if (next >= nr_cpu_ids)
+		return NOTIFY_OK;
+
+	perf_pmu_migrate_context(&pmu_mmdc->pmu, dying, next);
+	cpumask_set_cpu(next, &pmu_mmdc->cpu);
+
+	return NOTIFY_OK;
+}
+
+/*
+ * perf event_init callback: decide whether @event can be counted by this PMU.
+ *
+ * Returns 0 on success, -ENOENT if the event belongs to another PMU,
+ * -EOPNOTSUPP for per-task events, and -EINVAL for sampling events,
+ * unsupported exclude_* filters or an unknown event ID.
+ */
+static int mmdc_event_init(struct perf_event *event)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	u64 cfg = event->attr.config;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (event->cpu < 0) {
+		dev_warn(pmu_mmdc->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* The counters are only polled; nothing ever delivers a sample. */
+	if (is_sampling_event(event))
+		return -EINVAL;
+
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host)
+		return -EINVAL;
+
+	/*
+	 * Check the full 64-bit config: the add/stop/read paths cast it to
+	 * int, so an oversized value could otherwise alias a valid event ID.
+	 */
+	if (cfg < TOTAL_CYCLES || cfg > WRITE_BYTES)
+		return -EINVAL;
+
+	/*
+	 * NOTE(review): this resets the hardware counters, which also affects
+	 * events that are already counting on this MMDC -- confirm intended.
+	 */
+	mmdc_enable_profiling(event);
+	event->cpu = cpumask_first(&pmu_mmdc->cpu);
+	local64_set(&event->count, 0);
+
+	return 0;
+}
+
+/*
+ * perf read callback: fold the current hardware reading into event->count.
+ *
+ * The low 32 bits of the accumulated count serve as the previous hardware
+ * reading; the unsigned 32-bit difference to the new reading is added.
+ */
+static void mmdc_event_update(struct perf_event *event)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	u64 total = local64_read(&event->count);
+	u32 last = (u32)(total & 0xFFFFFFFF);
+	u32 now = mmdc_read_counter(pmu_mmdc, (int)event->attr.config, total);
+
+	local64_add(now - last, &event->count);
+}
+
+/*
+ * perf start callback: zero the software count, arm the polling timer and
+ * restart the hardware counters (DBG_RST followed by DBG_EN).
+ */
+static void mmdc_event_start(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	void __iomem *ctrl = pmu_mmdc->mmdc_base + MMDC_MADPCR0;
+
+	local64_set(&event->count, 0);
+
+	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
+		      HRTIMER_MODE_REL_PINNED);
+
+	writel_relaxed(DBG_RST, ctrl);
+	writel_relaxed(DBG_EN, ctrl);
+}
+
+/*
+ * perf add callback: record @event in the slot for its event ID and start
+ * it when PERF_EF_START is set in @flags.
+ *
+ * Returns 0 on success, or -EINVAL if the event ID is outside
+ * 1..MMDC_NUM_COUNTERS. Such an event has no counter behind it, so it is
+ * refused instead of being started untracked.
+ */
+static int mmdc_event_add(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	int cfg = (int)event->attr.config;
+
+	if (cfg < 1 || cfg > MMDC_NUM_COUNTERS)
+		return -EINVAL;
+
+	pmu_mmdc->mmdc_events[cfg - 1] = event;
+
+	if (flags & PERF_EF_START)
+		mmdc_event_start(event, flags);
+
+	return 0;
+}
+
+/*
+ * perf stop callback: for a valid event ID, cancel the polling timer, write
+ * PRF_FRZ to the control register and fold the reading into event->count.
+ * Events with an out-of-range ID are ignored.
+ */
+static void mmdc_event_stop(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	int cfg = (int)event->attr.config;
+
+	if (cfg < 1 || cfg > MMDC_NUM_COUNTERS)
+		return;
+
+	if (hrtimer_active(&pmu_mmdc->hrtimer))
+		hrtimer_cancel(&pmu_mmdc->hrtimer);
+
+	writel_relaxed(PRF_FRZ, pmu_mmdc->mmdc_base + MMDC_MADPCR0);
+	mmdc_event_update(event);
+}
+
+/*
+ * perf del callback: stop @event and remove it from the PMU's event table.
+ *
+ * The table entry must be cleared here. mmdc_overflow_handler() walks every
+ * non-NULL entry whenever the polling timer fires, and the timer is re-armed
+ * by the next mmdc_event_start(); a stale entry would then be dereferenced
+ * after the event is gone.
+ */
+static void mmdc_event_del(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	int cfg = (int)event->attr.config;
+
+	/* Stop first: this cancels the timer before the slot is cleared. */
+	mmdc_event_stop(event, PERF_EF_UPDATE);
+
+	/* Clear the slot only if it still refers to this event. */
+	if (cfg >= 1 && cfg <= MMDC_NUM_COUNTERS &&
+	    pmu_mmdc->mmdc_events[cfg - 1] == event)
+		pmu_mmdc->mmdc_events[cfg - 1] = NULL;
+}
+
+/*
+ * Fold the current hardware reading of every tracked event into its count.
+ * Slot i of mmdc_events[] is read with event ID i + 1.
+ */
+static void mmdc_overflow_handler(struct mmdc_pmu *pmu_mmdc)
+{
+	int slot;
+
+	for (slot = 0; slot < MMDC_NUM_COUNTERS; slot++) {
+		struct perf_event *event = pmu_mmdc->mmdc_events[slot];
+		u64 total;
+		u32 now;
+
+		if (!event)
+			continue;
+
+		total = local64_read(&event->count);
+		now = mmdc_read_counter(pmu_mmdc, slot + 1, total);
+		/* Low 32 bits of the total stand in for the last reading. */
+		local64_add(now - (u32)(total & 0xFFFFFFFF), &event->count);
+	}
+}
+
+/*
+ * hrtimer callback: poll all tracked counters, then push the expiry forward
+ * by one polling period and ask for the timer to be restarted.
+ */
+static enum hrtimer_restart mmdc_timer_handler(struct hrtimer *hrtimer)
+{
+	struct mmdc_pmu *owner;
+
+	owner = container_of(hrtimer, struct mmdc_pmu, hrtimer);
+	mmdc_overflow_handler(owner);
+	hrtimer_forward_now(hrtimer, mmdc_timer_period());
+
+	return HRTIMER_RESTART;
+}
+
+/*
+ * Fill in a freshly allocated struct mmdc_pmu.
+ *
+ * @pmu_mmdc:  structure to initialise; overwritten completely
+ * @mmdc_base: register window of this MMDC instance
+ * @dev:       device used for log messages
+ *
+ * Returns the instance number from ida_simple_get(), which is negative if
+ * no ID could be allocated.
+ */
+static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc, void __iomem *mmdc_base,
+			 struct device *dev)
+{
+	int mmdc_num;
+
+	*pmu_mmdc = (struct mmdc_pmu) {
+		.pmu = (struct pmu) {
+			.task_ctx_nr    = perf_invalid_context,
+			.attr_groups    = attr_groups,
+			.event_init     = mmdc_event_init,
+			.add            = mmdc_event_add,
+			.del            = mmdc_event_del,
+			.start          = mmdc_event_start,
+			.stop           = mmdc_event_stop,
+			.read           = mmdc_event_update,
+		},
+		.mmdc_base = mmdc_base,
+	};
+
+	mmdc_num = ida_simple_get(&mmdc_ida, 0, 0, GFP_KERNEL);
+
+	/*
+	 * Any CPU will do as the initial owner. Use the raw_ variant: this is
+	 * called from the probe path, which presumably runs preemptible, where
+	 * smp_processor_id() triggers a debug warning.
+	 */
+	cpumask_set_cpu(raw_smp_processor_id(), &pmu_mmdc->cpu);
+
+	pmu_mmdc->cpu_nb.notifier_call = mmdc_cpu_notifier;
+	pmu_mmdc->cpu_nb.priority = CPU_PRI_PERF + 1;
+
+	pmu_mmdc->dev = dev;
+	return mmdc_num;
+}
+
 static int imx_mmdc_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	void __iomem *mmdc_base, *reg;
+	struct mmdc_pmu *pmu_mmdc;
+	char * name;
 	u32 val;
 	int timeout = 0x400;
+	int mmdc_num;
 
 	mmdc_base = of_iomap(np, 0);
 	WARN_ON(!mmdc_base);
@@ -61,7 +392,35 @@ static int imx_mmdc_probe(struct platform_device *pdev)
 			__func__);
 		return -EBUSY;
 	}
+	pmu_mmdc = kzalloc(sizeof(*pmu_mmdc), GFP_KERNEL);
 
+	if (!pmu_mmdc) {
+		pr_err("failed to allocate PMU device!\n");
+		return -ENOMEM;
+	}
+	mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
+	dev_info(pmu_mmdc->dev, "No access to interrupts, using timer.\n");
+	hrtimer_init(&pmu_mmdc->hrtimer, CLOCK_MONOTONIC,
+			HRTIMER_MODE_REL);
+	pmu_mmdc->hrtimer.function = mmdc_timer_handler;
+	register_cpu_notifier(&pmu_mmdc->cpu_nb);
+	if (mmdc_num == 0) {
+		name = "mmdc";
+	} else {
+		int len = snprintf(NULL, 0, "mmdc_%d", mmdc_num);
+		name = devm_kzalloc(&pdev->dev, len + 1, GFP_KERNEL);
+		snprintf(name, len + 1, "mmdc_%d", mmdc_num);
+	}
+	platform_set_drvdata(pdev, pmu_mmdc);
+	perf_pmu_register(&(pmu_mmdc->pmu), name, -1);
+	return 0;
+}
+
+/*
+ * Platform remove callback: tear down everything the probe path set up that
+ * refers to the struct mmdc_pmu, then free it.
+ *
+ * The CPU notifier and the polling timer both point into the structure, so
+ * they must be removed before kfree().
+ *
+ * NOTE(review): the instance number taken from mmdc_ida is not stored
+ * anywhere, so it cannot be released here.
+ */
+static int imx_mmdc_remove(struct platform_device *pdev)
+{
+	struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev);
+
+	unregister_cpu_notifier(&pmu_mmdc->cpu_nb);
+	perf_pmu_unregister(&pmu_mmdc->pmu);
+	hrtimer_cancel(&pmu_mmdc->hrtimer);
+	kfree(pmu_mmdc);
 	return 0;
 }
 
@@ -81,6 +440,7 @@ static struct platform_driver imx_mmdc_driver = {
 		.of_match_table = imx_mmdc_dt_ids,
 	},
 	.probe		= imx_mmdc_probe,
+	.remove		= imx_mmdc_remove,
 };
 
 static int __init imx_mmdc_init(void)
-- 
2.9.3




More information about the linux-arm-kernel mailing list