[PATCH v4 15/21] PM / devfreq: rockchip-dfi: Add perf support

Sascha Hauer s.hauer at pengutronix.de
Wed May 17 07:26:34 PDT 2023


On Wed, May 17, 2023 at 11:53:59AM +0100, Jonathan Cameron wrote:
> On Fri,  5 May 2023 13:38:50 +0200
> Sascha Hauer <s.hauer at pengutronix.de> wrote:
> 
> > The DFI is a unit which is suitable for measuring DDR utilization, but
> > so far it could only be used as an event driver for the DDR frequency
> > scaling driver. This adds perf support to the DFI driver.
> > 
> > Usage with the 'perf' tool can look like:
> > 
> > perf stat -a -e rockchip_ddr/cycles/,\
> > 		rockchip_ddr/read-bytes/,\
> > 		rockchip_ddr/write-bytes/,\
> > 		rockchip_ddr/bytes/ sleep 1
> > 
> >  Performance counter stats for 'system wide':
> > 
> >         1582524826      rockchip_ddr/cycles/
> >            1802.25 MB   rockchip_ddr/read-bytes/
> >            1793.72 MB   rockchip_ddr/write-bytes/
> >            3595.90 MB   rockchip_ddr/bytes/
> > 
> >        1.014369709 seconds time elapsed
> > 
> > perf support has been tested on a RK3568 and a RK3399, the latter with
> > dual channel DDR.
> > 
> > Signed-off-by: Sascha Hauer <s.hauer at pengutronix.de>
> 
> A few comments to add to Robin's review.
> 
> > ---
> >  drivers/devfreq/event/rockchip-dfi.c | 349 +++++++++++++++++++++++++++
> >  include/soc/rockchip/rk3399_grf.h    |   2 +
> >  include/soc/rockchip/rk3568_grf.h    |   1 +
> >  3 files changed, 352 insertions(+)
> > 
> > diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c
> > index eae010644935a..400b1b360e3c9 100644
> > --- a/drivers/devfreq/event/rockchip-dfi.c
> > +++ b/drivers/devfreq/event/rockchip-dfi.c
> > @@ -20,6 +20,7 @@
> >  #include <linux/of_device.h>
> >  #include <linux/bitfield.h>
> >  #include <linux/bits.h>
> > +#include <linux/perf_event.h>
> >  
> >  #include <soc/rockchip/rockchip_grf.h>
> >  #include <soc/rockchip/rk3399_grf.h>
> > @@ -41,14 +42,30 @@
> >  					 DDRMON_CTRL_LPDDR4 | \
> >  					 DDRMON_CTRL_LPDDR23)
> >  
> > +#define DDRMON_CH0_WR_NUM		0x20
> > +#define DDRMON_CH0_RD_NUM		0x24
> >  #define DDRMON_CH0_COUNT_NUM		0x28
> >  #define DDRMON_CH0_DFI_ACCESS_NUM	0x2c
> >  #define DDRMON_CH1_COUNT_NUM		0x3c
> >  #define DDRMON_CH1_DFI_ACCESS_NUM	0x40
> >  
> > +enum access_type {
> > +	PERF_EVENT_CYCLES,
> > +	PERF_EVENT_READ_BYTES,
> > +	PERF_EVENT_WRITE_BYTES,
> > +	PERF_EVENT_BYTES,
> > +	PERF_ACCESS_TYPE_MAX,
> > +};
> > +
> >  struct dmc_count_channel {
> >  	u32 access;
> >  	u32 total;
> > +	u32 read_access;
> > +	u32 write_access;
> 
> Silly question, but is access = read_access + write_access?
> If so no need for keeping track of all 3 around.

Yes, 'access' is the sum of 'read_access' and 'write_access'.

The number of accesses can be read from a separate register. The original
devfreq driver used that register only, the perf support now wants to
have the read and write accesses counted separately.

You are right, we could now remove 'access'. I'm not sure it's worth it
though: at least during development I was happy to see that the values
are consistent.

> 
> Come to think of it, total could do with a more meaningful name
> or a comment at least.  total what?

'total' counts the number of clock cycles in the DDR controller. I'll
add a patch to rename it accordingly.

> 
> > +};
> > +
> > +struct dmc_count_channel64 {
> > +	u64 count[PERF_ACCESS_TYPE_MAX];
> >  };
> 
> Why bother with the structure?

Because copying a struct like this works:

	struct dmc_count_channel64 a, b;

	a = b;

With plain arrays it doesn't work without looping over the array or
using memcpy.

> 
> >  
> >  struct dmc_count {
> > @@ -65,6 +82,7 @@ struct rockchip_dfi {
> >  	struct devfreq_event_desc desc;
> >  	struct dmc_count count;
> >  	struct dmc_count last_event_count;
> > +	struct dmc_count last;
> 
> Needs a more descriptive name...

Yes, I already changed that here. This will look different overall
in the next round.

> 
> >  	struct device *dev;
> >  	void __iomem *regs;
> >  	struct regmap *regmap_pmu;
> > @@ -73,6 +91,15 @@ struct rockchip_dfi {
> >  	struct mutex mutex;
> >  	u32 ddr_type;
> >  	unsigned int channel_mask;
> > +	enum cpuhp_state cpuhp_state;
> > +	struct hlist_node node;
> > +	struct pmu pmu;
> > +	struct hrtimer timer;
> > +	unsigned int cpu;
> > +	struct dmc_count_channel64 frr;
> > +	int active_events;
> > +	int burst_len;
> > +	int buswidth[DMC_MAX_CHANNELS];
> >  };
> >  
> >  static int rockchip_dfi_enable(struct rockchip_dfi *dfi)
> > @@ -148,6 +175,10 @@ static void rockchip_dfi_read_counters(struct rockchip_dfi *dfi, struct dmc_coun
> >  	for (i = 0; i < DMC_MAX_CHANNELS; i++) {
> >  		if (!(dfi->channel_mask & BIT(i)))
> >  			continue;
> > +		count->c[i].read_access = readl_relaxed(dfi_regs +
> > +				DDRMON_CH0_RD_NUM + i * 20);
> > +		count->c[i].write_access = readl_relaxed(dfi_regs +
> > +				DDRMON_CH0_WR_NUM + i * 20);
> 
> I guess not expensive, but you could gate these on the perf support being built given
> they are not used for anything else

True, but I think the code is more readable without an #ifdef or IS_ENABLED() here.

> 
> >  		count->c[i].access = readl_relaxed(dfi_regs +
> >  				DDRMON_CH0_DFI_ACCESS_NUM + i * 20);
> >  		count->c[i].total = readl_relaxed(dfi_regs +
> > @@ -218,6 +249,305 @@ static const struct devfreq_event_ops rockchip_dfi_ops = {
> >  	.set_event = rockchip_dfi_set_event,
> >  };
> >  
> 
> > +
> > +static void rockchip_ddr_perf_event_start(struct perf_event *event, int flags)
> > +{
> > +	struct rockchip_dfi *dfi = container_of(event->pmu, struct rockchip_dfi, pmu);
> > +
> > +	rockchip_ddr_perf_update_counters(dfi);
> > +
> > +	local64_set(&event->hw.prev_count, dfi->frr.count[event->attr.config]);
> > +}
> > +
> > +static int rockchip_ddr_perf_event_add(struct perf_event *event, int flags)
> > +{
> > +	struct rockchip_dfi *dfi = container_of(event->pmu, struct rockchip_dfi, pmu);
> > +	struct hw_perf_event *hwc = &event->hw;
> > +
> > +	hwc->state |= PERF_HES_STOPPED;
> > +
> > +	dfi->active_events++;
> > +
> > +	if (dfi->active_events == 1) {
> > +		rockchip_dfi_enable(dfi);
> > +		hrtimer_start(&dfi->timer, 0, HRTIMER_MODE_REL);
> 
> Trigger immediately?  Lot of work to just call the function.  Perhaps
> instead wrap the contents of the callback with two functions, the callback
> itself and one that does same reads etc and starts the timer.

No need to trigger immediately, it just has to trigger before the counters
overflow. I can pass ns_to_ktime(NSEC_PER_SEC) instead of 0.

> 
> I guess it doesn't really matter though.
> 
> To my mind the timer start should be in the event_start callback, but I
> see there is plenty of precedent for doing it in add and I doubt it matters.
> 
> > +	}
> > +
> > +	if (flags & PERF_EF_START)
> > +		rockchip_ddr_perf_event_start(event, flags);
> > +
> > +	return 0;
> > +}
> > +
> > +static void rockchip_ddr_perf_event_stop(struct perf_event *event, int flags)
> > +{
> > +	rockchip_ddr_perf_event_update(event);
> > +}
> > +
> > +static void rockchip_ddr_perf_event_del(struct perf_event *event, int flags)
> > +{
> > +	struct rockchip_dfi *dfi = container_of(event->pmu, struct rockchip_dfi, pmu);
> > +
> > +	rockchip_ddr_perf_event_stop(event, PERF_EF_UPDATE);
> > +
> > +	dfi->active_events--;
> > +
> > +	if (dfi->active_events == 0) {
> > +		hrtimer_cancel(&dfi->timer);
> > +		rockchip_dfi_disable(dfi);
> > +	}
> > +}
> > +
> > +static enum hrtimer_restart rockchip_dfi_timer(struct hrtimer *timer)
> > +{
> > +	struct rockchip_dfi *dfi = container_of(timer, struct rockchip_dfi, timer);
> > +	ktime_t timeout;
> > +
> > +	rockchip_ddr_perf_update_counters(dfi);
> > +
> > +	timeout = ns_to_ktime(NSEC_PER_SEC);
> > +	hrtimer_forward_now(&dfi->timer, timeout);
> 
> Trivial: No real advantage in local variable.
> 
> 	hrtimer_forward_now(&dfi->timer, ns_to_ktime(NSEC_PER_SEC));
> 
> > +
> > +	return HRTIMER_RESTART;
> > +};
> 
> ...
> 
> > +static int rockchip_ddr_perf_init(struct rockchip_dfi *dfi)
> > +{
> 
> ...
> 
> > +
> > +	dfi->cpuhp_state = ret;
> > +
> > +	/* Register the pmu instance for cpu hotplug */
> 
> I'd argue that's pretty obvious so comment not needed.
> 
> > +	ret = cpuhp_state_add_instance_nocalls(dfi->cpuhp_state, &dfi->node);
> > +	if (ret) {
> > +		dev_err(dfi->dev, "Error %d registering hotplug\n", ret);
> > +		goto cpuhp_instance_err;
> > +	}
> > +
> > +	hrtimer_init(&dfi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> > +	dfi->timer.function = rockchip_dfi_timer;
> > +
> > +	switch (dfi->ddr_type) {
> > +	case ROCKCHIP_DDRTYPE_LPDDR2:
> > +	case ROCKCHIP_DDRTYPE_LPDDR3:
> > +		dfi->burst_len = 8;
> > +		break;
> > +	case ROCKCHIP_DDRTYPE_LPDDR4:
> > +	case ROCKCHIP_DDRTYPE_LPDDR4X:
> > +		dfi->burst_len = 16;
> > +		break;
> > +	}
> > +
> > +	ret = perf_pmu_register(pmu, "rockchip_ddr", -1);
> > +	if (ret)
> > +		goto ddr_perf_err;
> > +
> > +	return 0;
> > +
> > +ddr_perf_err:
> > +	cpuhp_state_remove_instance_nocalls(dfi->cpuhp_state, &dfi->node);
> > +cpuhp_instance_err:
> > +	cpuhp_remove_multi_state(dfi->cpuhp_state);
> 
> Instead of a single devm callback as suggested below, you could
> do them separately for the various steps.  That way you can avoid
> the need to do explicit error handling in this function.

Ok, will add some devm_add_action_or_reset().

Sascha

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



More information about the Linux-rockchip mailing list