[PATCH 1/3] perf/arm-cmn: Decouple wp_config registers from filter group number

Robin Murphy robin.murphy at arm.com
Mon Jan 29 09:06:00 PST 2024


Hi Ilkka,

On 2024-01-26 10:12 pm, Ilkka Koskinen wrote:
> Previously, wp_config0/2 registers were used for primary match group and
> wp_config1/3 registers for secondary match group. In order to support
> tertiary match group, this patch decouples the registers and the groups.

Happy to see you having a stab at this, however I fear I you're in for a 
fair dose of "if it were this simple I might have already done it" :)

> Allocation is changed to dynamic but it's still per mesh instance rather
> than per node.
> 
> Signed-off-by: Ilkka Koskinen <ilkka at os.amperecomputing.com>
> ---
>   drivers/perf/arm-cmn.c | 52 ++++++++++++++++++++++++++++++++++--------
>   1 file changed, 43 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index c584165b13ba..93eb47ea7e25 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -591,6 +591,7 @@ struct arm_cmn_hw_event {
>   	u8 dtm_offset;
>   	bool wide_sel;
>   	enum cmn_filter_select filter_sel;
> +	int wp_idx;
>   };
>   
>   #define for_each_hw_dn(hw, dn, i) \
> @@ -1337,7 +1338,35 @@ static const struct attribute_group *arm_cmn_attr_groups[] = {
>   
>   static int arm_cmn_wp_idx(struct perf_event *event)
>   {
> -	return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
> +	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
> +
> +	return hw->wp_idx;

Sorry, this breaks group validation.

> +}
> +
> +static int arm_cmn_wp_idx_unused(struct perf_event *event, struct arm_cmn_dtm *dtm,
> +	struct arm_cmn_dtc *dtc)
> +{
> +	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
> +	int idx, tmp, direction = CMN_EVENT_EVENTID(event);
> +
> +	/*
> +	 * Examine wp 0 & 1 for the up direction,
> +	 * examine wp 2 & 3 for the down direction
> +	 */
> +	for (idx = direction; idx < direction + 2; idx++)
> +		if (dtm->wp_event[idx] < 0)
> +			break;
> +
> +	if (idx == direction + 2)
> +		return -ENOSPC;
> +
> +	tmp = dtm->wp_event[idx ^ 1];
> +	if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
> +	    CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
> +		return -ENOSPC;
> +
> +	hw->wp_idx = idx;

I don't really get this logic either - we can allocate a potentially 
different index for every DTM, but only store the most recent one?

> +	return hw->wp_idx;
>   }
>   
>   static u32 arm_cmn_wp_config(struct perf_event *event)
> @@ -1785,6 +1814,8 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
>   
>   	for_each_hw_dtc_idx(hw, j, idx)
>   		cmn->dtc[j].counters[idx] = NULL;
> +
> +	hw->wp_idx = -1;
>   }
>   
>   static int arm_cmn_event_add(struct perf_event *event, int flags)
> @@ -1794,6 +1825,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>   	struct arm_cmn_node *dn;
>   	enum cmn_node_type type = CMN_EVENT_TYPE(event);
>   	unsigned int input_sel, i = 0;
> +	int wp_idx;
>   
>   	if (type == CMN_TYPE_DTC) {
>   		while (cmn->dtc[i].cycles)
> @@ -1822,6 +1854,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>   	}
>   
>   	/* ...then the local counters to feed them */
> +	wp_idx = -1;

Oh, I guess this trying to avoid some of that issue, but I still don't 
think it works - say we add an event targeted to XP B, which sees WP0 is 
free on DTM B so allocates index 0; then we add another event 
aggregating across XPs A and B, which sees WP0 is free on DTM A, 
allocates index 0, then goes on to stomp WP0 on DTM B as well - oops.

I don't think it's going to be feasible to do this without tracking the 
full allocation state with a wp_idx bitmap in the hw_event - at least it 
only needs to be half the size of dtm_idx, so I think there's still room.

Thanks,
Robin.

>   	for_each_hw_dn(hw, dn, i) {
>   		struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
>   		unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
> @@ -1835,16 +1868,17 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
>   		if (type == CMN_TYPE_XP) {
>   			input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
>   		} else if (type == CMN_TYPE_WP) {
> -			int tmp, wp_idx = arm_cmn_wp_idx(event);
>   			u32 cfg = arm_cmn_wp_config(event);
>   
> -			if (dtm->wp_event[wp_idx] >= 0)
> -				goto free_dtms;
> -
> -			tmp = dtm->wp_event[wp_idx ^ 1];
> -			if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
> -					CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
> -				goto free_dtms;
> +			/*
> +			 * wp_config register index is currently allocated per
> +			 * mesh instance rather than per node.
> +			 */
> +			if (wp_idx < 0) {
> +				wp_idx = arm_cmn_wp_idx_unused(event, dtm, &cmn->dtc[d]);
> +				if (wp_idx < 0)
> +					goto free_dtms;
> +			}
>   
>   			input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
>   			dtm->wp_event[wp_idx] = hw->dtc_idx[d];



More information about the linux-arm-kernel mailing list