[PATCH 2/3] perf/arm-cmn: Rework DTC counters (again)

Will Deacon <will at kernel.org>
Mon Oct 23 03:26:54 PDT 2023


On Fri, Oct 20, 2023 at 06:51:26PM +0100, Robin Murphy wrote:
> The bitmap-based scheme for tracking DTC counter usage turns out to be a
> complete dead-end for its imagined purpose, since by the time we have to
> keep track of a per-DTC counter index anyway, we already have enough
> information to make the bitmap itself redundant. Revert the remains of
> it back to almost the original scheme, but now expanded to track per-DTC
> indices, in preparation for making use of them in anger.
> 
> Note that since cycle count events always use a dedicated counter on a
> single DTC, we reuse the field to encode their DTC index directly.
> 
> Signed-off-by: Robin Murphy <robin.murphy at arm.com>
> ---
>  drivers/perf/arm-cmn.c | 126 +++++++++++++++++++++--------------------
>  1 file changed, 64 insertions(+), 62 deletions(-)
> 
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index f1ac8d0cdb3b..675f1638013e 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -281,16 +281,13 @@ struct arm_cmn_node {
>  	u16 id, logid;
>  	enum cmn_node_type type;
>  
> -	int dtm;
> -	union {
> -		/* DN/HN-F/CXHA */
> -		struct {
> -			u8 val : 4;
> -			u8 count : 4;
> -		} occupid[SEL_MAX];
> -		/* XP */
> -		u8 dtc;
> -	};
> +	u8 dtm;
> +	s8 dtc;
> +	/* DN/HN-F/CXHA */
> +	struct {
> +		u8 val : 4;
> +		u8 count : 4;
> +	} occupid[SEL_MAX];
>  	union {
>  		u8 event[4];
>  		__le32 event_sel;
> @@ -540,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
>  
>  		seq_puts(s, "\n     |");
>  		for (x = 0; x < cmn->mesh_x; x++) {
> -			u8 dtc = cmn->xps[xp_base + x].dtc;
> +			s8 dtc = cmn->xps[xp_base + x].dtc;
>  
> -			if (dtc & (dtc - 1))
> +			if (dtc < 0)
>  				seq_puts(s, " DTC ?? |");
>  			else
> -				seq_printf(s, " DTC %ld  |", __ffs(dtc));
> +				seq_printf(s, " DTC %d  |", dtc);
>  		}
>  		seq_puts(s, "\n     |");
>  		for (x = 0; x < cmn->mesh_x; x++)
> @@ -589,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
>  struct arm_cmn_hw_event {
>  	struct arm_cmn_node *dn;
>  	u64 dtm_idx[4];
> -	unsigned int dtc_idx;
> -	u8 dtcs_used;
> +	s8 dtc_idx[CMN_MAX_DTCS];
>  	u8 num_dns;
>  	u8 dtm_offset;
>  	bool wide_sel;
> @@ -600,6 +596,10 @@ struct arm_cmn_hw_event {
>  #define for_each_hw_dn(hw, dn, i) \
>  	for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
>  
> +/* @i is the DTC number, @idx is the counter index on that DTC */
> +#define for_each_hw_dtc_idx(hw, i, idx) \
> +	for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)

This macro is pretty hideous ;) The kbuild robot complained as well, but
given that it's internal to the driver and it does make the callsites
quite a bit simpler, I'm inclined to stick with it for now. At least, I
couldn't come up with something else which was just as succinct.

Will



More information about the linux-arm-kernel mailing list