[RFC PATCH 37/38] arm_mpam: Add workaround for T241-MPAM-6

Ben Horgan ben.horgan at arm.com
Tue Dec 9 09:06:41 PST 2025


Hi James, Shanker,

On 12/5/25 21:59, James Morse wrote:
> From: Shanker Donthineni <sdonthineni at nvidia.com>
> 
> The registers MSMON_MBWU_L and MSMON_MBWU return the number of
> requests rather than the number of bytes transferred.
> 
> Bandwidth resource monitoring is performed at the last level cache,
> where each request arrives in 64-byte granularity. The current
> implementation returns the number of transactions received at the
> last level cache but does not provide the value in bytes. Scaling
> by 64 gives an accurate byte count to match the MPAM specification
> for the MSMON_MBWU and MSMON_MBWU_L registers. This patch fixes
> the issue by reporting the actual number of bytes instead of the
> number of transactions from __ris_msmon_read().
> 
> Signed-off-by: Shanker Donthineni <sdonthineni at nvidia.com>
> Signed-off-by: James Morse <james.morse at arm.com>
> ---
>  Documentation/arch/arm64/silicon-errata.rst |  2 ++
>  drivers/resctrl/mpam_devices.c              | 24 +++++++++++++++++++--
>  drivers/resctrl/mpam_internal.h             |  1 +
>  3 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
> index b18bc704d4a1..e810b2a8f40e 100644
> --- a/Documentation/arch/arm64/silicon-errata.rst
> +++ b/Documentation/arch/arm64/silicon-errata.rst
> @@ -250,6 +250,8 @@ stable kernels.
>  +----------------+-----------------+-----------------+-----------------------------+
>  | NVIDIA         | T241 MPAM       | T241-MPAM-4     | N/A                         |
>  +----------------+-----------------+-----------------+-----------------------------+
> +| NVIDIA         | T241 MPAM       | T241-MPAM-6     | N/A                         |
> ++----------------+-----------------+-----------------+-----------------------------+
>  +----------------+-----------------+-----------------+-----------------------------+
>  | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
>  +----------------+-----------------+-----------------+-----------------------------+
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index 5ba0aa703807..c17a6fdea982 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -684,6 +684,12 @@ static const struct mpam_quirk mpam_quirks[] = {
>  	.iidr_mask  = MPAM_IIDR_MATCH_ONE,
>  	.workaround = T241_FORCE_MBW_MIN_TO_ONE,
>  	},
> +	{
> +	/* NVIDIA t241 erratum T241-MPAM-6 */
> +	.iidr       = MPAM_IIDR_NVIDIA_T421,
> +	.iidr_mask  = MPAM_IIDR_MATCH_ONE,
> +	.workaround = T241_MBW_COUNTER_SCALE_64,
> +	},
>  	{ NULL }, /* Sentinel */
>  };
>  
> @@ -1140,7 +1146,7 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  	}
>  }
>  
> -static u64 mpam_msmon_overflow_val(enum mpam_device_features type)
> +static u64 __mpam_msmon_overflow_val(enum mpam_device_features type)
>  {
>  	/* TODO: implement scaling counters */
>  	switch (type) {
> @@ -1155,6 +1161,17 @@ static u64 mpam_msmon_overflow_val(enum mpam_device_features type)
>  	}
>  }
>  
> +static u64 mpam_msmon_overflow_val(enum mpam_device_features type,
> +				   struct mpam_msc *msc)
> +{
> +	u64 overflow_val = __mpam_msmon_overflow_val(type);
> +
> +	if (mpam_has_quirk(T241_MBW_COUNTER_SCALE_64, msc))
> +		overflow_val *= 64;
> +
> +	return overflow_val;

The multiplication by 64 makes the u64 overflow_val wrap around for
63-bit counters. Do those need to be considered for this erratum?

> +}
> +
>  static void __ris_msmon_read(void *arg)
>  {
>  	u64 now;
> @@ -1245,13 +1262,16 @@ static void __ris_msmon_read(void *arg)
>  			now = FIELD_GET(MSMON___VALUE, now);
>  		}
>  
> +		if (mpam_has_quirk(T241_MBW_COUNTER_SCALE_64, msc))
> +			now *= 64;
> +
>  		if (nrdy)
>  			break;
>  
>  		mbwu_state = &ris->mbwu_state[ctx->mon];
>  
>  		if (overflow)
> -			mbwu_state->correction += mpam_msmon_overflow_val(m->type);
> +			mbwu_state->correction += mpam_msmon_overflow_val(m->type, msc);
>  
>  		/*
>  		 * Include bandwidth consumed before the last hardware reset and
> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
> index 01882f0acee2..108a8373901c 100644
> --- a/drivers/resctrl/mpam_internal.h
> +++ b/drivers/resctrl/mpam_internal.h
> @@ -224,6 +224,7 @@ struct mpam_props {
>  enum mpam_device_quirks {
>  	T241_SCRUB_SHADOW_REGS,
>  	T241_FORCE_MBW_MIN_TO_ONE,
> +	T241_MBW_COUNTER_SCALE_64,
>  	MPAM_QUIRK_LAST,
>  };
>  

Thanks,

Ben




More information about the linux-arm-kernel mailing list