[PATCH v5 15/24] KVM: arm64: Setup MDCR_EL2 to handle a partitioned PMU

Oliver Upton oupton at kernel.org
Tue Dec 9 13:33:16 PST 2025


On Tue, Dec 09, 2025 at 08:51:12PM +0000, Colton Lewis wrote:
> Set up MDCR_EL2 to handle a partitioned PMU. That means calculating an
> appropriate value for HPMN instead of the default maximum setting the
> host allows (which implies no partition), so that hardware enforces
> that a guest will only see the counters in the guest partition.
> 
> Setting HPMN to a non-default value means the global enable bit for
> the host counters is now MDCR_EL2.HPME instead of the usual
> PMCR_EL0.E. Enable the HPME bit to allow the host to count guest
> events. Since HPME only has an effect when HPMN is set, which we only
> do for the guest, it is correct to enable it unconditionally here.
> 
> Unset the TPM and TPMCR bits, which trap all PMU accesses, if
> FGT (fine grain trapping) is being used.
> 
> If available, set the filtering bits HPMD and HCCD to be extra sure
> nothing in the guest counts at EL2.
> 
> Signed-off-by: Colton Lewis <coltonlewis at google.com>
> ---
>  arch/arm64/include/asm/kvm_pmu.h | 11 ++++++
>  arch/arm64/kvm/debug.c           | 29 ++++++++++++--
>  arch/arm64/kvm/pmu-direct.c      | 65 ++++++++++++++++++++++++++++++++
>  3 files changed, 102 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pmu.h b/arch/arm64/include/asm/kvm_pmu.h
> index 60b8a48cad456..8b634112eded2 100644
> --- a/arch/arm64/include/asm/kvm_pmu.h
> +++ b/arch/arm64/include/asm/kvm_pmu.h
> @@ -101,6 +101,9 @@ u64 kvm_pmu_guest_counter_mask(struct arm_pmu *pmu);
>  void kvm_pmu_host_counters_enable(void);
>  void kvm_pmu_host_counters_disable(void);
>  
> +u8 kvm_pmu_guest_num_counters(struct kvm_vcpu *vcpu);
> +u8 kvm_pmu_hpmn(struct kvm_vcpu *vcpu);
> +
>  #if !defined(__KVM_NVHE_HYPERVISOR__)
>  bool kvm_vcpu_pmu_is_partitioned(struct kvm_vcpu *vcpu);
>  bool kvm_vcpu_pmu_use_fgt(struct kvm_vcpu *vcpu);
> @@ -173,6 +176,14 @@ static inline u64 kvm_pmu_fgt2_bits(void)
>  {
>  	return 0;
>  }
> +static inline u8 kvm_pmu_guest_num_counters(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}
> +static inline u8 kvm_pmu_hpmn(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}
>  static inline void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu,
>  					     u64 select_idx, u64 val) {}
>  static inline void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu,
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 3ad6b7c6e4ba7..0ab89c91e19cb 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -36,20 +36,43 @@ static int cpu_has_spe(u64 dfr0)
>   */
>  static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
>  {
> +	int hpmn = kvm_pmu_hpmn(vcpu);
> +
>  	preempt_disable();
>  
>  	/*
>  	 * This also clears MDCR_EL2_E2PB_MASK and MDCR_EL2_E2TB_MASK
>  	 * to disable guest access to the profiling and trace buffers
>  	 */
> -	vcpu->arch.mdcr_el2 = FIELD_PREP(MDCR_EL2_HPMN,
> -					 *host_data_ptr(nr_event_counters));
> +
> +	vcpu->arch.mdcr_el2 = FIELD_PREP(MDCR_EL2_HPMN, hpmn);
>  	vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
>  				MDCR_EL2_TPMS |
>  				MDCR_EL2_TTRF |
>  				MDCR_EL2_TPMCR |
>  				MDCR_EL2_TDRA |
> -				MDCR_EL2_TDOSA);
> +				MDCR_EL2_TDOSA |
> +				MDCR_EL2_HPME);
> +
> +	if (kvm_vcpu_pmu_is_partitioned(vcpu)) {
> +		/*
> +		 * Filtering these should be redundant because we trap
> +		 * all the TYPER and FILTR registers anyway and ensure
> +		 * they filter EL2, but set the bits if they are here.
> +		 */
> +		if (is_pmuv3p1(read_pmuver()))
> +			vcpu->arch.mdcr_el2 |= MDCR_EL2_HPMD;
> +		if (is_pmuv3p5(read_pmuver()))
> +			vcpu->arch.mdcr_el2 |= MDCR_EL2_HCCD;
> +
> +		/*
> +		 * Take out the coarse grain traps if we are using
> +		 * fine grain traps.
> +		 */
> +		if (kvm_vcpu_pmu_use_fgt(vcpu))
> +			vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_TPM | MDCR_EL2_TPMCR);
> +
> +	}
>  
>  	/* Is the VM being debugged by userspace? */
>  	if (vcpu->guest_debug)
> diff --git a/arch/arm64/kvm/pmu-direct.c b/arch/arm64/kvm/pmu-direct.c
> index 4dd160c878862..7fb4fb5c22e2a 100644
> --- a/arch/arm64/kvm/pmu-direct.c
> +++ b/arch/arm64/kvm/pmu-direct.c
> @@ -154,3 +154,68 @@ void kvm_pmu_host_counters_disable(void)
>  	mdcr &= ~MDCR_EL2_HPME;
>  	write_sysreg(mdcr, mdcr_el2);
>  }

<snip>

> +/**
> + * kvm_pmu_guest_num_counters() - Number of counters to show to guest
> + * @vcpu: Pointer to struct kvm_vcpu
> + *
> + * Calculate the number of counters to show to the guest via
> + * PMCR_EL0.N, making sure to respect the maximum the host allows,
> + * which is hpmn_max if partitioned and host_max otherwise.
> + *
> + * Return: Valid value for PMCR_EL0.N
> + */
> +u8 kvm_pmu_guest_num_counters(struct kvm_vcpu *vcpu)
> +{
> +	u8 nr_cnt = vcpu->kvm->arch.nr_pmu_counters;
> +	int hpmn_max = armv8pmu_hpmn_max;
> +	u8 host_max = *host_data_ptr(nr_event_counters);
> +
> +	if (vcpu->kvm->arch.arm_pmu)
> +		hpmn_max = vcpu->kvm->arch.arm_pmu->hpmn_max;
> +
> +	if (kvm_vcpu_pmu_is_partitioned(vcpu)) {
> +		if (nr_cnt <= hpmn_max && nr_cnt <= host_max)
> +			return nr_cnt;
> +		if (hpmn_max <= host_max)
> +			return hpmn_max;
> +	}
> +
> +	if (nr_cnt <= host_max)
> +		return nr_cnt;
> +
> +	return host_max;
> +}
> +
> +/**
> + * kvm_pmu_hpmn() - Calculate HPMN field value
> + * @vcpu: Pointer to struct kvm_vcpu
> + *
> + * Calculate the appropriate value to set for MDCR_EL2.HPMN. If
> + * partitioned, this is the number of counters set for the guest if
> + * supported, falling back to hpmn_max if needed. If we are not
> + * partitioned or can't set the implied HPMN value, fall back to the
> + * host value.
> + *
> + * Return: A valid HPMN value
> + */
> +u8 kvm_pmu_hpmn(struct kvm_vcpu *vcpu)
> +{
> +	u8 nr_guest_cnt = kvm_pmu_guest_num_counters(vcpu);
> +	int nr_guest_cnt_max = armv8pmu_hpmn_max;
> +	u8 nr_host_cnt_max = *host_data_ptr(nr_event_counters);
> +
> +	if (vcpu->kvm->arch.arm_pmu)
> +		nr_guest_cnt_max = vcpu->kvm->arch.arm_pmu->hpmn_max;
> +
> +	if (kvm_vcpu_pmu_is_partitioned(vcpu)) {
> +		if (cpus_have_final_cap(ARM64_HAS_HPMN0))
> +			return nr_guest_cnt;
> +		else if (nr_guest_cnt > 0)
> +			return nr_guest_cnt;
> +		else if (nr_guest_cnt_max > 0)
> +			return nr_guest_cnt_max;
> +	}
> +
> +	return nr_host_cnt_max;
> +}

</snip>

I find all of this rather confusing. It seems like you're dealing with
sanitizing kvm->arch.nr_pmu_counters vs. the underlying implementation.
I'm not sure why you need to do that; I would expect that we reject
unsupported values at the time of the ioctl.

The only thing you do need to handle is if the vCPU has migrated to an
"unsupported" CPU, for which we already have supporting helpers. I'm too
lazy to fetch the Arm ARM and cite architecture but I'm pretty sure an
out-of-range HPMN has UNPREDICTABLE behavior.

I think you just need to move the vcpu_set_on_unsupported_cpu() call
earlier in kvm_arch_vcpu_load().

Taking all that into consideration:

u8 kvm_mdcr_hpmn(struct kvm_vcpu *vcpu)
{
	u8 nr_counters = *host_data_ptr(nr_event_counters);

	if (!kvm_vcpu_pmu_is_partitioned(vcpu) || vcpu_on_unsupported_cpu(vcpu))
		return nr_counters;

	return vcpu->kvm->arch.nr_pmu_counters;
}

Thanks,
Oliver



More information about the linux-arm-kernel mailing list