[PATCH v5 15/24] KVM: arm64: Setup MDCR_EL2 to handle a partitioned PMU

Colton Lewis coltonlewis at google.com
Fri Dec 12 13:22:23 PST 2025


Oliver Upton <oupton at kernel.org> writes:

> On Tue, Dec 09, 2025 at 08:51:12PM +0000, Colton Lewis wrote:
>> Set up MDCR_EL2 to handle a partitioned PMU. That means calculating an
>> appropriate value for HPMN, instead of the default maximum setting the
>> host allows (which implies no partition), so hardware enforces that a
>> guest will only see the counters in the guest partition.

>> Setting HPMN to a non-default value means the global enable bit for
>> the host counters is now MDCR_EL2.HPME instead of the usual
>> PMCR_EL0.E. Enable the HPME bit to allow the host to count guest
>> events. Since HPME only has an effect when HPMN is set to a
>> non-default value, which we only do for the guest, it is correct to
>> enable it unconditionally here.

>> Unset the TPM and TPMCR bits, which trap all PMU accesses, if
>> FGT (fine-grained trapping) is being used.

>> If available, set the filtering bits HPMD and HCCD to be extra sure
>> nothing in the guest counts at EL2.

>> Signed-off-by: Colton Lewis <coltonlewis at google.com>
>> ---
>>   arch/arm64/include/asm/kvm_pmu.h | 11 ++++++
>>   arch/arm64/kvm/debug.c           | 29 ++++++++++++--
>>   arch/arm64/kvm/pmu-direct.c      | 65 ++++++++++++++++++++++++++++++++
>>   3 files changed, 102 insertions(+), 3 deletions(-)

>> diff --git a/arch/arm64/include/asm/kvm_pmu.h b/arch/arm64/include/asm/kvm_pmu.h
>> index 60b8a48cad456..8b634112eded2 100644
>> --- a/arch/arm64/include/asm/kvm_pmu.h
>> +++ b/arch/arm64/include/asm/kvm_pmu.h
>> @@ -101,6 +101,9 @@ u64 kvm_pmu_guest_counter_mask(struct arm_pmu *pmu);
>>   void kvm_pmu_host_counters_enable(void);
>>   void kvm_pmu_host_counters_disable(void);

>> +u8 kvm_pmu_guest_num_counters(struct kvm_vcpu *vcpu);
>> +u8 kvm_pmu_hpmn(struct kvm_vcpu *vcpu);
>> +
>>   #if !defined(__KVM_NVHE_HYPERVISOR__)
>>   bool kvm_vcpu_pmu_is_partitioned(struct kvm_vcpu *vcpu);
>>   bool kvm_vcpu_pmu_use_fgt(struct kvm_vcpu *vcpu);
>> @@ -173,6 +176,14 @@ static inline u64 kvm_pmu_fgt2_bits(void)
>>   {
>>   	return 0;
>>   }
>> +static inline u8 kvm_pmu_guest_num_counters(struct kvm_vcpu *vcpu)
>> +{
>> +	return 0;
>> +}
>> +static inline u8 kvm_pmu_hpmn(struct kvm_vcpu *vcpu)
>> +{
>> +	return 0;
>> +}
>>   static inline void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu,
>>   					     u64 select_idx, u64 val) {}
>>   static inline void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu,
>> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
>> index 3ad6b7c6e4ba7..0ab89c91e19cb 100644
>> --- a/arch/arm64/kvm/debug.c
>> +++ b/arch/arm64/kvm/debug.c
>> @@ -36,20 +36,43 @@ static int cpu_has_spe(u64 dfr0)
>>    */
>>   static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
>>   {
>> +	int hpmn = kvm_pmu_hpmn(vcpu);
>> +
>>   	preempt_disable();

>>   	/*
>>   	 * This also clears MDCR_EL2_E2PB_MASK and MDCR_EL2_E2TB_MASK
>>   	 * to disable guest access to the profiling and trace buffers
>>   	 */
>> -	vcpu->arch.mdcr_el2 = FIELD_PREP(MDCR_EL2_HPMN,
>> -					 *host_data_ptr(nr_event_counters));
>> +
>> +	vcpu->arch.mdcr_el2 = FIELD_PREP(MDCR_EL2_HPMN, hpmn);
>>   	vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
>>   				MDCR_EL2_TPMS |
>>   				MDCR_EL2_TTRF |
>>   				MDCR_EL2_TPMCR |
>>   				MDCR_EL2_TDRA |
>> -				MDCR_EL2_TDOSA);
>> +				MDCR_EL2_TDOSA |
>> +				MDCR_EL2_HPME);
>> +
>> +	if (kvm_vcpu_pmu_is_partitioned(vcpu)) {
>> +		/*
>> +		 * Filtering here should be redundant because we trap
>> +		 * all the PMEVTYPER and PMCCFILTR registers anyway and
>> +		 * ensure they filter EL2, but set the bits when they
>> +		 * are implemented.
>> +		 */
>> +		if (is_pmuv3p1(read_pmuver()))
>> +			vcpu->arch.mdcr_el2 |= MDCR_EL2_HPMD;
>> +		if (is_pmuv3p5(read_pmuver()))
>> +			vcpu->arch.mdcr_el2 |= MDCR_EL2_HCCD;
>> +
>> +		/*
>> +		 * Take out the coarse grain traps if we are using
>> +		 * fine grain traps.
>> +		 */
>> +		if (kvm_vcpu_pmu_use_fgt(vcpu))
>> +			vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_TPM | MDCR_EL2_TPMCR);
>> +
>> +	}

>>   	/* Is the VM being debugged by userspace? */
>>   	if (vcpu->guest_debug)
>> diff --git a/arch/arm64/kvm/pmu-direct.c b/arch/arm64/kvm/pmu-direct.c
>> index 4dd160c878862..7fb4fb5c22e2a 100644
>> --- a/arch/arm64/kvm/pmu-direct.c
>> +++ b/arch/arm64/kvm/pmu-direct.c
>> @@ -154,3 +154,68 @@ void kvm_pmu_host_counters_disable(void)
>>   	mdcr &= ~MDCR_EL2_HPME;
>>   	write_sysreg(mdcr, mdcr_el2);
>>   }

> <snip>

>> +/**
>> + * kvm_pmu_guest_num_counters() - Number of counters to show to guest
>> + * @vcpu: Pointer to struct kvm_vcpu
>> + *
>> + * Calculate the number of counters to show to the guest via
>> + * PMCR_EL0.N, making sure to respect the maximum the host allows,
>> + * which is hpmn_max if partitioned and host_max otherwise.
>> + *
>> + * Return: Valid value for PMCR_EL0.N
>> + */
>> +u8 kvm_pmu_guest_num_counters(struct kvm_vcpu *vcpu)
>> +{
>> +	u8 nr_cnt = vcpu->kvm->arch.nr_pmu_counters;
>> +	int hpmn_max = armv8pmu_hpmn_max;
>> +	u8 host_max = *host_data_ptr(nr_event_counters);
>> +
>> +	if (vcpu->kvm->arch.arm_pmu)
>> +		hpmn_max = vcpu->kvm->arch.arm_pmu->hpmn_max;
>> +
>> +	if (kvm_vcpu_pmu_is_partitioned(vcpu)) {
>> +		if (nr_cnt <= hpmn_max && nr_cnt <= host_max)
>> +			return nr_cnt;
>> +		if (hpmn_max <= host_max)
>> +			return hpmn_max;
>> +	}
>> +
>> +	if (nr_cnt <= host_max)
>> +		return nr_cnt;
>> +
>> +	return host_max;
>> +}
>> +
>> +/**
>> + * kvm_pmu_hpmn() - Calculate HPMN field value
>> + * @vcpu: Pointer to struct kvm_vcpu
>> + *
>> + * Calculate the appropriate value to set for MDCR_EL2.HPMN. If
>> + * partitioned, this is the number of counters set for the guest if
>> + * supported, falling back to hpmn_max if needed. If we are not
>> + * partitioned or can't set the implied HPMN value, fall back to the
>> + * host value.
>> + *
>> + * Return: A valid HPMN value
>> + */
>> +u8 kvm_pmu_hpmn(struct kvm_vcpu *vcpu)
>> +{
>> +	u8 nr_guest_cnt = kvm_pmu_guest_num_counters(vcpu);
>> +	int nr_guest_cnt_max = armv8pmu_hpmn_max;
>> +	u8 nr_host_cnt_max = *host_data_ptr(nr_event_counters);
>> +
>> +	if (vcpu->kvm->arch.arm_pmu)
>> +		nr_guest_cnt_max = vcpu->kvm->arch.arm_pmu->hpmn_max;
>> +
>> +	if (kvm_vcpu_pmu_is_partitioned(vcpu)) {
>> +		if (cpus_have_final_cap(ARM64_HAS_HPMN0))
>> +			return nr_guest_cnt;
>> +		else if (nr_guest_cnt > 0)
>> +			return nr_guest_cnt;
>> +		else if (nr_guest_cnt_max > 0)
>> +			return nr_guest_cnt_max;
>> +	}
>> +
>> +	return nr_host_cnt_max;
>> +}

> </snip>

> I find all of this rather confusing. It seems like you're dealing with
> sanitizing kvm->arch.nr_pmu_counters vs. the underlying implementation.
> I'm not sure why you need to do that, I would expect that we reject
> unsupported values at the time of the ioctl.

I agree it makes more sense to do the validation at the ioctl. I'll do that.
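
Roughly what I have in mind for the ioctl path (just a sketch; the
handler name and the hpmn_max field follow this series, so treat the
details as assumptions):

static int kvm_arm_pmu_v3_set_nr_counters(struct kvm_vcpu *vcpu, u8 n)
{
	struct kvm *kvm = vcpu->kvm;
	u8 limit = *host_data_ptr(nr_event_counters);

	/*
	 * With a partitioned PMU the guest can only ever be shown the
	 * guest partition, so cap the limit at hpmn_max (field added
	 * by this series).
	 */
	if (kvm_vcpu_pmu_is_partitioned(vcpu) && kvm->arch.arm_pmu)
		limit = min_t(u8, limit, kvm->arch.arm_pmu->hpmn_max);

	if (n > limit)
		return -EINVAL;

	kvm->arch.nr_pmu_counters = n;
	return 0;
}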

> The only thing you do need to handle is if the vCPU has migrated to an
> "unsupported" CPU, for which we already have supporting helpers. I'm too
> lazy to fetch the Arm ARM and cite architecture but I'm pretty sure an
> out-of-range HPMN has UNPREDICTABLE behavior.

It does.

> I think you just need to move the vcpu_set_unsupported_cpu() call
> earlier in kvm_arch_vcpu_load().

> Taking all that into consideration:

> u8 kvm_mdcr_hpmn(struct kvm_vcpu *vcpu)
> {
> 	u8 nr_counters = *host_data_ptr(nr_event_counters);

> 	if (!kvm_vcpu_pmu_is_partitioned(vcpu) || vcpu_on_unsupported_cpu(vcpu))
> 		return nr_counters;

> 	return vcpu->kvm->arch.nr_pmu_counters;
> }

Something like that will probably work. But I also need to account for
the case where arch.nr_pmu_counters is 0 and we don't have FEAT_HPMN0:
programming HPMN == 0 without FEAT_HPMN0 is also UNPREDICTABLE. I don't
think I can reject that at the ioctl because we still want to support
that case. Is there any way to handle it through
vcpu_on_unsupported_cpu()?
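
For now I'm leaning toward handling it in the helper itself, something
like this on top of your version (sketch only; whether falling back to
the host value is the right behavior for HPMN == 0 without FEAT_HPMN0
is exactly what I'm unsure about):

u8 kvm_mdcr_hpmn(struct kvm_vcpu *vcpu)
{
	u8 nr_counters = *host_data_ptr(nr_event_counters);

	if (!kvm_vcpu_pmu_is_partitioned(vcpu) || vcpu_on_unsupported_cpu(vcpu))
		return nr_counters;

	/*
	 * HPMN == 0 is only valid with FEAT_HPMN0. If the guest was
	 * given zero counters and the hardware can't express that,
	 * fall back to the host value (i.e. no partition) rather than
	 * program an UNPREDICTABLE HPMN.
	 */
	if (!vcpu->kvm->arch.nr_pmu_counters &&
	    !cpus_have_final_cap(ARM64_HAS_HPMN0))
		return nr_counters;

	return vcpu->kvm->arch.nr_pmu_counters;
}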

> Thanks,
> Oliver


