[PATCH v1 2/2] KVM: arm64: PMU: Ensure to trap PMU access from EL0 to EL2

Mark Rutland mark.rutland at arm.com
Tue Apr 4 07:25:27 PDT 2023


On Wed, Mar 29, 2023 at 01:03:18PM +0100, Marc Zyngier wrote:
> On Wed, 29 Mar 2023 01:21:36 +0100,
> Reiji Watanabe <reijiw at google.com> wrote:
> > 
> > Currently, with VHE, KVM sets the ER, CR, SW and EN bits of
> > PMUSERENR_EL0 to 1 on vcpu_load().  So, if those bits are cleared
> > after vcpu_load() (as the perf subsystem does when PMU counters are
> > programmed for the guest), PMU access from the guest EL0 might be
> > trapped to the guest EL1 directly, regardless of the vCPU's current
> > PMUSERENR_EL0 value.
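
For reference, those bits are defined in
arch/arm64/include/asm/perf_event.h (quoting from memory, so
double-check the exact comments):

#define ARMV8_PMU_USERENR_EN	(1 << 0) /* PMU regs can be accessed at EL0 */
#define ARMV8_PMU_USERENR_SW	(1 << 1) /* PMSWINC can be written at EL0 */
#define ARMV8_PMU_USERENR_CR	(1 << 2) /* Cycle counter can be read at EL0 */
#define ARMV8_PMU_USERENR_ER	(1 << 3) /* Event counter can be read at EL0 */
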
> 
> + RobH.
> 
> Is that what is done when the event is created and armv8pmu_start()
> is called? This is... crap. The EL0 access thing breaks everything,
> and nobody tested it with KVM, obviously.
> 
> I would be tempted to start mitigating it with the following:
> 
> diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
> index dde06c0f97f3..8063525bf3dd 100644
> --- a/arch/arm64/kernel/perf_event.c
> +++ b/arch/arm64/kernel/perf_event.c
> @@ -806,17 +806,19 @@ static void armv8pmu_disable_event(struct perf_event *event)
>  
>  static void armv8pmu_start(struct arm_pmu *cpu_pmu)
>  {
> -	struct perf_event_context *ctx;
> -	int nr_user = 0;
> +	if (sysctl_perf_user_access) {
> +		struct perf_event_context *ctx;
> +		int nr_user = 0;
>  
> -	ctx = perf_cpu_task_ctx();
> -	if (ctx)
> -		nr_user = ctx->nr_user;
> +		ctx = perf_cpu_task_ctx();
> +		if (ctx)
> +			nr_user = ctx->nr_user;
>  
> -	if (sysctl_perf_user_access && nr_user)
> -		armv8pmu_enable_user_access(cpu_pmu);
> -	else
> -		armv8pmu_disable_user_access();
> +		if (nr_user)
> +			armv8pmu_enable_user_access(cpu_pmu);
> +		else
> +			armv8pmu_disable_user_access();
> +	}
>  
>  	/* Enable all counters */
>  	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
> 
> but that's obviously not enough, as we want it to work with EL0
> access enabled on the host as well.
> 
> What we're missing is something that tells the PMU code "we're in a
> context where host userspace isn't present", so that this path could
> be skipped entirely, relying on KVM to restore the appropriate state
> on vcpu_put(). But then the IPI stuff that controls EL0 access can
> always come in and wreck things. Gahhh...
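
FWIW, kvm_get_running_vcpu() already gives us roughly that signal: it
returns the vCPU loaded on the current CPU between vcpu_load() and
vcpu_put(), and NULL otherwise. From virt/kvm/kvm_main.c (quoting from
memory):

struct kvm_vcpu *kvm_get_running_vcpu(void)
{
	struct kvm_vcpu *vcpu;

	/* The per-CPU variable is only stable with preemption disabled. */
	preempt_disable();
	vcpu = __this_cpu_read(kvm_running_vcpu);
	preempt_enable();

	return vcpu;
}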

AFAICT the perf code only writes to PMUSERENR_EL0 in contexts where IRQs (and
hence preemption) are disabled, so as long as we have a shadow of the host
PMUSERENR value somewhere, I think we can update the perf code with something
like the below?

... unless the KVM code is interruptible before saving the host value, or after
restoring it?
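
For completeness, the KVM side would be something like the below
(entirely untested; the pmuserenr_host field name just matches what the
diff below expects):

	/* On vcpu_load(), with IRQs disabled: stash the host value and
	 * grant the guest's EL0 the access described in the commit
	 * message (ER, CR, SW, EN). */
	vcpu->arch.pmuserenr_host = read_sysreg(pmuserenr_el0);
	write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR |
		     ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN,
		     pmuserenr_el0);

	/* On vcpu_put(), with IRQs disabled: restore whatever the host
	 * had (or whatever update_pmuserenr() stashed in the shadow
	 * while the vCPU was loaded). */
	write_sysreg(vcpu->arch.pmuserenr_host, pmuserenr_el0);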

Thanks,
Mark.

---->8----
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index dde06c0f97f3e..bdab3d5cbb5e3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -741,11 +741,26 @@ static inline u32 armv8pmu_getreset_flags(void)
        return value;
 }
 
-static void armv8pmu_disable_user_access(void)
+static void update_pmuserenr(u64 val)
 {
+       lockdep_assert_irqs_disabled();
+
+       if (IS_ENABLED(CONFIG_KVM)) {
+               struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+               if (vcpu) {
+                       vcpu->arch.pmuserenr_host = val;
+                       return;
+               }
+       }
+
-       write_sysreg(0, pmuserenr_el0);
+       write_sysreg(val, pmuserenr_el0);
 }
 
+static void armv8pmu_disable_user_access(void)
+{
+       update_pmuserenr(0);
+}
+
 static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
 {
        int i;
@@ -759,8 +774,7 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
                        armv8pmu_write_evcntr(i, 0);
        }
 
-       write_sysreg(0, pmuserenr_el0);
-       write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0);
+       update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
 }
 
 static void armv8pmu_enable_event(struct perf_event *event)
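
For context, the IPI path Marc mentioned is the sysctl handler, which
kicks every CPU when user access is disabled globally (again quoting
from memory, so the exact shape may differ):

static void armv8pmu_disable_user_access_ipi(void *unused)
{
	armv8pmu_disable_user_access();
}

static int armv8pmu_proc_user_access_handler(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret || !write || sysctl_perf_user_access)
		return ret;

	on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1);
	return 0;
}

IPI handlers run with IRQs disabled, so with the change above this path
goes through update_pmuserenr() and updates the vCPU's shadow value
rather than the sysreg whenever a vCPU is loaded, which should cover
the "IPI comes in and wrecks things" case.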



