[PATCH v2 2/2] KVM: arm64: timers: Save/restore CVAL of a ptimer across guest entry and exit

Marc Zyngier maz at kernel.org
Sun Sep 10 04:45:55 PDT 2023


On Mon, 04 Sep 2023 12:42:18 +0100,
Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com> wrote:
> 
> As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, the Enhanced Counter
> Virtualization functionality is disabled and the CNTPOFF_EL2 value is
> treated as zero. On a VHE host, E2H and TGE are set, so the guest's CVAL
> must be saved to memory and incremented by CNTPOFF_EL2 at guest exit to
> avoid spurious physical timer interrupts, and restored from the saved
> value before guest entry.
> 
> Signed-off-by: Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com>
> ---
>  arch/arm64/kvm/arch_timer.c     | 60 ++++++++++++++++++++++++++++++---
>  arch/arm64/kvm/hyp/vhe/switch.c | 13 +++++++
>  include/kvm/arm_arch_timer.h    |  1 +
>  3 files changed, 69 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
> index 98b0e8ac02ae..9fe3fa6ed98a 100644
> --- a/arch/arm64/kvm/arch_timer.c
> +++ b/arch/arm64/kvm/arch_timer.c
> @@ -514,6 +514,59 @@ static void set_cntpoff(u64 cntpoff)
>  		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
>  }
>  
> +static void ptimer_cval_save(struct arch_timer_context *ctx, u64 offset)
> +{
> +	unsigned long flags;
> +	u64 cval;
> +
> +	local_irq_save(flags);
> +	cval = read_sysreg_el0(SYS_CNTP_CVAL);
> +	timer_set_cval(ctx, cval);
> +	cval += offset;
> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
> +	isb();
> +	local_irq_restore(flags);
> +}
> +
> +static void ptimer_cval_restore(struct arch_timer_context *ctx, u64 offset)
> +{
> +	unsigned long flags;
> +	u64 cval;
> +
> +	local_irq_save(flags);
> +	cval = timer_get_cval(ctx);
> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
> +	isb();
> +	local_irq_restore(flags);
> +}
> +
> +void kvm_ptimer_cval_save_restore(struct kvm_vcpu *vcpu, bool save)
> +{
> +	struct timer_map map;
> +	struct arch_timer_cpu *timer;
> +	struct arch_timer_context *ctxp;
> +	u64 offset;
> +
> +	get_timer_map(vcpu, &map);
> +	ctxp = map.direct_ptimer;
> +
> +	if (unlikely(ctxp == NULL))
> +		return;
> +
> +	offset = timer_get_offset(ctxp);
> +	if (!offset)
> +		return;
> +
> +	timer = vcpu_timer(ctxp->vcpu);
> +	if (!timer->enabled || !ctxp->loaded)
> +		return;
> +
> +	if (save)
> +		ptimer_cval_save(ctxp, offset);
> +	else
> +		ptimer_cval_restore(ctxp, offset);
> +}
> +

I really don't want to see any of this code in the arch_timer
backend. Nothing in this file executed in the context of the
world-switch until this patch, and I think this is the wrong level of
abstraction.

You end up doing things that make no sense in the expected execution
context (timer not loaded? not enabled? disabling interrupts?), and
this makes the whole thing pointlessly complex.

My take on this problem is still the same (vaguely amended compared to
the previous version). If this doesn't work for you, please explain
what is wrong with it. But it has the shape of what I really want to
see.
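
To spell out the arithmetic this relies on (a sketch for the sake of
the discussion, not part of the patch below; P stands for whatever
CNTPOFF_EL2 happens to contain, and cval_guest for the guest's idea
of the compare value):

	/*
	 * Sketch only:
	 *
	 *   TGE == 0: IRQ when (CNT - P) >= CVAL  =>  program cval_guest
	 *   TGE == 1: IRQ when  CNT      >= CVAL  =>  program cval_guest + P
	 *
	 * Both conditions become true at the same physical time, and
	 * since TGE only flips in the world-switch, the world-switch is
	 * the only place where CVAL can be adjusted accordingly.
	 */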

Thanks,

	M.

From 2516a1410c0d45a7d3ba0523847be339bfcb1e50 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz at kernel.org>
Date: Tue, 22 Aug 2023 13:18:10 +0100
Subject: [PATCH] KVM: arm64: timers: Correctly handle TGE flip with
 CNTPOFF_EL2

Contrary to common belief, HCR_EL2.TGE has a direct and immediate
effect on the way the EL0 physical counter is offset. Flipping
TGE from 1 to 0 while at EL2 immediately changes the way the counter
is compared to the CVAL limit.

This means that we cannot directly save/restore the guest's view of
CVAL, but that we instead must treat it as if CNTPOFF didn't exist.
Only in the world switch, once we figure out that we do have CNTPOFF,
can we move the offset back and forth depending on the polarity of
TGE.

Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer")
Reported-by: Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com>
Signed-off-by: Marc Zyngier <maz at kernel.org>
---
 arch/arm64/kvm/arch_timer.c     | 13 +++-------
 arch/arm64/kvm/hyp/vhe/switch.c | 44 ++++++++++++++++++++++++++++++++
 include/kvm/arm_arch_timer.h    |  7 +++++
 3 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6dcdae4d38cb..a1e24228aaaa 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
 	.get_input_level = kvm_arch_timer_get_input_level,
 };
 
-static bool has_cntpoff(void)
-{
-	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
-}
-
 static int nr_timers(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu_has_nv(vcpu))
@@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
 	return timecounter->cc->read(timecounter->cc);
 }
 
-static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
 {
 	if (vcpu_has_nv(vcpu)) {
 		if (is_hyp_ctxt(vcpu)) {
@@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
 		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
 		cval = read_sysreg_el0(SYS_CNTP_CVAL);
 
-		if (!has_cntpoff())
-			cval -= timer_get_offset(ctx);
+		cval -= timer_get_offset(ctx);
 
 		timer_set_cval(ctx, cval);
 
@@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 		cval = timer_get_cval(ctx);
 		offset = timer_get_offset(ctx);
 		set_cntpoff(offset);
-		if (!has_cntpoff())
-			cval += offset;
+		cval += offset;
 		write_sysreg_el0(cval, SYS_CNTP_CVAL);
 		isb();
 		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6537f58b1a8c..5dcbde57f466 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	___activate_traps(vcpu);
 
+	if (has_cntpoff()) {
+		struct timer_map map;
+
+		get_timer_map(vcpu, &map);
+
+		/*
+		 * We're entering the guest. Reload the correct
+		 * values from memory now that TGE is clear.
+		 */
+		if (map.direct_ptimer == vcpu_ptimer(vcpu))
+			val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+		if (map.direct_ptimer == vcpu_hptimer(vcpu))
+			val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
+
+		if (map.direct_ptimer) {
+			write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
+			isb();
+		}
+	}
+
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_ELx_TTA;
 	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 
+	if (has_cntpoff()) {
+		struct timer_map map;
+		u64 val, offset;
+
+		get_timer_map(vcpu, &map);
+
+		/*
+		 * We're exiting the guest. Save the latest CVAL value
+		 * to memory and apply the offset now that TGE is set.
+		 */
+		val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
+		if (map.direct_ptimer == vcpu_ptimer(vcpu))
+			__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+		if (map.direct_ptimer == vcpu_hptimer(vcpu))
+			__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+
+		offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+
+		if (map.direct_ptimer && offset) {
+			write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
+			isb();
+		}
+	}
+
 	/*
 	 * ARM errata 1165522 and 1530923 require the actual execution of the
 	 * above before we can switch to the EL2/EL0 translation regime used by
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index bb3cb005873e..e748bc957d83 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -82,6 +82,8 @@ struct timer_map {
 	struct arch_timer_context *emul_ptimer;
 };
 
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
+
 struct arch_timer_cpu {
 	struct arch_timer_context timers[NR_KVM_TIMERS];
 
@@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
 void kvm_timer_cpu_up(void);
 void kvm_timer_cpu_down(void);
 
+static inline bool has_cntpoff(void)
+{
+	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
+}
+
 #endif
-- 
2.34.1
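
For completeness, the resulting sequence on a VHE host with CNTPOFF
should look roughly like this (a hand-written trace for illustration,
not generated by the patch; "guest CVAL" is the copy kept in
__vcpu_sys_reg(), P the CNTPOFF_EL2 value):

	/*
	 * vcpu_load()
	 *   timer_restore_state():  hw CVAL = guest CVAL + P  (TGE still set)
	 * __activate_traps():       hw CVAL = guest CVAL      (TGE now clear)
	 *	... guest runs, possibly updating CVAL directly ...
	 * __deactivate_traps():     guest CVAL = hw CVAL, then hw CVAL += P
	 * vcpu_put()
	 *   timer_save_state():     guest CVAL = hw CVAL - P
	 */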


-- 
Without deviation from the norm, progress is not possible.


