[PATCH 2/2] KVM: arm64: timers: Adjust CVAL of a ptimer across guest entry and exits

Ganapatrao Kulkarni gankulkarni at os.amperecomputing.com
Tue Aug 22 07:57:21 PDT 2023



On 22-08-2023 05:46 pm, Marc Zyngier wrote:
> On Thu, 17 Aug 2023 10:27:55 +0100,
> Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com> wrote:
>>
>>
>> Hi Marc,
>>
>> On 17-08-2023 01:57 pm, Marc Zyngier wrote:
>>> [Fixing Christoffer's email address]
>>
>> Thanks.
>>>
>>> On Thu, 17 Aug 2023 07:03:14 +0100,
>>> Ganapatrao Kulkarni <gankulkarni at os.amperecomputing.com> wrote:
>>>>
>>>> As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, Enhanced Counter
>>>> Virtualization functionality is disabled and CNTPOFF_EL2 value is treated
>>>> as zero. On VHE host, E2H and TGE are set, hence it is required
>>>> to adjust CVAL by incrementing it by CNTPOFF_EL2 after guest
>>>> exit to avoid false physical timer interrupts and also
>>>> decrement/restore CVAL before the guest entry.
>>>
>>> No, this is wrong. Neither E2H nor TGE have any impact on writing to
>>> CNTPOFF_EL2, nor does it have an impact on CNTP_CVAL_EL0. Just read
>>> the pseudocode to convince yourself.
>>>
>>> CNTPOFF_EL2 is applied at exactly two points: when SW is reading
>>> CNTPCT_EL0 from EL1 while {E2H,TGE}=={1, 0} and when the HW is
>>> comparing CNTPCT_EL0 with the CNTP_CVAL_EL0. In both cases the offset
>>> is subtracted from the counter. And that's the point where the running
>>> EL matters. Which means that CNTPOFF_EL2 behaves exactly like
>>> CNTVOFF_EL2. No ifs, no buts.
>>
>> As per ARM ARM (ARM DDI 0487J.a page D11-5989)
>> "When FEAT_ECV is implemented, the CNTPOFF_EL2 register allows an
>> offset to be applied to the physical counter, as viewed from EL1 and
>> EL0, and to the EL1 physical timer. The functionality of this 64-bit
>> register is affected by CNTHCTL_EL2.ECV."
>>
>> As per ARM ARM (ARM DDI 0487J.a page D19-7857)
>> "When HCR_EL2.{E2H, TGE} == {1, 1} or SCR_EL3.{NS, EEL2} == {0, 0}, then
>> Enhanced Counter Virtualization functionality is disabled."
>>
>> "The EL1 physical timer interrupt is triggered when ((PCount<63:0> -
>> CNTPOFF_EL2<63:0>) - PCVal<63:0>) is greater than or equal to 0."
>>
>> As per ARM ARM (ARM DDI 0487J.a page D19-7938)
>> "When EL2 is implemented and enabled in the current Security state,
>> the physical counter uses a fixed physical offset of *zero* if any of
>> the following are true:
>> • CNTHCTL_EL2.ECV is 0.
>> • SCR_EL3.ECVEn is 0.
>> • HCR_EL2.{E2H, TGE} is {1, 1}."
>>
>> In VHE host hypervisor, E2H=TGE=1 hence ECV is disabled and Ptimer
>> interrupt is triggered based on PCount<63:0> - PCVal<63:0>
>>
>> Since cval is set by Guest as per offsetted PCounter value and pCount
>> is not subtracted by CNTPOFF when in VHE-L0, results in cval becoming
>> much lesser than physical counter(bumped up since CNTPOFF is zero) and
>> timer interrupt trigger condition is met falsely.
>>
>> There is no issue/impact on cval due to ECV, however it can be/is
>> manipulated to handle this on and off of CNTPOFF/ECV.
>>
>> IIUC, CNTPOFF and CNTVOFF are not same as per specification.
> 
> I owe you an apology. You are correct, and I was totally wrong.  I'm
> truly amazed how wrong we got this part of the architecture, but it is
> way too late for any change, and we'll have to live with it.
> 
Thanks Marc.

> Now, to the actual patch: I think the way you offset CVAL isn't
> great. You should never have to change it on entry, and you should
> instead read the correct value from memory. Then, save/restore of CVAL
> must be amended to always apply the offset. Can you give the hack
> below a go on your HW?

Sure, I will try this and update.

> 
> Thanks,
> 
> 	M.
> 
> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
> index ea46b4e1e7a8..bb80fdd84676 100644
> --- a/arch/arm64/kvm/arch_timer.c
> +++ b/arch/arm64/kvm/arch_timer.c
> @@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
>   	.get_input_level = kvm_arch_timer_get_input_level,
>   };
>   
> -static bool has_cntpoff(void)
> -{
> -	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
> -}
> -
>   static int nr_timers(struct kvm_vcpu *vcpu)
>   {
>   	if (!vcpu_has_nv2(vcpu))
> @@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
>   	return timecounter->cc->read(timecounter->cc);
>   }
>   
> -static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
> +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
>   {
>   	if (vcpu_has_nv2(vcpu)) {
>   		if (is_hyp_ctxt(vcpu)) {
> @@ -569,8 +564,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
>   		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
>   		cval = read_sysreg_el0(SYS_CNTP_CVAL);
>   
> -		if (!has_cntpoff())
> -			cval -= timer_get_offset(ctx);
> +		cval -= timer_get_offset(ctx);
>   
>   		timer_set_cval(ctx, cval);
>   
> @@ -657,8 +651,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
>   		cval = timer_get_cval(ctx);
>   		offset = timer_get_offset(ctx);
>   		set_cntpoff(offset);
> -		if (!has_cntpoff())
> -			cval += offset;
> +		cval += offset;
>   		write_sysreg_el0(cval, SYS_CNTP_CVAL);
>   		isb();
>   		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
> index 9611b4eaf661..6e3d3e16563f 100644
> --- a/arch/arm64/kvm/hyp/vhe/switch.c
> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
> @@ -90,6 +90,20 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>   
>   	___activate_traps(vcpu, hcr);
>   
> +	if (has_cntpoff()) {
> +		struct timer_map map;
> +
> +		get_timer_map(vcpu, &map);
> +
> +		if (map.direct_ptimer == vcpu_ptimer(vcpu))
> +			val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
> +		else
> +			val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
> +
> +		isb();
> +		write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
> +	}
> +
>   	val = read_sysreg(cpacr_el1);
>   	val |= CPACR_ELx_TTA;
>   	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
> @@ -131,6 +145,23 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>   
>   	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>   
> +	if (has_cntpoff()) {
> +		struct timer_map map;
> +		u64 val, offset;
> +
> +		get_timer_map(vcpu, &map);
> +
> +		val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
> +		if (map.direct_ptimer == vcpu_ptimer(vcpu))
> +			__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
> +		else
> +			__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
> +
> +		offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> +		write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
> +		isb();
> +	}
> +
>   	/*
>   	 * ARM errata 1165522 and 1530923 require the actual execution of the
>   	 * above before we can switch to the EL2/EL0 translation regime used by
> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> index ea77a569a907..86a73ad1446a 100644
> --- a/include/kvm/arm_arch_timer.h
> +++ b/include/kvm/arm_arch_timer.h
> @@ -82,6 +82,8 @@ struct timer_map {
>   	struct arch_timer_context *emul_ptimer;
>   };
>   
> +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
> +
>   struct arch_timer_cpu {
>   	struct arch_timer_context timers[NR_KVM_TIMERS];
>   
> @@ -149,4 +151,9 @@ void kvm_timer_cpu_down(void);
>   /* CNTKCTL_EL1 valid bits as of DDI0476J.a */
>   #define CNTKCTL_VALID_BITS	(BIT(17) | GENMASK_ULL(9, 0))
>   
> +static inline bool has_cntpoff(void)
> +{
> +	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
> +}
> +
>   #endif
> 

Thanks,
Ganapat



More information about the linux-arm-kernel mailing list