[PATCH v4 05/20] KVM: arm64: timers: Allow physical offset without CNTPOFF_EL2
Zenghui Yu
yuzenghui at huawei.com
Wed Jul 9 01:12:18 PDT 2025
[ Record some interesting bits noticed while testing
arch_timer_edge_cases. ]
On 2023/3/31 1:47, Marc Zyngier wrote:
> CNTPOFF_EL2 is awesome, but it is mostly vapourware, and no publicly
> available implementation has it. So for the common mortals, let's
> implement the emulated version of this thing.
>
> It means trapping accesses to the physical counter and timer, and
> emulate some of it as necessary.
>
> As for CNTPOFF_EL2, nobody sets the offset yet.
>
> Reviewed-by: Colton Lewis <coltonlewis at google.com>
> Signed-off-by: Marc Zyngier <maz at kernel.org>
> ---
> arch/arm64/include/asm/sysreg.h | 2 +
> arch/arm64/kvm/arch_timer.c | 98 +++++++++++++++++++++++-------
> arch/arm64/kvm/hyp/nvhe/timer-sr.c | 18 ++++--
> arch/arm64/kvm/sys_regs.c | 9 +++
> 4 files changed, 98 insertions(+), 29 deletions(-)
>
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 9e3ecba3c4e6..f8da9e1b0c11 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -388,6 +388,7 @@
>
> #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
>
> +#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
> #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
> #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
>
> @@ -400,6 +401,7 @@
>
> #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
> #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
> +#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
> #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
>
> #define __PMEV_op2(n) ((n) & 0x7)
> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
> index 3118ea0a1b41..bb64a71ae193 100644
> --- a/arch/arm64/kvm/arch_timer.c
> +++ b/arch/arm64/kvm/arch_timer.c
> @@ -458,6 +458,8 @@ static void timer_save_state(struct arch_timer_context *ctx)
> goto out;
>
> switch (index) {
> + u64 cval;
> +
> case TIMER_VTIMER:
> timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
> timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
> @@ -485,7 +487,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
> break;
> case TIMER_PTIMER:
> timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
> - timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL));
> + cval = read_sysreg_el0(SYS_CNTP_CVAL);
> +
> + if (!has_cntpoff())
> + cval -= timer_get_offset(ctx);
> +
> + timer_set_cval(ctx, cval);
>
> /* Disable the timer */
> write_sysreg_el0(0, SYS_CNTP_CTL);
> @@ -555,6 +562,8 @@ static void timer_restore_state(struct arch_timer_context *ctx)
> goto out;
>
> switch (index) {
> + u64 cval, offset;
> +
> case TIMER_VTIMER:
> set_cntvoff(timer_get_offset(ctx));
> write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
> @@ -562,8 +571,12 @@ static void timer_restore_state(struct arch_timer_context *ctx)
> write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
> break;
> case TIMER_PTIMER:
> - set_cntpoff(timer_get_offset(ctx));
> - write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL);
> + cval = timer_get_cval(ctx);
> + offset = timer_get_offset(ctx);
> + set_cntpoff(offset);
> + if (!has_cntpoff())
> + cval += offset;
> + write_sysreg_el0(cval, SYS_CNTP_CVAL);
> isb();
> write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
> break;
I tested arch_timer_edge_cases on my Kunpeng920 (has VHE, doesn't have
ECV) and noticed that the test_timer_cval() below takes a long period to
return when testing the _physical_ timer.
static void test_timers_in_the_past(enum arch_timer timer)
{
[...]
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
irq_wait_method_t wm = irq_wait_method[i];
[...]
/* Set a timer to counter=0 (in the past) */
test_timer_cval(timer, 0, wm, DEF_CNT);
The comment is obviously wrong. It should say "Set a timer to CVAL=0".
No physical timer interrupt ("kvm guest ptimer") was triggered when I
executed it separately.
Let me try to explain _this_ test_timer_cval() in a bit more detail.
|Guest KVM
|----- ---
|local_irq_disable()
|
|reset_timer_state()
| set_counter() // SET_ONE_REG via user-space
| // for KVM_REG_ARM_PTIMER_CNT
| kvm_arm_timer_set_reg()
| timer_set_offset() [1]
| timer_set_ctl()
| MSR CNTP_CTL_EL0, IMASK
|
|set_xval_irq()
| timer_set_cval()
| MSR CNTP_CVAL_EL0, cval=0
| timer_set_ctl()
| MSR CNTP_CTL_EL0, ENABLE // trap
| kvm_arm_timer_write_sysreg()
| timer_save_state()
| kvm_arm_timer_write()
| timer_restore_state() [2]
|
|/* This method re-enables IRQs to handle the one we're looking for. */
|wm()
|
|assert_irqs_handled(1)
|local_irq_enable()
[1] kvm_phys_timer_read() = 0x7895c0ab2
value = 0x7fffffffffffff
offset = kvm_phys_timer_read() - value = 0xff800007895c0ab3
... which was observed in my test.
[2] cval += offset; // cval = 0xff800007895c0ab3
kvm_phys_timer_read() = 0x7895c1b86
No ptimer interrupt was triggered with that. And we relied on the next
kvm_timer_vcpu_load() to catch the timer expiration and inject an
interrupt to the guest..
It's apparent that this test case is not a practical use case. Not sure
if we should (/can) emulate it properly. Or I may have already missed
some obvious points.
Thanks,
Zenghui
More information about the linux-arm-kernel
mailing list