[PATCH 13/32] KVM: arm64: gic-v5: Add vgic-v5 save/restore hyp interface
Sascha Bischoff
Sascha.Bischoff at arm.com
Wed Dec 17 13:42:51 PST 2025
On Wed, 2025-12-17 at 11:07 +0000, Marc Zyngier wrote:
> On Fri, 12 Dec 2025 15:22:39 +0000,
> Sascha Bischoff <Sascha.Bischoff at arm.com> wrote:
> >
> > Introduce hyp functions to save/restore the following GICv5 state:
> >
> > * ICC_ICSR_EL1
> > * ICH_APR_EL2
> > * ICH_PPI_ACTIVERx_EL2
> > * ICH_PPI_DVIRx_EL2
> > * ICH_PPI_ENABLERx_EL2
> > * ICH_PPI_PENDRRx_EL2
> > * ICH_PPI_PRIORITYRx_EL2
> > * ICH_VMCR_EL2
> >
> > All of these are saved/restored to/from the KVM vgic_v5 CPUIF shadow
> > state.
> >
> > The ICSR must be saved/restored as this register is shared between
> > the host and guest; saving and restoring it avoids leaking host
> > state to the guest. Moreover, as it can be used by the host at any
> > time, it must be saved/restored eagerly. Note: the host state is
> > not preserved, as the host should only use this register when
> > preemption is disabled.
> >
> > As part of restoring the ICH_VMCR_EL2 and ICH_APR_EL2, GICv3-compat
> > mode is also disabled by setting the ICH_VCTLR_EL2.V3 bit to 0. The
> > corresponding GICv3-compat mode enable is part of the VMCR & APR
> > restore for a GICv3 guest, as it only takes effect when actually
> > running a guest.
> >
> > Co-authored-by: Timothy Hayes <timothy.hayes at arm.com>
> > Signed-off-by: Timothy Hayes <timothy.hayes at arm.com>
> > Signed-off-by: Sascha Bischoff <sascha.bischoff at arm.com>
> > ---
> > arch/arm64/include/asm/kvm_asm.h | 4 +
> > arch/arm64/include/asm/kvm_hyp.h | 8 ++
> > arch/arm64/kvm/hyp/nvhe/Makefile | 2 +-
> > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 32 ++++++
> > arch/arm64/kvm/hyp/vgic-v5.c | 155 +++++++++++++++++++++++++++++
> > arch/arm64/kvm/hyp/vhe/Makefile | 2 +-
> > include/kvm/arm_vgic.h | 28 ++++++
> > 7 files changed, 229 insertions(+), 2 deletions(-)
> > create mode 100644 arch/arm64/kvm/hyp/vgic-v5.c
> >
> > diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> > index a1ad12c72ebf1..5f669299fb956 100644
> > --- a/arch/arm64/include/asm/kvm_asm.h
> > +++ b/arch/arm64/include/asm/kvm_asm.h
> > @@ -89,6 +89,10 @@ enum __kvm_host_smccc_func {
> > __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
> > __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
> > __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
> > + __KVM_HOST_SMCCC_FUNC___vgic_v5_save_vmcr_aprs,
>
> As we recently found out with v2/v3, delaying saving VMCR (and
> therefore lumping it in with the APRs) is a bad idea if you need to
> look at it from inside the run loop.
>
> In general, please align the behaviour with the existing
> infrastructure as often as possible. We can always optimise things
> later once GICv5 becomes relevant.
Done, and noted!
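For v2 the plan is to mirror the GICv3 arrangement, so VMCR is saved
eagerly on exit and only the APRs are deferred. Roughly something like
this (illustrative sketch only; the __vgic_v5_save_aprs and
__vgic_v5_save_vmcr names are provisional and may still change):

/* Deferred to vcpu_put, as for GICv3 */
void __vgic_v5_save_aprs(struct vgic_v5_cpu_if *cpu_if)
{
	cpu_if->vgic_apr = read_sysreg_s(SYS_ICH_APR_EL2);
}

/* Saved eagerly on every exit so the run loop can look at it */
void __vgic_v5_save_vmcr(struct vgic_v5_cpu_if *cpu_if)
{
	cpu_if->vgic_vmcr = read_sysreg_s(SYS_ICH_VMCR_EL2);
}

The restore side stays as in this patch (compat-mode disable, then the
VMCR and APR writes).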
> > + __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_aprs,
> > + __KVM_HOST_SMCCC_FUNC___vgic_v5_save_ppi_state,
> > + __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_ppi_state,
> > };
> >
> > #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
> > diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> > index 76ce2b94bd97e..f6cf59a719ac6 100644
> > --- a/arch/arm64/include/asm/kvm_hyp.h
> > +++ b/arch/arm64/include/asm/kvm_hyp.h
> > @@ -87,6 +87,14 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
> > void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
> > int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
> >
> > +/* GICv5 */
> > +void __vgic_v5_save_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if);
> > +void __vgic_v5_restore_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if);
> > +void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if);
> > +void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if);
> > +void __vgic_v5_save_icsr(struct vgic_v5_cpu_if *cpu_if);
> > +void __vgic_v5_restore_icsr(struct vgic_v5_cpu_if *cpu_if);
> > +
> > #ifdef __KVM_NVHE_HYPERVISOR__
> > void __timer_enable_traps(struct kvm_vcpu *vcpu);
> > void __timer_disable_traps(struct kvm_vcpu *vcpu);
> > diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> > index a244ec25f8c5b..d860fbe9bc476 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> > +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> > @@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
> >  	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
> >  	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
> >  hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> > -	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
> > +	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5.o
> > hyp-obj-y += ../../../kernel/smccc-call.o
> > hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
> > hyp-obj-y += $(lib-objs)
> > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > index a7c689152f686..6bc5a4f75fd01 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > @@ -586,6 +586,34 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
> > cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
> > }
> >
> > +static void handle___vgic_v5_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
> > +{
> > +	DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> > +
> > +	__vgic_v5_save_vmcr_aprs(kern_hyp_va(cpu_if));
> > +}
> > +
> > +static void handle___vgic_v5_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
> > +{
> > +	DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> > +
> > +	__vgic_v5_restore_vmcr_aprs(kern_hyp_va(cpu_if));
> > +}
> > +
> > +static void handle___vgic_v5_save_ppi_state(struct kvm_cpu_context *host_ctxt)
> > +{
> > +	DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> > +
> > +	__vgic_v5_save_ppi_state(kern_hyp_va(cpu_if));
> > +}
> > +
> > +static void handle___vgic_v5_restore_ppi_state(struct kvm_cpu_context *host_ctxt)
> > +{
> > +	DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> > +
> > +	__vgic_v5_restore_ppi_state(kern_hyp_va(cpu_if));
> > +}
> > +
> > typedef void (*hcall_t)(struct kvm_cpu_context *);
> >
> > #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
> > @@ -627,6 +655,10 @@ static const hcall_t host_hcall[] = {
> > HANDLE_FUNC(__pkvm_vcpu_load),
> > HANDLE_FUNC(__pkvm_vcpu_put),
> > HANDLE_FUNC(__pkvm_tlb_flush_vmid),
> > + HANDLE_FUNC(__vgic_v5_save_vmcr_aprs),
> > + HANDLE_FUNC(__vgic_v5_restore_vmcr_aprs),
> > + HANDLE_FUNC(__vgic_v5_save_ppi_state),
> > + HANDLE_FUNC(__vgic_v5_restore_ppi_state),
> > };
> >
> > static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> > diff --git a/arch/arm64/kvm/hyp/vgic-v5.c b/arch/arm64/kvm/hyp/vgic-v5.c
> > new file mode 100644
> > index 0000000000000..11b67ae09e326
> > --- /dev/null
> > +++ b/arch/arm64/kvm/hyp/vgic-v5.c
>
> maz at valley-girl:~/hot-poop/arm-platforms$ find . -name vgic-v5.c
> ./arch/arm64/kvm/vgic/vgic-v5.c
> ./arch/arm64/kvm/hyp/vgic-v5.c
>
> It doesn't look like much, but that's *very* annoying. Which is why we
> have vgic-v3.c on one side, and vgic-v3-sr.c on the other. Consider
> doing the same thing here.
Have done this.
> > @@ -0,0 +1,155 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2025 - ARM Ltd
> > + */
> > +
> > +#include <hyp/adjust_pc.h>
> > +
> > +#include <linux/compiler.h>
> > +#include <linux/irqchip/arm-gic-v5.h>
> > +#include <linux/kvm_host.h>
> > +
> > +#include <asm/kvm_emulate.h>
> > +#include <asm/kvm_hyp.h>
> > +#include <asm/kvm_mmu.h>
> > +
> > +void __vgic_v5_save_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if)
> > +{
> > + cpu_if->vgic_vmcr = read_sysreg_s(SYS_ICH_VMCR_EL2);
> > + cpu_if->vgic_apr = read_sysreg_s(SYS_ICH_APR_EL2);
> > +}
> > +
> > +static void __vgic_v5_compat_mode_disable(void)
> > +{
> > +	sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, ICH_VCTLR_EL2_V3, 0);
> > + isb();
> > +}
> > +
> > +void __vgic_v5_restore_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if)
> > +{
> > + __vgic_v5_compat_mode_disable();
> > +
> > + write_sysreg_s(cpu_if->vgic_vmcr, SYS_ICH_VMCR_EL2);
> > + write_sysreg_s(cpu_if->vgic_apr, SYS_ICH_APR_EL2);
> > +}
> > +
> > +void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
> > +{
> > + cpu_if->vgic_ppi_activer_exit[0] =
> > + read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2);
> > + cpu_if->vgic_ppi_activer_exit[1] =
> > + read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2);
>
> Please don't break assignments. Long lines are fine.
Noted!
> > +
> > + cpu_if->vgic_ich_ppi_enabler_exit[0] =
> > + read_sysreg_s(SYS_ICH_PPI_ENABLER0_EL2);
> > + cpu_if->vgic_ich_ppi_enabler_exit[1] =
> > + read_sysreg_s(SYS_ICH_PPI_ENABLER1_EL2);
> > +
> > +	cpu_if->vgic_ppi_pendr_exit[0] = read_sysreg_s(SYS_ICH_PPI_PENDR0_EL2);
> > +	cpu_if->vgic_ppi_pendr_exit[1] = read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2);
> > +
> > + cpu_if->vgic_ppi_priorityr[0] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR0_EL2);
> > + cpu_if->vgic_ppi_priorityr[1] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR1_EL2);
> > + cpu_if->vgic_ppi_priorityr[2] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR2_EL2);
> > + cpu_if->vgic_ppi_priorityr[3] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR3_EL2);
> > + cpu_if->vgic_ppi_priorityr[4] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR4_EL2);
> > + cpu_if->vgic_ppi_priorityr[5] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR5_EL2);
> > + cpu_if->vgic_ppi_priorityr[6] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2);
> > + cpu_if->vgic_ppi_priorityr[7] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2);
> > + cpu_if->vgic_ppi_priorityr[8] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2);
> > + cpu_if->vgic_ppi_priorityr[9] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2);
> > + cpu_if->vgic_ppi_priorityr[10] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2);
> > + cpu_if->vgic_ppi_priorityr[11] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2);
> > + cpu_if->vgic_ppi_priorityr[12] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2);
> > + cpu_if->vgic_ppi_priorityr[13] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2);
> > + cpu_if->vgic_ppi_priorityr[14] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2);
> > + cpu_if->vgic_ppi_priorityr[15] =
> > + read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2);
>
> There is clearly scope for optimisation here. The most likely case is
> that we will only care about a handful of PPIs, all grouped in the
> first 4 registers. That's a good reason to track the PPIs that the
> guest can see.
>
Agreed. Will most likely save this optimisation for a future round.
Once everything is back to a working state again!
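For the record, the rough shape I have in mind for that later
optimisation is something like the below (untested sketch; it assumes
each PRIORITYR covers eight PPIs and that we grow a hypothetical
'nr_guest_ppis' field to bound the loop):

static u64 __vgic_v5_read_ppi_priorityr(unsigned int n)
{
	switch (n) {
	case 0: return read_sysreg_s(SYS_ICH_PPI_PRIORITYR0_EL2);
	case 1: return read_sysreg_s(SYS_ICH_PPI_PRIORITYR1_EL2);
	case 2: return read_sysreg_s(SYS_ICH_PPI_PRIORITYR2_EL2);
	case 3: return read_sysreg_s(SYS_ICH_PPI_PRIORITYR3_EL2);
	/* ... and so on, up to 15 ... */
	default:
		return 0;
	}
}

static void __vgic_v5_save_ppi_priorities(struct vgic_v5_cpu_if *cpu_if, unsigned int nr_guest_ppis)
{
	unsigned int i, nr_regs = DIV_ROUND_UP(nr_guest_ppis, 8);

	for (i = 0; i < nr_regs; i++)
		cpu_if->vgic_ppi_priorityr[i] = __vgic_v5_read_ppi_priorityr(i);
}

But as said, that's for a future round.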
> > +
> > + /* Now that we are done, disable DVI */
> > + write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2);
> > + write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
> > +}
> > +
> > +void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
> > +{
> > +	/* Now enable DVI so that the guest's interrupt config takes over */
> > +	write_sysreg_s(cpu_if->vgic_ppi_dvir[0], SYS_ICH_PPI_DVIR0_EL2);
> > +	write_sysreg_s(cpu_if->vgic_ppi_dvir[1], SYS_ICH_PPI_DVIR1_EL2);
> > +
> > + write_sysreg_s(cpu_if->vgic_ppi_activer_entry[0],
> > + SYS_ICH_PPI_ACTIVER0_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_activer_entry[1],
> > + SYS_ICH_PPI_ACTIVER1_EL2);
> > +
> > + write_sysreg_s(cpu_if->vgic_ich_ppi_enabler_entry[0],
> > + SYS_ICH_PPI_ENABLER0_EL2);
> > + write_sysreg_s(cpu_if->vgic_ich_ppi_enabler_entry[1],
> > + SYS_ICH_PPI_ENABLER1_EL2);
> > +
> > +	/* Update the pending state of the NON-DVI'd PPIs, only */
> > + write_sysreg_s(cpu_if->vgic_ppi_pendr_entry[0] &
> > + ~cpu_if->vgic_ppi_dvir[0],
>
> Again, don't insert line breaks in logical operations.
>
Done.
> > + SYS_ICH_PPI_PENDR0_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_pendr_entry[1] &
> > + ~cpu_if->vgic_ppi_dvir[1],
> > + SYS_ICH_PPI_PENDR1_EL2);
> > +
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[0],
> > + SYS_ICH_PPI_PRIORITYR0_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[1],
> > + SYS_ICH_PPI_PRIORITYR1_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[2],
> > + SYS_ICH_PPI_PRIORITYR2_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[3],
> > + SYS_ICH_PPI_PRIORITYR3_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[4],
> > + SYS_ICH_PPI_PRIORITYR4_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[5],
> > + SYS_ICH_PPI_PRIORITYR5_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[6],
> > + SYS_ICH_PPI_PRIORITYR6_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[7],
> > + SYS_ICH_PPI_PRIORITYR7_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[8],
> > + SYS_ICH_PPI_PRIORITYR8_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[9],
> > + SYS_ICH_PPI_PRIORITYR9_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[10],
> > + SYS_ICH_PPI_PRIORITYR10_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[11],
> > + SYS_ICH_PPI_PRIORITYR11_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[12],
> > + SYS_ICH_PPI_PRIORITYR12_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[13],
> > + SYS_ICH_PPI_PRIORITYR13_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[14],
> > + SYS_ICH_PPI_PRIORITYR14_EL2);
> > + write_sysreg_s(cpu_if->vgic_ppi_priorityr[15],
> > + SYS_ICH_PPI_PRIORITYR15_EL2);
> > +}
> > +
> > +void __vgic_v5_save_icsr(struct vgic_v5_cpu_if *cpu_if)
> > +{
> > + cpu_if->vgic_icsr = read_sysreg_s(SYS_ICC_ICSR_EL1);
> > +}
> > +
> > +void __vgic_v5_restore_icsr(struct vgic_v5_cpu_if *cpu_if)
> > +{
> > + write_sysreg_s(cpu_if->vgic_icsr, SYS_ICC_ICSR_EL1);
> > +}
> > diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
> > index afc4aed9231ac..fcf5e68ab591c 100644
> > --- a/arch/arm64/kvm/hyp/vhe/Makefile
> > +++ b/arch/arm64/kvm/hyp/vhe/Makefile
> > @@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
> >
> > obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
> > obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> > - ../fpsimd.o ../hyp-entry.o ../exception.o
> > + ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5.o
> > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> > index fbbaef4ad2114..525c8b83e83c9 100644
> > --- a/include/kvm/arm_vgic.h
> > +++ b/include/kvm/arm_vgic.h
> > @@ -359,7 +359,35 @@ struct vgic_v3_cpu_if {
> > };
> >
> > struct vgic_v5_cpu_if {
> > + u64 vgic_apr;
> > + u64 vgic_vmcr;
> > +
> > + /* PPI register state */
> > u64 vgic_ppi_hmr[2];
> > + u64 vgic_ppi_dvir[2];
> > + u64 vgic_ppi_priorityr[16];
> > +
> > +	/* The pending state of the guest. This is merged with the exit state */
> > + u64 vgic_ppi_pendr[2];
> > +
> > +	/* The state flushed to the regs when entering the guest */
> > + u64 vgic_ppi_activer_entry[2];
> > + u64 vgic_ich_ppi_enabler_entry[2];
> > + u64 vgic_ppi_pendr_entry[2];
> > +
> > + /* The saved state of the regs when leaving the guest */
> > + u64 vgic_ppi_activer_exit[2];
> > + u64 vgic_ich_ppi_enabler_exit[2];
> > + u64 vgic_ppi_pendr_exit[2];
>
> See my comment on patch 17, requesting to make these entry/exit
> states per CPU, and not per vcpu.
>
Yeah, I'll look into this for sure. It makes sense to do it that way.
This means that when we're outside of the guest, all of that mess
disappears, and the per-vcpu state just reflects what we're going to
present to the guest on next entry.
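Roughly what I'm picturing (very much a sketch, names provisional): the
host-side snapshot becomes a per physical CPU blob, and vgic_v5_cpu_if
only keeps the state we present to the guest:

struct vgic_v5_host_ppi_state {
	u64 ppi_activer[2];
	u64 ppi_enabler[2];
	u64 ppi_pendr[2];
};

static DEFINE_PER_CPU(struct vgic_v5_host_ppi_state, host_ppi_state);

/* On guest entry: stash the host view before loading the guest's */
static void __vgic_v5_save_host_ppi_state(void)
{
	struct vgic_v5_host_ppi_state *host = this_cpu_ptr(&host_ppi_state);

	host->ppi_activer[0] = read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2);
	host->ppi_activer[1] = read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2);
	/* ... enabler and pendr likewise ... */
}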
Thanks as always,
Sascha
> > +
> > +	/*
> > +	 * The ICSR is re-used across host and guest, and hence it needs to be
> > +	 * saved/restored. Only one copy is required as the host should block
> > +	 * preemption between executing GIC CDRCFG and accessing the
> > +	 * ICC_ICSR_EL1. A guest, of course, can never guarantee this, and hence
> > +	 * it is the hyp's responsibility to keep the state consistent.
> > +	 */
> > + u64 vgic_icsr;
> > };
> >
> > struct vgic_cpu {
>
> Thanks,
>
> M.
>