[PATCH 13/32] KVM: arm64: gic-v5: Add vgic-v5 save/restore hyp interface
Marc Zyngier
maz at kernel.org
Wed Dec 17 03:07:04 PST 2025
On Fri, 12 Dec 2025 15:22:39 +0000,
Sascha Bischoff <Sascha.Bischoff at arm.com> wrote:
>
> Introduce hyp functions to save/restore the following GICv5 state:
>
> * ICC_ICSR_EL1
> * ICH_APR_EL2
> * ICH_PPI_ACTIVERx_EL2
> * ICH_PPI_DVIRx_EL2
> * ICH_PPI_ENABLERx_EL2
> * ICH_PPI_PENDRRx_EL2
> * ICH_PPI_PRIORITYRx_EL2
> * ICH_VMCR_EL2
>
> All of these are saved/restored to/from the KVM vgic_v5 CPUIF shadow
> state.
>
> The ICSR must be saved/restored as this register is shared between host
> and guest. Therefore, to avoid leaking host state to the guest, this
> must be saved and restored. Moreover, as this can be used by the host
> at any time, it must be saved/restored eagerly. Note: the host state is
> not preserved as the host should only use this register when
> preemption is disabled.
>
> As part of restoring the ICH_VMCR_EL2 and ICH_APR_EL2, GICv3-compat
> mode is also disabled by setting the ICH_VCTLR_EL2.V3 bit to 0. The
> corresponding GICv3-compat mode enable is part of the VMCR & APR
> restore for a GICv3 guest as it only takes effect when actually
> running a guest.
>
> Co-authored-by: Timothy Hayes <timothy.hayes at arm.com>
> Signed-off-by: Timothy Hayes <timothy.hayes at arm.com>
> Signed-off-by: Sascha Bischoff <sascha.bischoff at arm.com>
> ---
> arch/arm64/include/asm/kvm_asm.h | 4 +
> arch/arm64/include/asm/kvm_hyp.h | 8 ++
> arch/arm64/kvm/hyp/nvhe/Makefile | 2 +-
> arch/arm64/kvm/hyp/nvhe/hyp-main.c | 32 ++++++
> arch/arm64/kvm/hyp/vgic-v5.c | 155 +++++++++++++++++++++++++++++
> arch/arm64/kvm/hyp/vhe/Makefile | 2 +-
> include/kvm/arm_vgic.h | 28 ++++++
> 7 files changed, 229 insertions(+), 2 deletions(-)
> create mode 100644 arch/arm64/kvm/hyp/vgic-v5.c
>
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index a1ad12c72ebf1..5f669299fb956 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -89,6 +89,10 @@ enum __kvm_host_smccc_func {
> __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
> __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
> __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
> + __KVM_HOST_SMCCC_FUNC___vgic_v5_save_vmcr_aprs,
As we recently found out with v2/v3, delaying saving VMCR (and
therefore lobbing it with the APRs) is a bad idea if you need to look
at it from inside the run loop.
In general, please align the behaviour with the existing
infrastructure as often as possible. We can always optimise things
later once GICv5 becomes relevant.
> + __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_aprs,
> + __KVM_HOST_SMCCC_FUNC___vgic_v5_save_ppi_state,
> + __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_ppi_state,
> };
>
> #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index 76ce2b94bd97e..f6cf59a719ac6 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -87,6 +87,14 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
> void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
> int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
>
> +/* GICv5 */
> +void __vgic_v5_save_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if);
> +void __vgic_v5_restore_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if);
> +void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if);
> +void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if);
> +void __vgic_v5_save_icsr(struct vgic_v5_cpu_if *cpu_if);
> +void __vgic_v5_restore_icsr(struct vgic_v5_cpu_if *cpu_if);
> +
> #ifdef __KVM_NVHE_HYPERVISOR__
> void __timer_enable_traps(struct kvm_vcpu *vcpu);
> void __timer_disable_traps(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index a244ec25f8c5b..d860fbe9bc476 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
> hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
> cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
> hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> - ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
> + ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5.o
> hyp-obj-y += ../../../kernel/smccc-call.o
> hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
> hyp-obj-y += $(lib-objs)
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index a7c689152f686..6bc5a4f75fd01 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -586,6 +586,34 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
> cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
> }
>
> +static void handle___vgic_v5_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
> +{
> + DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> +
> + __vgic_v5_save_vmcr_aprs(kern_hyp_va(cpu_if));
> +}
> +
> +static void handle___vgic_v5_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
> +{
> + DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> +
> + __vgic_v5_restore_vmcr_aprs(kern_hyp_va(cpu_if));
> +}
> +
> +static void handle___vgic_v5_save_ppi_state(struct kvm_cpu_context *host_ctxt)
> +{
> + DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> +
> + __vgic_v5_save_ppi_state(kern_hyp_va(cpu_if));
> +}
> +
> +static void handle___vgic_v5_restore_ppi_state(struct kvm_cpu_context *host_ctxt)
> +{
> + DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
> +
> + __vgic_v5_restore_ppi_state(kern_hyp_va(cpu_if));
> +}
> +
> typedef void (*hcall_t)(struct kvm_cpu_context *);
>
> #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
> @@ -627,6 +655,10 @@ static const hcall_t host_hcall[] = {
> HANDLE_FUNC(__pkvm_vcpu_load),
> HANDLE_FUNC(__pkvm_vcpu_put),
> HANDLE_FUNC(__pkvm_tlb_flush_vmid),
> + HANDLE_FUNC(__vgic_v5_save_vmcr_aprs),
> + HANDLE_FUNC(__vgic_v5_restore_vmcr_aprs),
> + HANDLE_FUNC(__vgic_v5_save_ppi_state),
> + HANDLE_FUNC(__vgic_v5_restore_ppi_state),
> };
>
> static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
> diff --git a/arch/arm64/kvm/hyp/vgic-v5.c b/arch/arm64/kvm/hyp/vgic-v5.c
> new file mode 100644
> index 0000000000000..11b67ae09e326
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/vgic-v5.c
maz at valley-girl:~/hot-poop/arm-platforms$ find . -name vgic-v5.c
./arch/arm64/kvm/vgic/vgic-v5.c
./arch/arm64/kvm/hyp/vgic-v5.c
It doesn't look like much, but that's *very* annoying. Which is why we
have vgic-v3.c on one side, and vgic-v3-sr.c on the other. Consider
doing the same thing here.
> @@ -0,0 +1,155 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2025 - ARM Ltd
> + */
> +
> +#include <hyp/adjust_pc.h>
> +
> +#include <linux/compiler.h>
> +#include <linux/irqchip/arm-gic-v5.h>
> +#include <linux/kvm_host.h>
> +
> +#include <asm/kvm_emulate.h>
> +#include <asm/kvm_hyp.h>
> +#include <asm/kvm_mmu.h>
> +
> +void __vgic_v5_save_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if)
> +{
> + cpu_if->vgic_vmcr = read_sysreg_s(SYS_ICH_VMCR_EL2);
> + cpu_if->vgic_apr = read_sysreg_s(SYS_ICH_APR_EL2);
> +}
> +
> +static void __vgic_v5_compat_mode_disable(void)
> +{
> + sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, ICH_VCTLR_EL2_V3, 0);
> + isb();
> +}
> +
> +void __vgic_v5_restore_vmcr_aprs(struct vgic_v5_cpu_if *cpu_if)
> +{
> + __vgic_v5_compat_mode_disable();
> +
> + write_sysreg_s(cpu_if->vgic_vmcr, SYS_ICH_VMCR_EL2);
> + write_sysreg_s(cpu_if->vgic_apr, SYS_ICH_APR_EL2);
> +}
> +
> +void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
> +{
> + cpu_if->vgic_ppi_activer_exit[0] =
> + read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2);
> + cpu_if->vgic_ppi_activer_exit[1] =
> + read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2);
Please don't break assignments. Long lines are fine.
> +
> + cpu_if->vgic_ich_ppi_enabler_exit[0] =
> + read_sysreg_s(SYS_ICH_PPI_ENABLER0_EL2);
> + cpu_if->vgic_ich_ppi_enabler_exit[1] =
> + read_sysreg_s(SYS_ICH_PPI_ENABLER1_EL2);
> +
> + cpu_if->vgic_ppi_pendr_exit[0] = read_sysreg_s(SYS_ICH_PPI_PENDR0_EL2);
> + cpu_if->vgic_ppi_pendr_exit[1] = read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2);
> +
> + cpu_if->vgic_ppi_priorityr[0] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR0_EL2);
> + cpu_if->vgic_ppi_priorityr[1] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR1_EL2);
> + cpu_if->vgic_ppi_priorityr[2] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR2_EL2);
> + cpu_if->vgic_ppi_priorityr[3] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR3_EL2);
> + cpu_if->vgic_ppi_priorityr[4] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR4_EL2);
> + cpu_if->vgic_ppi_priorityr[5] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR5_EL2);
> + cpu_if->vgic_ppi_priorityr[6] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2);
> + cpu_if->vgic_ppi_priorityr[7] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2);
> + cpu_if->vgic_ppi_priorityr[8] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2);
> + cpu_if->vgic_ppi_priorityr[9] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2);
> + cpu_if->vgic_ppi_priorityr[10] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2);
> + cpu_if->vgic_ppi_priorityr[11] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2);
> + cpu_if->vgic_ppi_priorityr[12] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2);
> + cpu_if->vgic_ppi_priorityr[13] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2);
> + cpu_if->vgic_ppi_priorityr[14] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2);
> + cpu_if->vgic_ppi_priorityr[15] =
> + read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2);
There is clearly scope for optimisation here. The most likely case is
that we will only care about a handful of PPIs, all grouped in the
first 4 registers. That's a good reason to track the PPIs that the
guest can see.
> +
> + /* Now that we are done, disable DVI */
> + write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2);
> + write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
> +}
> +
> +void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
> +{
> + /* Now enable DVI so that the guest's interrupt config takes over */
> + write_sysreg_s(cpu_if->vgic_ppi_dvir[0], SYS_ICH_PPI_DVIR0_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_dvir[1], SYS_ICH_PPI_DVIR1_EL2);
> +
> + write_sysreg_s(cpu_if->vgic_ppi_activer_entry[0],
> + SYS_ICH_PPI_ACTIVER0_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_activer_entry[1],
> + SYS_ICH_PPI_ACTIVER1_EL2);
> +
> + write_sysreg_s(cpu_if->vgic_ich_ppi_enabler_entry[0],
> + SYS_ICH_PPI_ENABLER0_EL2);
> + write_sysreg_s(cpu_if->vgic_ich_ppi_enabler_entry[1],
> + SYS_ICH_PPI_ENABLER1_EL2);
> +
> + /* Update the pending state of the NON-DVI'd PPIs, only */
> + write_sysreg_s(cpu_if->vgic_ppi_pendr_entry[0] &
> + ~cpu_if->vgic_ppi_dvir[0],
Again, don't insert line breaks in logical operations.
> + SYS_ICH_PPI_PENDR0_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_pendr_entry[1] &
> + ~cpu_if->vgic_ppi_dvir[1],
> + SYS_ICH_PPI_PENDR1_EL2);
> +
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[0],
> + SYS_ICH_PPI_PRIORITYR0_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[1],
> + SYS_ICH_PPI_PRIORITYR1_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[2],
> + SYS_ICH_PPI_PRIORITYR2_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[3],
> + SYS_ICH_PPI_PRIORITYR3_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[4],
> + SYS_ICH_PPI_PRIORITYR4_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[5],
> + SYS_ICH_PPI_PRIORITYR5_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[6],
> + SYS_ICH_PPI_PRIORITYR6_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[7],
> + SYS_ICH_PPI_PRIORITYR7_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[8],
> + SYS_ICH_PPI_PRIORITYR8_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[9],
> + SYS_ICH_PPI_PRIORITYR9_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[10],
> + SYS_ICH_PPI_PRIORITYR10_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[11],
> + SYS_ICH_PPI_PRIORITYR11_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[12],
> + SYS_ICH_PPI_PRIORITYR12_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[13],
> + SYS_ICH_PPI_PRIORITYR13_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[14],
> + SYS_ICH_PPI_PRIORITYR14_EL2);
> + write_sysreg_s(cpu_if->vgic_ppi_priorityr[15],
> + SYS_ICH_PPI_PRIORITYR15_EL2);
> +}
> +
> +void __vgic_v5_save_icsr(struct vgic_v5_cpu_if *cpu_if)
> +{
> + cpu_if->vgic_icsr = read_sysreg_s(SYS_ICC_ICSR_EL1);
> +}
> +
> +void __vgic_v5_restore_icsr(struct vgic_v5_cpu_if *cpu_if)
> +{
> + write_sysreg_s(cpu_if->vgic_icsr, SYS_ICC_ICSR_EL1);
> +}
> diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
> index afc4aed9231ac..fcf5e68ab591c 100644
> --- a/arch/arm64/kvm/hyp/vhe/Makefile
> +++ b/arch/arm64/kvm/hyp/vhe/Makefile
> @@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
>
> obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
> obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> - ../fpsimd.o ../hyp-entry.o ../exception.o
> + ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5.o
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index fbbaef4ad2114..525c8b83e83c9 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -359,7 +359,35 @@ struct vgic_v3_cpu_if {
> };
>
> struct vgic_v5_cpu_if {
> + u64 vgic_apr;
> + u64 vgic_vmcr;
> +
> + /* PPI register state */
> u64 vgic_ppi_hmr[2];
> + u64 vgic_ppi_dvir[2];
> + u64 vgic_ppi_priorityr[16];
> +
> + /* The pending state of the guest. This is merged with the exit state */
> + u64 vgic_ppi_pendr[2];
> +
> + /* The state flushed to the regs when entering the guest */
> + u64 vgic_ppi_activer_entry[2];
> + u64 vgic_ich_ppi_enabler_entry[2];
> + u64 vgic_ppi_pendr_entry[2];
> +
> + /* The saved state of the regs when leaving the guest */
> + u64 vgic_ppi_activer_exit[2];
> + u64 vgic_ich_ppi_enabler_exit[2];
> + u64 vgic_ppi_pendr_exit[2];
See my comment on patch 17, requesting to make these entry/exit states
per CPU, and not per vcpu.
> +
> + /*
> + * The ICSR is re-used across host and guest, and hence it needs to be
> + * saved/restored. Only one copy is required as the host should block
> + * preemption between executing GIC CDRCFG and accessing the
> + * ICC_ICSR_EL1. A guest, of course, can never guarantee this, and hence
> + * it is the hyp's responsibility to keep the state consistent.
> + */
> + u64 vgic_icsr;
> };
>
> struct vgic_cpu {
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
More information about the linux-arm-kernel
mailing list