[PATCH v5 15/36] KVM: arm64: gic-v5: Implement GICv5 load/put and save/restore

Sascha Bischoff Sascha.Bischoff at arm.com
Wed Mar 4 06:21:30 PST 2026


On Wed, 2026-03-04 at 09:26 +0000, Marc Zyngier wrote:
> On Thu, 26 Feb 2026 15:59:18 +0000,
> Sascha Bischoff <Sascha.Bischoff at arm.com> wrote:
> > 
> > This change introduces GICv5 load/put. Additionally, it plumbs in
> > save/restore for:
> > 
> > * PPIs (ICH_PPI_x_EL2 regs)
> > * ICH_VMCR_EL2
> > * ICH_APR_EL2
> > * ICC_ICSR_EL1
> > 
> > A GICv5-specific enable bit is added to struct vgic_vmcr as this
> > differs from previous GICs. On GICv5-native systems, the VMCR only
> > contains the enable bit (driven by the guest via ICC_CR0_EL1.EN)
> > and
> > the priority mask (PCR).
> > 
> > A struct gicv5_vpe is also introduced. This currently only contains
> > a
> > single field - bool resident - which is used to track if a VPE is
> > currently running or not, and is used to avoid a case of double
> > load
> > or double put on the WFI path for a vCPU. This struct will be
> > extended
> > as additional GICv5 support is merged, specifically for VPE
> > doorbells.
> > 
> > Co-authored-by: Timothy Hayes <timothy.hayes at arm.com>
> > Signed-off-by: Timothy Hayes <timothy.hayes at arm.com>
> > Signed-off-by: Sascha Bischoff <sascha.bischoff at arm.com>
> > Reviewed-by: Jonathan Cameron <jonathan.cameron at huawei.com>
> > ---
> >  arch/arm64/kvm/hyp/nvhe/switch.c   | 12 +++++
> >  arch/arm64/kvm/vgic/vgic-mmio.c    | 28 +++++++----
> >  arch/arm64/kvm/vgic/vgic-v5.c      | 74
> > ++++++++++++++++++++++++++++++
> >  arch/arm64/kvm/vgic/vgic.c         | 32 ++++++++-----
> >  arch/arm64/kvm/vgic/vgic.h         |  7 +++
> >  include/kvm/arm_vgic.h             |  2 +
> >  include/linux/irqchip/arm-gic-v5.h |  5 ++
> >  7 files changed, 141 insertions(+), 19 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c
> > b/arch/arm64/kvm/hyp/nvhe/switch.c
> > index b41485ce295ab..a88da302b6d08 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> > @@ -113,6 +113,12 @@ static void __deactivate_traps(struct kvm_vcpu
> > *vcpu)
> >  /* Save VGICv3 state on non-VHE systems */
> >  static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
> >  {
> > +	if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
> > +		__vgic_v5_save_state(&vcpu-
> > >arch.vgic_cpu.vgic_v5);
> > +		__vgic_v5_save_ppi_state(&vcpu-
> > >arch.vgic_cpu.vgic_v5);
> > +		return;
> > +	}
> > +
> >  	if
> > (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
> >  		__vgic_v3_save_state(&vcpu-
> > >arch.vgic_cpu.vgic_v3);
> >  		__vgic_v3_deactivate_traps(&vcpu-
> > >arch.vgic_cpu.vgic_v3);
> > @@ -122,6 +128,12 @@ static void __hyp_vgic_save_state(struct
> > kvm_vcpu *vcpu)
> >  /* Restore VGICv3 state on non-VHE systems */
> >  static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
> >  {
> > +	if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
> > +		__vgic_v5_restore_state(&vcpu-
> > >arch.vgic_cpu.vgic_v5);
> > +		__vgic_v5_restore_ppi_state(&vcpu-
> > >arch.vgic_cpu.vgic_v5);
> > +		return;
> > +	}
> > +
> >  	if
> > (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
> >  		__vgic_v3_activate_traps(&vcpu-
> > >arch.vgic_cpu.vgic_v3);
> >  		__vgic_v3_restore_state(&vcpu-
> > >arch.vgic_cpu.vgic_v3);
> > diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c
> > b/arch/arm64/kvm/vgic/vgic-mmio.c
> > index a573b1f0c6cbe..675c2844f5e5c 100644
> > --- a/arch/arm64/kvm/vgic/vgic-mmio.c
> > +++ b/arch/arm64/kvm/vgic/vgic-mmio.c
> > @@ -842,18 +842,30 @@ vgic_find_mmio_region(const struct
> > vgic_register_region *regions,
> >  
> >  void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
> >  {
> > -	if (kvm_vgic_global_state.type == VGIC_V2)
> > -		vgic_v2_set_vmcr(vcpu, vmcr);
> > -	else
> > -		vgic_v3_set_vmcr(vcpu, vmcr);
> > +	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> > +
> > +	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V5) {
> > +		vgic_v5_set_vmcr(vcpu, vmcr);
> > +	} else {
> > +		if (kvm_vgic_global_state.type == VGIC_V2)
> > +			vgic_v2_set_vmcr(vcpu, vmcr);
> > +		else
> > +			vgic_v3_set_vmcr(vcpu, vmcr);
> > +	}
> 
> This looks rather ugly, and doesn't make use of the helpers you
> introduced in patch #1. How about:
> 
> 	switch (dist->vgic_model) {
> 	case KVM_DEV_TYPE_ARM_VGIC_V5:
> 		vgic_v5_set_vmcr(vcpu, vmcr);
> 		break;
> 	case KVM_DEV_TYPE_ARM_VGIC_V3:
> 		vgic_v3_set_vmcr(vcpu, vmcr);
> 		break;
> 	case KVM_DEV_TYPE_ARM_VGIC_V2:
> 		if
> (static_branch_likely(&kvm_vgic_global_state.gicv3_cpuif))
> 			vgic_v3_set_vmcr(vcpu, vmcr);
> 		else
> 			vgic_v2_set_vmcr(vcpu, vmcr);
> 		break;
> 	default:
> 		BUG();
> 	}
> 
> Yes, the handling of v3 is a bit redundant, but I find it overall
> more readable.

I've gone and made this change wherever applicable. The exception is
for vgic_save_state() and vgic_restore_state(). These are still called
in the case where we don't have an in-kernel irqchip (so the BUG()
would be a bad idea there anyhow), which complicates the logic quite a
bit.

The __vgic_v3_restore_state() code (and the save counterpart) is doing
quite a lot of heavy lifting. It applies in the GICv3-on-GICv3, GICv2-
on-GICv3, GICv3-on-GICv5, and no-in-kernel-irqchip cases, and is
responsible for configuring the appropriate trap handling in all of
these cases. This quickly makes the logic rather complex, as we need to
call it in the default case too, assuming we're on GICv3-based
hardware.

I've added in a comment explaining the logic and why we don't have a
switch for these two.

> 
> >  }
> >  
> >  void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
> >  {
> > -	if (kvm_vgic_global_state.type == VGIC_V2)
> > -		vgic_v2_get_vmcr(vcpu, vmcr);
> > -	else
> > -		vgic_v3_get_vmcr(vcpu, vmcr);
> > +	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> > +
> > +	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V5) {
> > +		vgic_v5_get_vmcr(vcpu, vmcr);
> > +	} else {
> > +		if (kvm_vgic_global_state.type == VGIC_V2)
> > +			vgic_v2_get_vmcr(vcpu, vmcr);
> > +		else
> > +			vgic_v3_get_vmcr(vcpu, vmcr);
> > +	}
> >  }
> >  
> >  /*
> > diff --git a/arch/arm64/kvm/vgic/vgic-v5.c
> > b/arch/arm64/kvm/vgic/vgic-v5.c
> > index 2c51b9ba4f118..5b35c756887a9 100644
> > --- a/arch/arm64/kvm/vgic/vgic-v5.c
> > +++ b/arch/arm64/kvm/vgic/vgic-v5.c
> > @@ -85,3 +85,77 @@ int vgic_v5_probe(const struct gic_kvm_info
> > *info)
> >  
> >  	return 0;
> >  }
> > +
> > +void vgic_v5_load(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +
> > +	/*
> > +	 * On the WFI path, vgic_load is called a second time. The
> > first is when
> > +	 * scheduling in the vcpu thread again, and the second is
> > when leaving
> > +	 * WFI. Skip the second instance as it serves no purpose
> > and just
> > +	 * restores the same state again.
> > +	 */
> > +	if (READ_ONCE(cpu_if->gicv5_vpe.resident))
> > +		return;
> 
> I'm perplexed. What are READ_ONCE()/WRITE_ONCE() guaranteeing?

Nothing anymore. In the original code (before it got cleaned up, and
before the PPI support got split out for review) we had a race with a
VPE going non-resident and doorbells arriving which resulted in us
incorrectly not making the VPE resident again. This was addressed using
READ_ONCE()/WRITE_ONCE().

For the PPI code under review, it certainly isn't necessary as we don't
have any of the code in place to make things resident/non-resident, so
I've removed it. I also don't think it is required anymore in the
original case, but will go and check that one before dropping it there
too.

> 
> > +
> > +	kvm_call_hyp(__vgic_v5_restore_vmcr_apr, cpu_if);
> > +
> > +	WRITE_ONCE(cpu_if->gicv5_vpe.resident, true);
> > +}
> > +
> > +void vgic_v5_put(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +
> > +	/*
> > +	 * Do nothing if we're not resident. This can happen in
> > the WFI path
> > +	 * where we do a vgic_put in the WFI path and again later
> > when
> > +	 * descheduling the thread. We risk losing VMCR state if
> > we sync it
> > +	 * twice, so instead return early in this case.
> > +	 */
> > +	if (!READ_ONCE(cpu_if->gicv5_vpe.resident))
> > +		return;
> > +
> > +	kvm_call_hyp(__vgic_v5_save_apr, cpu_if);
> > +
> > +	WRITE_ONCE(cpu_if->gicv5_vpe.resident, false);
> > +}
> > +
> > +void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr
> > *vmcrp)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +	u64 vmcr = cpu_if->vgic_vmcr;
> > +
> > +	vmcrp->en = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcr);
> > +	vmcrp->pmr = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcr);
> > +}
> > +
> > +void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr
> > *vmcrp)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +	u64 vmcr;
> > +
> > +	vmcr = FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcrp->pmr)
> > |
> > +	       FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcrp->en);
> > +
> > +	cpu_if->vgic_vmcr = vmcr;
> > +}
> > +
> > +void vgic_v5_restore_state(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +
> > +	__vgic_v5_restore_state(cpu_if);
> > +	kvm_call_hyp(__vgic_v5_restore_ppi_state, cpu_if);
> > +	dsb(sy);
> > +}
> > +
> > +void vgic_v5_save_state(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +
> > +	__vgic_v5_save_state(cpu_if);
> > +	kvm_call_hyp(__vgic_v5_save_ppi_state, cpu_if);
> > +	dsb(sy);
> > +}
> > diff --git a/arch/arm64/kvm/vgic/vgic.c
> > b/arch/arm64/kvm/vgic/vgic.c
> > index 2c0e8803342e2..1005ff5f36235 100644
> > --- a/arch/arm64/kvm/vgic/vgic.c
> > +++ b/arch/arm64/kvm/vgic/vgic.c
> > @@ -996,7 +996,9 @@ static inline bool
> > can_access_vgic_from_kernel(void)
> >  
> >  static inline void vgic_save_state(struct kvm_vcpu *vcpu)
> >  {
> > -	if
> > (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> > +	if (vgic_is_v5(vcpu->kvm))
> > +		vgic_v5_save_state(vcpu);
> > +	else if
> > (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> >  		vgic_v2_save_state(vcpu);
> >  	else
> >  		__vgic_v3_save_state(&vcpu-
> > >arch.vgic_cpu.vgic_v3);
> > @@ -1005,14 +1007,16 @@ static inline void vgic_save_state(struct
> > kvm_vcpu *vcpu)
> >  /* Sync back the hardware VGIC state into our emulation after a
> > guest's run. */
> >  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
> >  {
> > -	/* If nesting, emulate the HW effect from L0 to L1 */
> > -	if (vgic_state_is_nested(vcpu)) {
> > -		vgic_v3_sync_nested(vcpu);
> > -		return;
> > -	}
> > +	if (!vgic_is_v5(vcpu->kvm)) {
> 
> This should directly check for v3. Even once we add v5 support to NV,
> I don't expect the code to be common at all.

Done. Will do the same for other cases I come across.

> 
> > +		/* If nesting, emulate the HW effect from L0 to L1
> > */
> > +		if (vgic_state_is_nested(vcpu)) {
> > +			vgic_v3_sync_nested(vcpu);
> > +			return;
> > +		}
> >  
> > -	if (vcpu_has_nv(vcpu))
> > -		vgic_v3_nested_update_mi(vcpu);
> > +		if (vcpu_has_nv(vcpu))
> > +			vgic_v3_nested_update_mi(vcpu);
> > +	}
> >  
> >  	if (can_access_vgic_from_kernel())
> >  		vgic_save_state(vcpu);
> > @@ -1034,7 +1038,9 @@ void kvm_vgic_process_async_update(struct
> > kvm_vcpu *vcpu)
> >  
> >  static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
> >  {
> > -	if
> > (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> > +	if (vgic_is_v5(vcpu->kvm))
> > +		vgic_v5_restore_state(vcpu);
> > +	else if
> > (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
> >  		vgic_v2_restore_state(vcpu);
> >  	else
> >  		__vgic_v3_restore_state(&vcpu-
> > >arch.vgic_cpu.vgic_v3);
> 
> I have similar comments as some the previous hunks. Using switch/case
> statements would be more readable IMO.

Done where appropriate (see comment above).

Thanks,
Sascha

> 
> Thanks,
> 
> 	M.
> 



More information about the linux-arm-kernel mailing list