[PATCH v6 22/39] KVM: arm64: gic-v5: Check for pending PPIs

Thu Mar 19 01:27:03 PDT 2026

On Tue, 2026-03-17 at 17:08 +0000, Marc Zyngier wrote:
> On Tue, 17 Mar 2026 11:45:41 +0000,
> Sascha Bischoff <Sascha.Bischoff at arm.com> wrote:
> > 
> > This change allows KVM to check for pending PPI interrupts. This
> > has
> > two main components:
> > 
> > First of all, the effective priority mask is calculated.  This is a
> > > combination of the priority mask in the VPE's ICC_PCR_EL1.PRIORITY
> > and
> > the currently running priority as determined from the VPE's
> > ICH_APR_EL1. If an interrupt's priority is greater than or equal to
> > the effective priority mask, it can be signalled. Otherwise, it
> > cannot.
> > 
> > Secondly, any Enabled and Pending PPIs must be checked against this
> > > compound priority mask. This requires the PPI priorities to be synced
> > back to the KVM shadow state on WFI entry - this is skipped in
> > general
> > operation as it isn't required and is rather expensive. If any
> > Enabled
> > and Pending PPIs are of sufficient priority to be signalled, then
> > there are pending PPIs. Else, there are not. This ensures that a
> > VPE
> > is not woken when it cannot actually process the pending
> > interrupts.
> > 
> > As the PPI priorities are not synced back to the KVM shadow state
> > on
> > > every guest exit, they must be synced prior to checking if there
> > are
> > pending interrupts for the guest. The sync itself happens in
> > vgic_v5_put() if, and only if, the vcpu is entering WFI as this is
> > the
> > only case where it is not planned to run the vcpu thread again. If
> > the
> > vcpu enters WFI, the vcpu thread will be descheduled and won't be
> > rescheduled again until it has a pending interrupt, which is
> > checked
> > from kvm_arch_vcpu_runnable().
> > 
> > Signed-off-by: Sascha Bischoff <sascha.bischoff at arm.com>
> > Reviewed-by: Joey Gouly <joey.gouly at arm.com>
> > Reviewed-by: Jonathan Cameron <jonathan.cameron at huawei.com>
> > ---
> >  arch/arm64/kvm/vgic/vgic-v5.c | 101
> > ++++++++++++++++++++++++++++++++++
> >  arch/arm64/kvm/vgic/vgic.c    |   3 +
> >  arch/arm64/kvm/vgic/vgic.h    |   1 +
> >  3 files changed, 105 insertions(+)
> > 
> > diff --git a/arch/arm64/kvm/vgic/vgic-v5.c
> > b/arch/arm64/kvm/vgic/vgic-v5.c
> > index e080fce61dc35..14dba634f79b4 100644
> > --- a/arch/arm64/kvm/vgic/vgic-v5.c
> > +++ b/arch/arm64/kvm/vgic/vgic-v5.c
> > @@ -122,6 +122,29 @@ int vgic_v5_finalize_ppi_state(struct kvm
> > *kvm)
> >  	return 0;
> >  }
> >  
> > +static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu
> > *vcpu)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +	u32 highest_ap, priority_mask;
> > +
> > +	/*
> > +	 * Counting the number of trailing zeros gives the current
> > active
> > +	 * priority. Explicitly use the 32-bit version here as we
> > have 32
> > +	 * priorities. 32 then means that there are no active
> > priorities.
> > +	 */
> > +	highest_ap = cpu_if->vgic_apr ? __builtin_ctz(cpu_if-
> > >vgic_apr) : 32;
> > +
> > +	/*
> > +	 * An interrupt is of sufficient priority if it is equal
> > to or
> > +	 * greater than the priority mask. Add 1 to the priority
> > mask
> > +	 * (i.e., lower priority) to match the APR logic before
> > taking
> > +	 * the min. This gives us the lowest priority that is
> > masked.
> > +	 */
> > +	priority_mask = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR,
> > cpu_if->vgic_vmcr);
> > +
> > +	return min(highest_ap, priority_mask + 1);
> > +}
> > +
> >  /*
> >   * For GICv5, the PPIs are mostly directly managed by the
> > hardware. We (the
> >   * hypervisor) handle the pending, active, enable state
> > save/restore, but don't
> > @@ -172,6 +195,80 @@ void vgic_v5_set_ppi_ops(struct vgic_irq *irq)
> >  		irq->ops = &vgic_v5_ppi_irq_ops;
> >  }
> >  
> > +/*
> > + * Sync back the PPI priorities to the vgic_irq shadow state for
> > any interrupts
> > + * exposed to the guest (skipping all others).
> > + */
> > +static void vgic_v5_sync_ppi_priorities(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vgic_v5_cpu_if *cpu_if = &vcpu-
> > >arch.vgic_cpu.vgic_v5;
> > +	u64 priorityr;
> > +	int i;
> > +
> > +	/*
> > +	 * We have up to 16 PPI Priority regs, but only have a few
> > interrupts
> > +	 * that the guest is allowed to use. Limit our sync of PPI
> > priorities to
> > +	 * those actually exposed to the guest by first iterating
> > over the mask
> > +	 * of exposed PPIs.
> > +	 */
> > +	for_each_set_bit(i, vcpu->kvm-
> > >arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
> > +		u32 intid = vgic_v5_make_ppi(i);
> > +		struct vgic_irq *irq;
> > +		int pri_idx, pri_reg;
> > +		u8 priority;
> > +
> > +		/*
> > +		 * Determine which priority register and the field
> > within it to
> > +		 * extract.
> > +		 */
> > +		pri_reg = i / 8;
> > +		pri_idx = i % 8;
> > +
> > +		priorityr = cpu_if->vgic_ppi_priorityr[pri_reg];
> > +		priority = (priorityr >> (pri_idx * 8)) &
> > GENMASK(4, 0);
> 
> You should be able to write this as:
> 
> 		pri_bit = pri_idx * 8;
> 		priority = field_get(GENMASK(pri_bit + 4, pri_bit),
> priorityr);
> 
> which while more verbose, clearly shows that you are extracting a
> field from the register.

Yeah, that's definitely better. Thanks.

> 
> > +
> > +		irq = vgic_get_vcpu_irq(vcpu, intid);
> > +
> > +		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
> > +			irq->priority = priority;
> > +
> > +		vgic_put_irq(vcpu->kvm, irq);
> > +	}
> > +}
> > +
> > +bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu)
> > +{
> > +	unsigned int priority_mask;
> > +	int i;
> > +
> > +	priority_mask = vgic_v5_get_effective_priority_mask(vcpu);
> > +
> > +	/* If the combined priority mask is 0, nothing can be
> > signalled! */
> > +	if (!priority_mask)
> > +		return false;
> 
> The other case when nothing can be signalled is when ICH_VMCR_EL2.En
> == 0, meaning that the guest hasn't enabled interrupts at all.
> 
> This should be taken into account, or a trapping WFI is going to turn
> into a nice CPU hog.

Very valid point.

There are two options for this. The ICH_VMCR_EL2 contains the En bit
(which is an alias of ICV_CR0_EL1.EN, i.e., is set/cleared when the
guest enables/disables interrupt delivery for a vcpu).

The first would be to explicitly check this bit when determining if
there are pending PPIs for a vcpu. However, this would need to be
checked in multiple places as the code evolves. One of these cases
would be when requesting a VPE Doorbell.

For both PPIs and VPE Doorbells, one needs to figure out the threshold
for signalling an interrupt. Therefore, I think it makes more sense to
roll this into the calculation of the priority mask. Effectively, if a
vcpu has not opted into interrupt delivery, the effective running
priority is the highest priority and nothing can signal. This is the
second option.

I am proposing this change:

diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index 22230e6eaa8bb..450960b792331 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -127,6 +127,14 @@ static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu)
        struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
        u32 highest_ap, priority_mask;
 
+       /*
+        * If the guest's CPU has not opted to receive interrupts, then the
+        * effective running priority is the highest priority. Just return 0
+        * (the highest priority).
+        */
+       if (!FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, cpu_if->vgic_vmcr))
+               return 0;
+
        /*
         * Counting the number of trailing zeros gives the current active
         * priority. Explicitly use the 32-bit version here as we have 32
@@ -237,7 +245,12 @@ bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu)
 
        priority_mask = vgic_v5_get_effective_priority_mask(vcpu);
 
-       /* If the combined priority mask is 0, nothing can be signalled! */
+       /*
+        * If the combined priority mask is 0, nothing can be signalled! In the
+        * case where the guest has disabled interrupt delivery for the vcpu
+        * (via ICV_CR0_EL1.EN->ICH_VMCR_EL2.EN), we calculate the priority mask
+        * as 0 too (the highest possible priority).
+        */
        if (!priority_mask)
                return false;


> 
> > +
> > +	for_each_set_bit(i, vcpu->kvm-
> > >arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
> > +		u32 intid = vgic_v5_make_ppi(i);
> > +		bool has_pending = false;
> > +		struct vgic_irq *irq;
> > +
> > +		irq = vgic_get_vcpu_irq(vcpu, intid);
> > +
> > +		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
> > {
> > +			if (irq->enabled && irq_is_pending(irq) &&
> > +			    irq->priority <= priority_mask)
> > +				has_pending = true;
> > +		}
> 
> nit:
> 		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
> 			has_pending = (irq->enabled &&
> irq_is_pending(irq) &&
> 				       irq->priority <=
> priority_mask);

Done

Thanks,
Sascha

> 
> > +
> > +		vgic_put_irq(vcpu->kvm, irq);
> > +
> > +		if (has_pending)
> > +			return true;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> >  /*
> >   * Detect any PPIs state changes, and propagate the state with
> > KVM's
> >   * shadow structures.
> > @@ -299,6 +396,10 @@ void vgic_v5_put(struct kvm_vcpu *vcpu)
> >  	kvm_call_hyp(__vgic_v5_save_apr, cpu_if);
> >  
> >  	cpu_if->gicv5_vpe.resident = false;
> > +
> > +	/* The shadow priority is only updated on entering WFI */
> > +	if (vcpu_get_flag(vcpu, IN_WFI))
> > +		vgic_v5_sync_ppi_priorities(vcpu);
> >  }
> >  
> >  void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr
> > *vmcrp)
> > diff --git a/arch/arm64/kvm/vgic/vgic.c
> > b/arch/arm64/kvm/vgic/vgic.c
> > index 3b148d3d4875e..d448205d80617 100644
> > --- a/arch/arm64/kvm/vgic/vgic.c
> > +++ b/arch/arm64/kvm/vgic/vgic.c
> > @@ -1230,6 +1230,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu
> > *vcpu)
> >  	unsigned long flags;
> >  	struct vgic_vmcr vmcr;
> >  
> > +	if (vgic_is_v5(vcpu->kvm))
> > +		return vgic_v5_has_pending_ppi(vcpu);
> > +
> >  	if (!vcpu->kvm->arch.vgic.enabled)
> >  		return false;
> >  
> > diff --git a/arch/arm64/kvm/vgic/vgic.h
> > b/arch/arm64/kvm/vgic/vgic.h
> > index ef4e3fb7159dd..3a9e610eefb00 100644
> > --- a/arch/arm64/kvm/vgic/vgic.h
> > +++ b/arch/arm64/kvm/vgic/vgic.h
> > @@ -365,6 +365,7 @@ void vgic_debug_destroy(struct kvm *kvm);
> >  
> >  int vgic_v5_probe(const struct gic_kvm_info *info);
> >  void vgic_v5_set_ppi_ops(struct vgic_irq *irq);
> > +bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu);
> >  void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu);
> >  void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu);
> >  void vgic_v5_load(struct kvm_vcpu *vcpu);
> 
> Thanks,
> 
> 	M.
>