[PATCH 18/32] KVM: arm64: gic-v5: Check for pending PPIs

Wed Dec 17 06:29:18 PST 2025

On Fri, 12 Dec 2025 15:22:41 +0000,
Sascha Bischoff <Sascha.Bischoff at arm.com> wrote:
> 
> This change allows KVM to check for pending PPI interrupts. This has
> two main components:
> 
> First of all, the effective priority mask is calculated.  This is a
> combination of the priority mask in the VPE's ICC_PCR_EL1.PRIORITY and
> the currently running priority as determined from the VPE's
> ICH_APR_EL1. If an interrupt's priority is greater than or equal to
> the effective priority mask, it can be signalled. Otherwise, it
> cannot.
> 
> Secondly, any Enabled and Pending PPIs must be checked against this
> compound priority mask. This requires the PPI priorities to be synced
> back to the KVM shadow state - this is skipped in general operation as
> it isn't required and is rather expensive. If any Enabled and Pending
> PPIs are of sufficient priority to be signalled, then there are
> pending PPIs. Else, there are not.  This ensures that a VPE is not
> woken when it cannot actually process the pending interrupts.
> 
> Signed-off-by: Sascha Bischoff <sascha.bischoff at arm.com>
> ---
>  arch/arm64/kvm/vgic/vgic-v5.c | 123 ++++++++++++++++++++++++++++++++++
>  arch/arm64/kvm/vgic/vgic.c    |  10 ++-
>  arch/arm64/kvm/vgic/vgic.h    |   1 +
>  3 files changed, 131 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
> index d54595fbf4586..35740e88b3591 100644
> --- a/arch/arm64/kvm/vgic/vgic-v5.c
> +++ b/arch/arm64/kvm/vgic/vgic-v5.c
> @@ -54,6 +54,31 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
>  	return 0;
>  }
>  
> +static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
> +	unsigned highest_ap, priority_mask;

Please use explicit types that match their assignment.

> +
> +	/*
> +	 * Counting the number of trailing zeros gives the current
> +	 * active priority. Explicitly use the 32-bit version here as
> +	 * we have 32 priorities. 0x20 then means that there are no
> +	 * active priorities.
> +	 */
> +	highest_ap = __builtin_ctz(cpu_if->vgic_apr);

From https://gcc.gnu.org/onlinedocs/gcc/Bit-Operation-Builtins.html

<quote>
Built-in Function: int __builtin_ctz (unsigned int x)

    Returns the number of trailing 0-bits in x, starting at the least
significant bit position. If x is 0, the result is undefined.
</quote>

We really don't like undefined results.

> +
> +	/*
> +	 * An interrupt is of sufficient priority if it is equal to or
> +	 * greater than the priority mask. Add 1 to the priority mask
> +	 * (i.e., lower priority) to match the APR logic before taking
> +	 * the min. This gives us the lowest priority that is masked.
> +	 */
> +	priority_mask = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, cpu_if->vgic_vmcr);
> +	priority_mask = min(highest_ap, priority_mask + 1);
> +
> +	return priority_mask;
> +}
> +
>  static bool vgic_v5_ppi_set_pending_state(struct kvm_vcpu *vcpu,
>  					  struct vgic_irq *irq)
>  {
> @@ -121,6 +146,104 @@ void vgic_v5_set_ppi_ops(struct vgic_irq *irq)
>  	irq->ops = &vgic_v5_ppi_irq_ops;
>  }
>  
> +
> +/*
> + * Sync back the PPI priorities to the vgic_irq shadow state
> + */
> +static void vgic_v5_sync_ppi_priorities(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
> +	unsigned long flags;
> +	int i, reg;
> +
> +	/* We have 16 PPI Priority regs */
> +	for (reg = 0; reg < 16; reg++) {
> +		const unsigned long priorityr = cpu_if->vgic_ppi_priorityr[reg];
> +
> +		for (i = 0; i < 8; ++i) {

Urgh... 128 locks being taken is no good. We need something better.

> +			struct vgic_irq *irq;
> +			u32 intid;
> +			u8 priority;
> +
> +			priority = (priorityr >> (i * 8)) & 0x1f;
> +
> +			intid = FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI);
> +			intid |= FIELD_PREP(GICV5_HWIRQ_ID, reg * 8 + i);
> +
> +			irq = vgic_get_vcpu_irq(vcpu, intid);
> +			raw_spin_lock_irqsave(&irq->irq_lock, flags);
> +
> +			irq->priority = priority;
> +
> +			raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

Please use scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) here
instead of the open-coded lock/unlock pair.

> +			vgic_put_irq(vcpu->kvm, irq);
> +		}
> +	}
> +}
> +
> +bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
> +	unsigned long flags;
> +	int i, reg;
> +	unsigned int priority_mask;
> +
> +	/* If no pending bits are set, exit early */
> +	if (likely(!cpu_if->vgic_ppi_pendr[0] && !cpu_if->vgic_ppi_pendr[1]))
> +		return false;
> +
> +	priority_mask = vgic_v5_get_effective_priority_mask(vcpu);
> +
> +	/* If the combined priority mask is 0, nothing can be signalled! */
> +	if (!priority_mask)
> +		return false;
> +
> +	/* The shadow priority is only updated on demand, sync it across first */
> +	vgic_v5_sync_ppi_priorities(vcpu);
> +
> +	for (reg = 0; reg < 2; reg++) {
> +		unsigned long possible_bits;
> +		const unsigned long enabler = cpu_if->vgic_ich_ppi_enabler_exit[reg];
> +		const unsigned long pendr = cpu_if->vgic_ppi_pendr_exit[reg];
> +		bool has_pending = false;
> +
> +		/* Check all interrupts that are enabled and pending */
> +		possible_bits = enabler & pendr;
> +
> +		/*
> +		 * Optimisation: pending and enabled with no active priorities
> +		 */
> +		if (possible_bits && priority_mask > 0x1f)
> +			return true;
> +
> +		for_each_set_bit(i, &possible_bits, 64) {
> +			struct vgic_irq *irq;
> +			u32 intid;
> +
> +			intid = FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI);
> +			intid |= FIELD_PREP(GICV5_HWIRQ_ID, reg * 64 + i);
> +
> +			irq = vgic_get_vcpu_irq(vcpu, intid);
> +			raw_spin_lock_irqsave(&irq->irq_lock, flags);
> +
> +			/*
> +			 * We know that the interrupt is enabled and pending, so
> +			 * only check the priority.
> +			 */
> +			if (irq->priority <= priority_mask)
> +				has_pending = true;
> +
> +			raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
> +			vgic_put_irq(vcpu->kvm, irq);
> +
> +			if (has_pending)
> +				return true;
> +		}
> +	}

So we do this stuff *twice*. Doesn't strike me as being optimal. It is
also not clear that we need to resync it all when calling
kvm_vgic_vcpu_pending_irq(), which can happen for any odd reason
(spurious wake-up from kvm_vcpu_check_block()).

> +
> +	return false;
> +}
> +
>  /*
>   * Detect any PPIs state changes, and propagate the state with KVM's
>   * shadow structures.
> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
> index e534876656ca7..5d18a03cc11d5 100644
> --- a/arch/arm64/kvm/vgic/vgic.c
> +++ b/arch/arm64/kvm/vgic/vgic.c
> @@ -1174,11 +1174,15 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
>  	unsigned long flags;
>  	struct vgic_vmcr vmcr;
>  
> -	if (!vcpu->kvm->arch.vgic.enabled)
> +	if (!vcpu->kvm->arch.vgic.enabled && !vgic_is_v5(vcpu->kvm))
>  		return false;
>  
> -	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
> -		return true;
> +	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V5) {
> +		return vgic_v5_has_pending_ppi(vcpu);
> +	} else {

Drop the 'else'.

> +		if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
> +			return true;
> +	}
>  
>  	vgic_get_vmcr(vcpu, &vmcr);
>  
> diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
> index 5a77318ddb87a..4b3a1e7ca3fb4 100644
> --- a/arch/arm64/kvm/vgic/vgic.h
> +++ b/arch/arm64/kvm/vgic/vgic.h
> @@ -387,6 +387,7 @@ void vgic_debug_destroy(struct kvm *kvm);
>  int vgic_v5_probe(const struct gic_kvm_info *info);
>  void vgic_v5_set_ppi_ops(struct vgic_irq *irq);
>  int vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, u32 irq, bool dvi);
> +bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu);
>  void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu);
>  void vgic_v5_fold_irq_state(struct kvm_vcpu *vcpu);
>  void vgic_v5_load(struct kvm_vcpu *vcpu);

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.