[PATCH 2/4] KVM: arm64: vgic-v3: Implement MMIO-based LPI invalidation
Oliver Upton
oupton at google.com
Tue Mar 15 22:26:06 PDT 2022
Hi Marc,
On Mon, Mar 14, 2022 at 04:40:42PM +0000, Marc Zyngier wrote:
> Since GICv4.1, it has become legal for an implementation to advertise
> GICR_{INVLPIR,INVALLR,SYNCR} while having an ITS, allowing for a more
> efficient invalidation scheme (no guest command queue contention when
> multiple CPUs are generating invalidations).
>
> Provide the invalidation registers as a primitive to their ITS
> counterpart. Note that we don't advertise them to the guest yet
> (the architecture allows an implementation to do this).
>
> Signed-off-by: Marc Zyngier <maz at kernel.org>
> ---
> arch/arm64/kvm/vgic/vgic-its.c | 62 ++++++++++++++++++++----------
> arch/arm64/kvm/vgic/vgic-mmio-v3.c | 62 ++++++++++++++++++++++++++++++
> arch/arm64/kvm/vgic/vgic.h | 4 ++
> include/kvm/arm_vgic.h | 1 +
> 4 files changed, 108 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> index 089fc2ffcb43..cc62d8a8180f 100644
> --- a/arch/arm64/kvm/vgic/vgic-its.c
> +++ b/arch/arm64/kvm/vgic/vgic-its.c
> @@ -1272,6 +1272,11 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
> return 0;
> }
>
> +int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq)
> +{
> + return update_lpi_config(kvm, irq, NULL, true);
> +}
> +
> /*
> * The INV command syncs the configuration bits from the memory table.
> * Must be called with the its_lock mutex held.
> @@ -1288,7 +1293,41 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
> if (!ite)
> return E_ITS_INV_UNMAPPED_INTERRUPT;
>
> - return update_lpi_config(kvm, ite->irq, NULL, true);
> + return vgic_its_inv_lpi(kvm, ite->irq);
> +}
> +
> +/**
> + * vgic_its_invall - invalidate all LPIs targeting a given vcpu
> + * @vcpu: the vcpu for which the RD is targeted by an invalidation
> + *
> + * Contrary to the INVALL command, this targets an RD instead of a
> + * collection, and we don't need to hold the its_lock, since no ITS is
> + * involved here.
> + */
> +int vgic_its_invall(struct kvm_vcpu *vcpu)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + int irq_count, i = 0;
> + u32 *intids;
> +
> + irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
> + if (irq_count < 0)
> + return irq_count;
> +
> + for (i = 0; i < irq_count; i++) {
> + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
> + if (!irq)
> + continue;
> + update_lpi_config(kvm, irq, vcpu, false);
> + vgic_put_irq(kvm, irq);
> + }
> +
> + kfree(intids);
> +
> + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
> + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
> +
> + return 0;
> }
nit: the refactoring landing in the same patch as the functional change
is a bit distracting. Looks fine though.
> /*
> @@ -1305,32 +1344,13 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
> u32 coll_id = its_cmd_get_collection(its_cmd);
> struct its_collection *collection;
> struct kvm_vcpu *vcpu;
> - struct vgic_irq *irq;
> - u32 *intids;
> - int irq_count, i;
>
> collection = find_collection(its, coll_id);
> if (!its_is_collection_mapped(collection))
> return E_ITS_INVALL_UNMAPPED_COLLECTION;
>
> vcpu = kvm_get_vcpu(kvm, collection->target_addr);
> -
> - irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
> - if (irq_count < 0)
> - return irq_count;
> -
> - for (i = 0; i < irq_count; i++) {
> - irq = vgic_get_irq(kvm, NULL, intids[i]);
> - if (!irq)
> - continue;
> - update_lpi_config(kvm, irq, vcpu, false);
> - vgic_put_irq(kvm, irq);
> - }
> -
> - kfree(intids);
> -
> - if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
> - its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
> + vgic_its_invall(vcpu);
>
> return 0;
> }
> diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> index 58e40b4874f8..186bf35078bf 100644
> --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> @@ -525,6 +525,59 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
> pendbaser) != old_pendbaser);
> }
>
> +static unsigned long vgic_mmio_read_sync(struct kvm_vcpu *vcpu,
> + gpa_t addr, unsigned int len)
> +{
> + return !!atomic_read(&vcpu->arch.vgic_cpu.syncr_busy);
> +}
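For context, the contract with the guest here is that it polls
GICR_SYNCR until the Busy bit (bit [0]) reads as zero before it
considers a prior invalidation complete. Roughly, on the guest side
(illustrative sketch only, not from this patch; 'rd_base' is a
stand-in for the redistributor's MMIO base):

	/* kick off a direct invalidation, then wait for it to drain */
	writeq_relaxed(intid, rd_base + GICR_INVLPIR);
	while (readl_relaxed(rd_base + GICR_SYNCR) & 1)
		cpu_relax();

So any path that bumps syncr_busy without dropping it again leaves the
guest spinning forever. More on that below.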
> +
> +static void vgic_make_rdist_busy(struct kvm_vcpu *vcpu, bool busy)
nit: s/make/set, since you use this helper to decrement the counter too.
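i.e. something like:

	static void vgic_set_rdist_busy(struct kvm_vcpu *vcpu, bool busy)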
> +{
> + if (busy) {
> + atomic_inc(&vcpu->arch.vgic_cpu.syncr_busy);
> + smp_mb__after_atomic();
> + } else {
> + smp_mb__before_atomic();
> + atomic_dec(&vcpu->arch.vgic_cpu.syncr_busy);
> + }
> +}
> +
> +static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu,
> + gpa_t addr, unsigned int len,
> + unsigned long val)
> +{
> + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> + struct vgic_irq *irq;
> +
> + if (!vgic_cpu->lpis_enabled)
> + return;
> +
> + vgic_make_rdist_busy(vcpu, true);
> +
> + irq = vgic_get_irq(vcpu->kvm, NULL, val);
> + if (!irq)
> + return;
Isn't the busy counter left unbalanced if you return early here? If
vgic_get_irq() doesn't find the INTID, you bail out after the
increment but before the decrement, so GICR_SYNCR reads as busy
forever.
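Untested, but restructuring so that the NULL case still falls through
to the decrement would keep the counter balanced (using the s/make/set
rename suggested above):

	vgic_set_rdist_busy(vcpu, true);

	irq = vgic_get_irq(vcpu->kvm, NULL, val);
	if (irq) {
		vgic_its_inv_lpi(vcpu->kvm, irq);
		vgic_put_irq(vcpu->kvm, irq);
	}

	vgic_set_rdist_busy(vcpu, false);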
--
Thanks,
Oliver
> +
> + vgic_its_inv_lpi(vcpu->kvm, irq);
> + vgic_put_irq(vcpu->kvm, irq);
> +
> + vgic_make_rdist_busy(vcpu, false);
> +}
> +
> +static void vgic_mmio_write_invall(struct kvm_vcpu *vcpu,
> + gpa_t addr, unsigned int len,
> + unsigned long val)
> +{
> + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +
> + if (!vgic_cpu->lpis_enabled)
> + return;
> +
> + vgic_make_rdist_busy(vcpu, true);
> + vgic_its_invall(vcpu);
> + vgic_make_rdist_busy(vcpu, false);
> +}
> +
> /*
> * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
> * redistributors, while SPIs are covered by registers in the distributor
> @@ -630,6 +683,15 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
> REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
> vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8,
> VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
> + REGISTER_DESC_WITH_LENGTH(GICR_INVLPIR,
> + vgic_mmio_read_raz, vgic_mmio_write_invlpi, 8,
> + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
> + REGISTER_DESC_WITH_LENGTH(GICR_INVALLR,
> + vgic_mmio_read_raz, vgic_mmio_write_invall, 8,
> + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
> + REGISTER_DESC_WITH_LENGTH(GICR_SYNCR,
> + vgic_mmio_read_sync, vgic_mmio_write_wi, 8,
> + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
> REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
> vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
> VGIC_ACCESS_32bit),
> diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
> index 3fd6c86a7ef3..53581e11f7c8 100644
> --- a/arch/arm64/kvm/vgic/vgic.h
> +++ b/arch/arm64/kvm/vgic/vgic.h
> @@ -317,6 +317,10 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm);
> void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
> void vgic_its_invalidate_cache(struct kvm *kvm);
>
> +/* GICv4.1 MMIO interface */
> +int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
> +int vgic_its_invall(struct kvm_vcpu *vcpu);
> +
> bool vgic_supports_direct_msis(struct kvm *kvm);
> int vgic_v4_init(struct kvm *kvm);
> void vgic_v4_teardown(struct kvm *kvm);
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index bb30a6803d9f..d54bb44d6d98 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -344,6 +344,7 @@ struct vgic_cpu {
> struct vgic_io_device rd_iodev;
> struct vgic_redist_region *rdreg;
> u32 rdreg_index;
> + atomic_t syncr_busy;
>
> /* Contains the attributes and gpa of the LPI pending tables. */
> u64 pendbaser;
> --
> 2.34.1
>