[PATCH v4 06/12] KVM: arm64: implement basic ITS register handlers

Marc Zyngier marc.zyngier at arm.com
Thu Apr 7 07:35:56 PDT 2016


On 26/03/16 02:14, Andre Przywara wrote:
> Add emulation for some basic MMIO registers used in the ITS emulation.
> This includes:
> - GITS_{CTLR,TYPER,IIDR}
> - ID registers
> - GITS_{CBASER,CREADR,CWRITER}
>   those implement the ITS command buffer handling
> 
> Most of the handlers are pretty straight forward, but CWRITER goes
> some extra miles to allow fine grained locking. The idea here
> is to let only the first instance iterate through the command ring
> buffer, CWRITER accesses on other VCPUs meanwhile will be picked up
> by that first instance and handled as well. The ITS lock is thus only
> hold for very small periods of time and is dropped before the actual

s/hold/held/

> command handler is called.
> 
> Signed-off-by: Andre Przywara <andre.przywara at arm.com>
> ---
>  include/kvm/vgic/vgic.h            |   3 +
>  include/linux/irqchip/arm-gic-v3.h |   8 ++
>  virt/kvm/arm/vgic/its-emul.c       | 272 ++++++++++++++++++++++++++++++++++++-
>  virt/kvm/arm/vgic/vgic.h           |   6 +
>  virt/kvm/arm/vgic/vgic_init.c      |   2 +
>  5 files changed, 284 insertions(+), 7 deletions(-)
> 
> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
> index c79bed5..bafea11 100644
> --- a/include/kvm/vgic/vgic.h
> +++ b/include/kvm/vgic/vgic.h
> @@ -115,6 +115,9 @@ struct vgic_io_device {
>  struct vgic_its {
>  	bool			enabled;
>  	spinlock_t		lock;
> +	u64			cbaser;
> +	int			creadr;
> +	int			cwriter;

Irk. Please use explicitly sized types.

>  };
>  
>  struct vgic_dist {
> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
> index a813c3e..7011b98 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -179,15 +179,23 @@
>  #define GITS_BASER			0x0100
>  #define GITS_IDREGS_BASE		0xffd0
>  #define GITS_PIDR2			GICR_PIDR2
> +#define GITS_PIDR4			0xffd0
> +#define GITS_CIDR0			0xfff0
> +#define GITS_CIDR1			0xfff4
> +#define GITS_CIDR2			0xfff8
> +#define GITS_CIDR3			0xfffc
>  
>  #define GITS_TRANSLATER			0x10040
>  
>  #define GITS_CTLR_ENABLE		(1U << 0)
>  #define GITS_CTLR_QUIESCENT		(1U << 31)
>  
> +#define GITS_TYPER_PLPIS		(1UL << 0)
> +#define GITS_TYPER_IDBITS_SHIFT		8
>  #define GITS_TYPER_DEVBITS_SHIFT	13
>  #define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
>  #define GITS_TYPER_PTA			(1UL << 19)
> +#define GITS_TYPER_HWCOLLCNT_SHIFT	24
>  
>  #define GITS_CBASER_VALID		(1UL << 63)
>  #define GITS_CBASER_nCnB		(0UL << 59)
> diff --git a/virt/kvm/arm/vgic/its-emul.c b/virt/kvm/arm/vgic/its-emul.c
> index 49dd5e4..de8d360 100644
> --- a/virt/kvm/arm/vgic/its-emul.c
> +++ b/virt/kvm/arm/vgic/its-emul.c
> @@ -31,23 +31,263 @@
>  #include "vgic.h"
>  #include "vgic_mmio.h"
>  
> +#define BASER_BASE_ADDRESS(x) ((x) & 0xfffffffff000ULL)
> +
> +static int vgic_mmio_read_its_ctlr(struct kvm_vcpu *vcpu,
> +				   struct kvm_io_device *this,
> +				   gpa_t addr, int len, void *val)
> +{
> +	struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +	u32 reg;
> +
> +	reg = GITS_CTLR_QUIESCENT;

So your ITS is always in a quiescent state? Even when you're processing
the command queue? You'll have to convince me...

> +	if (its->enabled)
> +		reg |= GITS_CTLR_ENABLE;
> +
> +	write_mask32(reg, addr & 3, len, val);
> +
> +	return 0;
> +}
> +
> +static int vgic_mmio_write_its_ctlr(struct kvm_vcpu *vcpu,
> +				    struct kvm_io_device *this,
> +				    gpa_t addr, int len, const void *val)
> +{
> +	struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +	struct vgic_io_device *iodev = container_of(this,
> +						    struct vgic_io_device, dev);
> +
> +        if (addr - iodev->base_addr == 0)

whitespace issue.

> +		its->enabled = !!(*(u8*)val & GITS_CTLR_ENABLE);
> +
> +	return 0;
> +}
> +
> +static int vgic_mmio_read_its_typer(struct kvm_vcpu *vcpu,
> +				    struct kvm_io_device *this,
> +				    gpa_t addr, int len, void *val)
> +{
> +	u64 reg = GITS_TYPER_PLPIS;
> +
> +	/*
> +	 * We use linear CPU numbers for redistributor addressing,
> +	 * so GITS_TYPER.PTA is 0.
> +	 * To avoid memory waste on the guest side, we keep the
> +	 * number of IDBits and DevBits low for the time being.
> +	 * This could later be made configurable by userland.
> +	 * Since we have all collections in linked list, we claim
> +	 * that we can hold all of the collection tables in our
> +	 * own memory and that the ITT entry size is 1 byte (the
> +	 * smallest possible one).

All of this is going to bite us when we want to implement migration,
especially the HW collection bit.

> +	 */
> +	reg |= 0xff << GITS_TYPER_HWCOLLCNT_SHIFT;
> +	reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
> +	reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
> +
> +	write_mask64(reg, addr & 7, len, val);
> +
> +	return 0;
> +}
> +
> +static int vgic_mmio_read_its_iidr(struct kvm_vcpu *vcpu,
> +				   struct kvm_io_device *this,
> +				   gpa_t addr, int len, void *val)
> +{
> +	u32 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
> +
> +	write_mask32(reg, addr & 3, len, val);
> +
> +	return 0;
> +}
> +
> +static int vgic_mmio_read_its_idregs(struct kvm_vcpu *vcpu,
> +				     struct kvm_io_device *this,
> +				     gpa_t addr, int len, void *val)
> +{
> +	struct vgic_io_device *iodev = container_of(this,
> +						    struct vgic_io_device, dev);
> +	u32 reg = 0;
> +	int idreg = (addr & ~3) - iodev->base_addr + GITS_IDREGS_BASE;
> +
> +	switch (idreg) {
> +	case GITS_PIDR2:
> +		reg = GIC_PIDR2_ARCH_GICv3;

Are we leaving the lowest 4 bits at zero?

> +		break;
> +	case GITS_PIDR4:
> +		/* This is a 64K software visible page */
> +		reg = 0x40;

Same question.

Also, how about all the other PIDR registers?

> +		break;
> +	/* Those are the ID registers for (any) GIC. */
> +	case GITS_CIDR0:
> +		reg = 0x0d;
> +		break;
> +	case GITS_CIDR1:
> +		reg = 0xf0;
> +		break;
> +	case GITS_CIDR2:
> +		reg = 0x05;
> +		break;
> +	case GITS_CIDR3:
> +		reg = 0xb1;
> +		break;
> +	}

Given that these values are directly taken from the architecture, and
seem common to the whole GICv3 architecture when implemented by ARM, we
could have a common handler for the whole GICv3 implementation. Not a
big deal though.

> +
> +	write_mask32(reg, addr & 3, len, val);
> +
> +	return 0;
> +}
> +
> +/*
> + * This function is called with both the ITS and the distributor lock dropped,
> + * so the actual command handlers must take the respective locks when needed.
> + */
> +static int vits_handle_command(struct kvm_vcpu *vcpu, u64 *its_cmd)
> +{
> +	return -ENODEV;
> +}
> +
> +static int vgic_mmio_read_its_cbaser(struct kvm_vcpu *vcpu,
> +				    struct kvm_io_device *this,
> +				    gpa_t addr, int len, void *val)
> +{
> +	struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +
> +	write_mask64(its->cbaser, addr & 7, len, val);
> +
> +	return 0;
> +}
> +
> +static int vgic_mmio_write_its_cbaser(struct kvm_vcpu *vcpu,
> +				      struct kvm_io_device *this,
> +				      gpa_t addr, int len, const void *val)
> +{
> +	struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +
> +	if (its->enabled)
> +		return 0;
> +
> +	its->cbaser = mask64(its->cbaser, addr & 7, len, val);
> +	its->creadr = 0;

Don't you need to acquire the command queue lock here?

> +
> +	return 0;
> +}
> +
> +static int its_cmd_buffer_size(struct kvm *kvm)
> +{
> +	struct vgic_its *its = &kvm->arch.vgic.its;
> +
> +	return ((its->cbaser & 0xff) + 1) << 12;
> +}
> +
> +static gpa_t its_cmd_buffer_base(struct kvm *kvm)
> +{
> +	struct vgic_its *its = &kvm->arch.vgic.its;
> +
> +	return BASER_BASE_ADDRESS(its->cbaser);
> +}
> +
> +/*
> + * By writing to CWRITER the guest announces new commands to be processed.
> + * Since we cannot read from guest memory inside the ITS spinlock, we
> + * iterate over the command buffer (with the lock dropped) until the read
> + * pointer matches the write pointer. Other VCPUs writing this register in the
> + * meantime will just update the write pointer, leaving the command
> + * processing to the first instance of the function.
> + */
> +static int vgic_mmio_write_its_cwriter(struct kvm_vcpu *vcpu,
> +				       struct kvm_io_device *this,
> +				       gpa_t addr, int len, const void *val)
> +{
> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +	struct vgic_its *its = &dist->its;
> +	gpa_t cbaser = its_cmd_buffer_base(vcpu->kvm);
> +	u64 cmd_buf[4];
> +	u32 reg;
> +	bool finished;
> +
> +	reg = mask64(its->cwriter & 0xfffe0, addr & 7, len, val);
> +	reg &= 0xfffe0;
> +	if (reg > its_cmd_buffer_size(vcpu->kvm))
> +		return 0;
> +
> +	spin_lock(&its->lock);
> +
> +	/*
> +	 * If there is still another VCPU handling commands, let this
> +	 * one pick up the new CWRITER and process "our" new commands as well.
> +	 */

How do you detect that condition? All I see is a massive race here, with
two threads processing the queue in parallel, possibly corrupting each
other's data.

Please explain why you think this is safe.

> +	finished = (its->cwriter != its->creadr);
> +	its->cwriter = reg;
> +
> +	spin_unlock(&its->lock);
> +
> +	while (!finished) {
> +		int ret = kvm_read_guest(vcpu->kvm, cbaser + its->creadr,
> +					 cmd_buf, 32);
> +		if (ret) {
> +			/*
> +			 * Gah, we are screwed. Reset CWRITER to that command
> +			 * that we have finished processing and return.
> +			 */
> +			spin_lock(&its->lock);
> +			its->cwriter = its->creadr;
> +			spin_unlock(&its->lock);
> +			break;
> +		}
> +		vits_handle_command(vcpu, cmd_buf);
> +
> +		spin_lock(&its->lock);
> +		its->creadr += 32;
> +		if (its->creadr == its_cmd_buffer_size(vcpu->kvm))
> +			its->creadr = 0;
> +		finished = (its->creadr == its->cwriter);
> +		spin_unlock(&its->lock);
> +	}
> +
> +	return 0;
> +}
> +
> +static int vgic_mmio_read_its_cwriter(struct kvm_vcpu *vcpu,
> +				      struct kvm_io_device *this,
> +				      gpa_t addr, int len, void *val)
> +{
> +	struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +	u64 reg = its->cwriter & 0xfffe0;
> +
> +	write_mask64(reg, addr & 7, len, val);
> +
> +	return 0;
> +}
> +
> +static int vgic_mmio_read_its_creadr(struct kvm_vcpu *vcpu,
> +				     struct kvm_io_device *this,
> +				     gpa_t addr, int len, void *val)
> +{
> +	struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +	u64 reg = its->creadr & 0xfffe0;
> +
> +	write_mask64(reg, addr & 7, len, val);
> +
> +	return 0;
> +}
> +
>  struct vgic_register_region its_registers[] = {
>  	REGISTER_DESC_WITH_LENGTH(GITS_CTLR,
> -		vgic_mmio_read_raz, vgic_mmio_write_wi, 4),
> +		vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4),
>  	REGISTER_DESC_WITH_LENGTH(GITS_IIDR,
> -		vgic_mmio_read_raz, vgic_mmio_write_wi, 4),
> +		vgic_mmio_read_its_iidr, vgic_mmio_write_wi, 4),
>  	REGISTER_DESC_WITH_LENGTH(GITS_TYPER,
> -		vgic_mmio_read_raz, vgic_mmio_write_wi, 4),
> +		vgic_mmio_read_its_typer, vgic_mmio_write_wi, 4),
>  	REGISTER_DESC_WITH_LENGTH(GITS_CBASER,
> -		vgic_mmio_read_raz, vgic_mmio_write_wi, 8),
> +		vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8),
>  	REGISTER_DESC_WITH_LENGTH(GITS_CWRITER,
> -		vgic_mmio_read_raz, vgic_mmio_write_wi, 8),
> +		vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8),
>  	REGISTER_DESC_WITH_LENGTH(GITS_CREADR,
> -		vgic_mmio_read_raz, vgic_mmio_write_wi, 8),
> +		vgic_mmio_read_its_creadr, vgic_mmio_write_wi, 8),
>  	REGISTER_DESC_WITH_LENGTH(GITS_BASER,
>  		vgic_mmio_read_raz, vgic_mmio_write_wi, 0x40),
>  	REGISTER_DESC_WITH_LENGTH(GITS_IDREGS_BASE,
> -		vgic_mmio_read_raz, vgic_mmio_write_wi, 0x30),
> +		vgic_mmio_read_its_idregs, vgic_mmio_write_wi, 0x30),
>  };
>  
>  /* This is called on setting the LPI enable bit in the redistributor. */
> @@ -59,9 +299,14 @@ int vits_init(struct kvm *kvm)
>  {
>  	struct vgic_dist *dist = &kvm->arch.vgic;
>  	struct vgic_its *its = &dist->its;
> +	int nr_vcpus = atomic_read(&kvm->online_vcpus);
>  	struct vgic_io_device *regions;
>  	int ret, i;
>  
> +	dist->pendbaser = kcalloc(nr_vcpus, sizeof(u64), GFP_KERNEL);
> +	if (!dist->pendbaser)
> +		return -ENOMEM;
> +
>  	spin_lock_init(&its->lock);
>  
>  	regions = kmalloc_array(ARRAY_SIZE(its_registers),
> @@ -82,3 +327,16 @@ int vits_init(struct kvm *kvm)
>  
>  	return -ENXIO;
>  }
> +
> +void vits_destroy(struct kvm *kvm)
> +{
> +	struct vgic_dist *dist = &kvm->arch.vgic;
> +	struct vgic_its *its = &dist->its;
> +
> +	if (!vgic_has_its(kvm))
> +		return;
> +
> +	kfree(dist->pendbaser);
> +
> +	its->enabled = false;
> +}
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 4e7dcb8..08f97d1 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -63,6 +63,7 @@ int vgic_register_redist_regions(struct kvm *kvm, gpa_t dist_base_address);
>  
>  int vits_init(struct kvm *kvm);
>  void vgic_enable_lpis(struct kvm_vcpu *vcpu);
> +void vits_destroy(struct kvm *kvm);
>  #else
>  static inline void vgic_v3_irq_change_affinity(struct kvm *kvm, u32 intid,
>  					       u64 mpidr)
> @@ -137,6 +138,11 @@ static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu)
>  {
>  	return;
>  }
> +
> +static inline void vits_destroy(struct kvm *kvm)
> +{
> +	return;
> +}
>  #endif
>  
>  void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
> diff --git a/virt/kvm/arm/vgic/vgic_init.c b/virt/kvm/arm/vgic/vgic_init.c
> index dcfb93d..e4459e3 100644
> --- a/virt/kvm/arm/vgic/vgic_init.c
> +++ b/virt/kvm/arm/vgic/vgic_init.c
> @@ -298,6 +298,8 @@ void kvm_vgic_destroy(struct kvm *kvm)
>  
>  	kvm_vgic_dist_destroy(kvm);
>  
> +	vits_destroy(kvm);
> +
>  	kvm_for_each_vcpu(i, vcpu, kvm)
>  		kvm_vgic_vcpu_destroy(vcpu);
>  }
> 

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...



More information about the linux-arm-kernel mailing list