[PATCH v4 2/8] arm/arm64: KVM: vgic: switch to dynamic allocation

Christoffer Dall christoffer.dall at linaro.org
Fri Sep 12 10:43:20 PDT 2014


On Fri, Sep 12, 2014 at 10:13:11AM +0100, Marc Zyngier wrote:
> On 11/09/14 23:36, Christoffer Dall wrote:
> > On Thu, Sep 11, 2014 at 12:09:09PM +0100, Marc Zyngier wrote:
> >> So far, all the VGIC data structures are statically defined by the
> >> *maximum* number of vcpus and interrupts it supports. It means that
> >> we always have to oversize it to cater for the worst case.
> >>
> >> Start by changing the data structures to be dynamically sizeable,
> >> and allocate them at runtime.
> >>
> >> The sizes are still very static though.
> >>
> >> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> >> ---
> >>  arch/arm/kvm/arm.c     |   3 +
> >>  include/kvm/arm_vgic.h |  76 ++++++++++++----
> >>  virt/kvm/arm/vgic.c    | 237 ++++++++++++++++++++++++++++++++++++++++++-------
> >>  3 files changed, 267 insertions(+), 49 deletions(-)
> >>
> >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> >> index a99e0cd..923a01d 100644
> >> --- a/arch/arm/kvm/arm.c
> >> +++ b/arch/arm/kvm/arm.c
> >> @@ -172,6 +172,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
> >>                       kvm->vcpus[i] = NULL;
> >>               }
> >>       }
> >> +
> >> +     kvm_vgic_destroy(kvm);
> >>  }
> >>
> >>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> >> @@ -253,6 +255,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
> >>  {
> >>       kvm_mmu_free_memory_caches(vcpu);
> >>       kvm_timer_vcpu_terminate(vcpu);
> >> +     kvm_vgic_vcpu_destroy(vcpu);
> >>       kmem_cache_free(kvm_vcpu_cache, vcpu);
> >>  }
> >>
> >> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >> index f074539..bdaac57 100644
> >> --- a/include/kvm/arm_vgic.h
> >> +++ b/include/kvm/arm_vgic.h
> >> @@ -54,19 +54,33 @@
> >>   * - a bunch of shared interrupts (SPI)
> >>   */
> >>  struct vgic_bitmap {
> >> -     union {
> >> -             u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
> >> -             DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
> >> -     } percpu[VGIC_MAX_CPUS];
> >> -     union {
> >> -             u32 reg[VGIC_NR_SHARED_IRQS / 32];
> >> -             DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
> >> -     } shared;
> >> +     /*
> >> +      * - One UL per VCPU for private interrupts (assumes UL is at
> >> +      *   least 32 bits)
> >> +      * - As many UL as necessary for shared interrupts.
> >> +      *
> >> +      * The private interrupts are accessed via the "private"
> >> +      * field, one UL per vcpu (the state for vcpu n is in
> >> +      * private[n]). The shared interrupts are accessed via the
> >> +      * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
> >> +      */
> >> +     unsigned long *private;
> >> +     unsigned long *shared;
> > 
> > the comment above the define for REG_OFFSET_SWIZZLE still talks about
> > the unions in struct vgic_bitmap, which is no longer true.  Mind
> > updating that comment?
> 
> Damned, thought I fixed that. Will update it.
> 
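As an aside, roughly the shape of what the updated comment needs to
explain now that the unions are gone -- purely illustrative: the helper
below is made up, and the big-endian value of REG_OFFSET_SWIZZLE is my
assumption based on the existing #ifdef:

    /*
     * Illustration only: the bitmap is stored as unsigned longs, but
     * the MMIO emulation still reads and writes it as u32 registers.
     * On a 64-bit big-endian host the two u32 halves of each unsigned
     * long are swapped in memory, so the u32 word index is XORed with
     * REG_OFFSET_SWIZZLE (assumed 1 in that configuration, 0 otherwise).
     */
    static inline u32 *vgic_bitmap_u32_view(unsigned long *bm, u32 word)
    {
            return (u32 *)bm + (word ^ REG_OFFSET_SWIZZLE);
    }
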
> >>  };
> >>
> >>  struct vgic_bytemap {
> >> -     u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
> >> -     u32 shared[VGIC_NR_SHARED_IRQS  / 4];
> >> +     /*
> >> +      * - 8 u32 per VCPU for private interrupts
> >> +      * - As many u32 as necessary for shared interrupts.
> >> +      *
> >> +      * The private interrupts are accessed via the "private"
> >> +      * field, (the state for vcpu n is in private[n*8] to
> >> +      * private[n*8 + 7]). The shared interrupts are accessed via
> >> +      * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
> >> +      * shared[(n-32)/4] word).
> >> +      */
> >> +     u32 *private;
> >> +     u32 *shared;
> >>  };
> >>
> >>  struct kvm_vcpu;
> >> @@ -127,6 +141,9 @@ struct vgic_dist {
> >>       bool                    in_kernel;
> >>       bool                    ready;
> >>
> >> +     int                     nr_cpus;
> >> +     int                     nr_irqs;
> >> +
> >>       /* Virtual control interface mapping */
> >>       void __iomem            *vctrl_base;
> >>
> >> @@ -166,15 +183,36 @@ struct vgic_dist {
> >>       /* Level/edge triggered */
> >>       struct vgic_bitmap      irq_cfg;
> >>
> >> -     /* Source CPU per SGI and target CPU */
> >> -     u8                      irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
> >> +     /*
> >> +      * Source CPU per SGI and target CPU:
> >> +      *
> >> +      * Each byte represents an SGI observable on a VCPU, each bit of
> >> +      * this byte indicating if the corresponding VCPU has
> >> +      * generated this interrupt. This is a GICv2 feature only.
> >> +      *
> >> +      * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
> >> +      * the SGIs observable on VCPUn.
> >> +      */
> >> +     u8                      *irq_sgi_sources;
> >>
> >> -     /* Target CPU for each IRQ */
> >> -     u8                      irq_spi_cpu[VGIC_NR_SHARED_IRQS];
> >> -     struct vgic_bitmap      irq_spi_target[VGIC_MAX_CPUS];
> >> +     /*
> >> +      * Target CPU for each SPI:
> >> +      *
> >> +      * Array of available SPIs, each byte indicating the target
> >> +      * VCPU for that SPI. IRQn (n >= 32) is at irq_spi_cpu[n-32].
> >> +      */
> >> +     u8                      *irq_spi_cpu;
> >> +
> >> +     /*
> >> +      * Reverse lookup of irq_spi_cpu for faster compute pending:
> >> +      *
> >> +      * Array of bitmaps, one per VCPU, describing is IRQn is
> > 
> > ah, describing *if* ?
> 
> Indeed. Will fix.
> 
> >> +      * routed to a particular VCPU.
> >> +      */
> >> +     struct vgic_bitmap      *irq_spi_target;
> >>
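Not asking for another change here, but it might also be worth a line
in that comment on why the reverse map is faster: with one target
bitmap per VCPU, computing the pending shared interrupts for a VCPU
becomes a couple of bitmap ANDs instead of a per-IRQ walk over
irq_spi_cpu[].  Roughly, as a sketch only (field names taken from the
existing compute_pending_for_cpu(), not from this hunk):

    /* Illustration only -- not part of this patch */
    static int shared_pending_for_vcpu(struct kvm_vcpu *vcpu)
    {
            struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
            unsigned long *pend_shared = vcpu->arch.vgic_cpu.pending_shared;
            int nr_shared = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;

            /* shared IRQs that are both pending and enabled... */
            bitmap_and(pend_shared,
                       vgic_bitmap_get_shared_map(&dist->irq_pending),
                       vgic_bitmap_get_shared_map(&dist->irq_enabled),
                       nr_shared);
            /* ...and routed to this VCPU, courtesy of the reverse map */
            bitmap_and(pend_shared, pend_shared,
                       vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu->vcpu_id]),
                       nr_shared);

            return find_first_bit(pend_shared, nr_shared) < nr_shared;
    }
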
> >>       /* Bitmap indicating which CPU has something pending */
> >> -     unsigned long           irq_pending_on_cpu;
> >> +     unsigned long           *irq_pending_on_cpu;
> >>  #endif
> >>  };
> >>
> >> @@ -204,11 +242,11 @@ struct vgic_v3_cpu_if {
> >>  struct vgic_cpu {
> >>  #ifdef CONFIG_KVM_ARM_VGIC
> >>       /* per IRQ to LR mapping */
> >> -     u8              vgic_irq_lr_map[VGIC_NR_IRQS];
> >> +     u8              *vgic_irq_lr_map;
> >>
> >>       /* Pending interrupts on this VCPU */
> >>       DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
> >> -     DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
> >> +     unsigned long   *pending_shared;
> >>
> >>       /* Bitmap of used/free list registers */
> >>       DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
> >> @@ -239,7 +277,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
> >>  int kvm_vgic_hyp_init(void);
> >>  int kvm_vgic_init(struct kvm *kvm);
> >>  int kvm_vgic_create(struct kvm *kvm);
> >> +void kvm_vgic_destroy(struct kvm *kvm);
> >>  int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
> >> +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
> >>  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
> >>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
> >>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >> index d3299d4..92c086e 100644
> >> --- a/virt/kvm/arm/vgic.c
> >> +++ b/virt/kvm/arm/vgic.c
> >> @@ -95,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
> >>  static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
> >>  static void vgic_update_state(struct kvm *kvm);
> >>  static void vgic_kick_vcpus(struct kvm *kvm);
> >> +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
> >>  static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
> >>  static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
> >>  static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
> >> @@ -124,23 +125,45 @@ static const struct vgic_params *vgic;
> >>  #define REG_OFFSET_SWIZZLE   0
> >>  #endif
> >>
> >> +static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
> >> +{
> >> +     int nr_longs;
> >> +
> >> +     nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
> >> +
> >> +     b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
> >> +     if (!b->private)
> >> +             return -ENOMEM;
> >> +
> >> +     b->shared = b->private + nr_cpus;
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +static void vgic_free_bitmap(struct vgic_bitmap *b)
> >> +{
> >> +     kfree(b->private);
> >> +     b->private = NULL;
> >> +     b->shared = NULL;
> >> +}
> >> +
> >>  static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
> >>                               int cpuid, u32 offset)
> >>  {
> >>       offset >>= 2;
> >>       if (!offset)
> >> -             return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
> >> +             return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
> >>       else
> >> -             return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
> >> +             return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
> >>  }
> >>
> >>  static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
> >>                                  int cpuid, int irq)
> >>  {
> >>       if (irq < VGIC_NR_PRIVATE_IRQS)
> >> -             return test_bit(irq, x->percpu[cpuid].reg_ul);
> >> +             return test_bit(irq, x->private + cpuid);
> >>
> >> -     return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
> >> +     return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
> >>  }
> >>
> >>  static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
> >> @@ -149,9 +172,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
> >>       unsigned long *reg;
> >>
> >>       if (irq < VGIC_NR_PRIVATE_IRQS) {
> >> -             reg = x->percpu[cpuid].reg_ul;
> >> +             reg = x->private + cpuid;
> >>       } else {
> >> -             reg =  x->shared.reg_ul;
> >> +             reg = x->shared;
> >>               irq -= VGIC_NR_PRIVATE_IRQS;
> >>       }
> >>
> >> @@ -163,24 +186,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
> >>
> >>  static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
> >>  {
> >> -     if (unlikely(cpuid >= VGIC_MAX_CPUS))
> >> -             return NULL;
> >> -     return x->percpu[cpuid].reg_ul;
> >> +     return x->private + cpuid;
> >>  }
> >>
> >>  static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
> >>  {
> >> -     return x->shared.reg_ul;
> >> +     return x->shared;
> >> +}
> >> +
> >> +static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
> >> +{
> >> +     int size;
> >> +
> >> +     size  = nr_cpus * VGIC_NR_PRIVATE_IRQS;
> >> +     size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
> >> +
> >> +     x->private = kzalloc(size, GFP_KERNEL);
> >> +     if (!x->private)
> >> +             return -ENOMEM;
> >> +
> >> +     x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
> >> +     return 0;
> >> +}
> >> +
> >> +static void vgic_free_bytemap(struct vgic_bytemap *b)
> >> +{
> >> +     kfree(b->private);
> >> +     b->private = NULL;
> >> +     b->shared = NULL;
> >>  }
> >>
> >>  static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
> >>  {
> >> -     offset >>= 2;
> >> -     BUG_ON(offset > (VGIC_NR_IRQS / 4));
> >> -     if (offset < 8)
> >> -             return x->percpu[cpuid] + offset;
> >> -     else
> >> -             return x->shared + offset - 8;
> >> +     u32 *reg;
> >> +
> >> +     if (offset < VGIC_NR_PRIVATE_IRQS) {
> >> +             reg = x->private;
> >> +             offset += cpuid * VGIC_NR_PRIVATE_IRQS;
> >> +     } else {
> >> +             reg = x->shared;
> >> +             offset -= VGIC_NR_PRIVATE_IRQS;
> >> +     }
> >> +
> >> +     return reg + (offset / sizeof(u32));
> >>  }
> >>
> >>  #define VGIC_CFG_LEVEL       0
> >> @@ -739,7 +787,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
> >>                */
> >>               vgic_dist_irq_set_pending(vcpu, lr.irq);
> >>               if (lr.irq < VGIC_NR_SGIS)
> >> -                     dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
> >> +                     *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
> >>               lr.state &= ~LR_STATE_PENDING;
> >>               vgic_set_lr(vcpu, i, lr);
> >>
> >> @@ -773,7 +821,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
> >>       /* Copy source SGIs from distributor side */
> >>       for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
> >>               int shift = 8 * (sgi - min_sgi);
> >> -             reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
> >> +             reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
> >>       }
> >>
> >>       mmio_data_write(mmio, ~0, reg);
> >> @@ -797,14 +845,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
> >>       /* Clear pending SGIs on the distributor */
> >>       for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
> >>               u8 mask = reg >> (8 * (sgi - min_sgi));
> >> +             u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
> >>               if (set) {
> >> -                     if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
> >> +                     if ((*src & mask) != mask)
> >>                               updated = true;
> >> -                     dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
> >> +                     *src |= mask;
> >>               } else {
> >> -                     if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
> >> +                     if (*src & mask)
> >>                               updated = true;
> >> -                     dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
> >> +                     *src &= ~mask;
> >>               }
> >>       }
> >>
> >> @@ -988,6 +1037,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
> >>       return true;
> >>  }
> >>
> >> +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
> >> +{
> >> +     return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
> >> +}
> >> +
> >>  static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
> >>  {
> >>       struct kvm *kvm = vcpu->kvm;
> >> @@ -1021,7 +1075,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
> >>               if (target_cpus & 1) {
> >>                       /* Flag the SGI as pending */
> >>                       vgic_dist_irq_set_pending(vcpu, sgi);
> >> -                     dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
> >> +                     *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
> >>                       kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
> >>               }
> >>
> >> @@ -1068,14 +1122,14 @@ static void vgic_update_state(struct kvm *kvm)
> >>       int c;
> >>
> >>       if (!dist->enabled) {
> >> -             set_bit(0, &dist->irq_pending_on_cpu);
> >> +             set_bit(0, dist->irq_pending_on_cpu);
> >>               return;
> >>       }
> >>
> >>       kvm_for_each_vcpu(c, vcpu, kvm) {
> >>               if (compute_pending_for_cpu(vcpu)) {
> >>                       pr_debug("CPU%d has pending interrupts\n", c);
> >> -                     set_bit(c, &dist->irq_pending_on_cpu);
> >> +                     set_bit(c, dist->irq_pending_on_cpu);
> >>               }
> >>       }
> >>  }
> >> @@ -1232,14 +1286,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
> >>       int vcpu_id = vcpu->vcpu_id;
> >>       int c;
> >>
> >> -     sources = dist->irq_sgi_sources[vcpu_id][irq];
> >> +     sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
> >>
> >>       for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
> >>               if (vgic_queue_irq(vcpu, c, irq))
> >>                       clear_bit(c, &sources);
> >>       }
> >>
> >> -     dist->irq_sgi_sources[vcpu_id][irq] = sources;
> >> +     *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
> >>
> >>       /*
> >>        * If the sources bitmap has been cleared it means that we
> >> @@ -1327,7 +1381,7 @@ epilog:
> >>                * us. Claim we don't have anything pending. We'll
> >>                * adjust that if needed while exiting.
> >>                */
> >> -             clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
> >> +             clear_bit(vcpu_id, dist->irq_pending_on_cpu);
> >>       }
> >>  }
> >>
> >> @@ -1429,7 +1483,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
> >>       /* Check if we still have something up our sleeve... */
> >>       pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
> >>       if (level_pending || pending < vgic->nr_lr)
> >> -             set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
> >> +             set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
> >>  }
> >>
> >>  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
> >> @@ -1463,7 +1517,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
> >>       if (!irqchip_in_kernel(vcpu->kvm))
> >>               return 0;
> >>
> >> -     return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
> >> +     return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
> >>  }
> >>
> >>  static void vgic_kick_vcpus(struct kvm *kvm)
> >> @@ -1558,7 +1612,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> >>
> >>       if (level) {
> >>               vgic_cpu_irq_set(vcpu, irq_num);
> >> -             set_bit(cpuid, &dist->irq_pending_on_cpu);
> >> +             set_bit(cpuid, dist->irq_pending_on_cpu);
> >>       }
> >>
> >>  out:
> >> @@ -1602,6 +1656,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
> >>       return IRQ_HANDLED;
> >>  }
> >>
> >> +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
> >> +{
> >> +     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> +
> >> +     kfree(vgic_cpu->pending_shared);
> >> +     kfree(vgic_cpu->vgic_irq_lr_map);
> >> +     vgic_cpu->pending_shared = NULL;
> >> +     vgic_cpu->vgic_irq_lr_map = NULL;
> >> +}
> >> +
> >> +static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
> >> +{
> >> +     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> +
> >> +     int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
> > 
> > [copying question from last review round, apologies if I'm being stupid]
> > 
> > are we guaranteed that the numerator is always a multiple of 8? if not,
> > won't you end up allocating one less byte than needed?
> 
> We force the allocation to be a multiple of 32, as per the GIC spec (we
> enforce this in arm_vgic.h for the legacy behaviour, and in the last
> patch for the dynamic case).
> 
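ok -- and just to spell the arithmetic out for myself (example numbers
only):

    nr_irqs = 256 (a multiple of 32):
        (256 - 32) / 8 = 28 bytes = 224 bits
                       = exactly the 224 shared interrupts

    nr_irqs = 250 (not 32-aligned, hence not allowed):
        (250 - 32) / 8 = 27 bytes = 216 bits
                       = 2 bits short of the 218 needed
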
right, in that case, with the tiny doc fixes:

Reviewed-by: Christoffer Dall <christoffer.dall at linaro.org>


