[PATCH v3 06/11] KVM: arm/arm64: vgic: Allow dynamic mapping of physical/virtual interrupts

Tue Aug 4 10:36:50 PDT 2015

On Tue, Aug 04, 2015 at 04:27:03PM +0100, Marc Zyngier wrote:
> On 04/08/15 14:04, Christoffer Dall wrote:
> > On Fri, Jul 24, 2015 at 04:55:04PM +0100, Marc Zyngier wrote:
> >> In order to be able to feed physical interrupts to a guest, we need
> >> to be able to establish the virtual-physical mapping between the two
> >> worlds.
> >>
> >> The mappings are kept in a set of RCU lists, indexed by virtual interrupts.
> >>
> >> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> >> ---
> >>  arch/arm/kvm/arm.c     |   2 +
> >>  include/kvm/arm_vgic.h |  26 +++++++
> >>  virt/kvm/arm/vgic.c    | 189 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >>  3 files changed, 216 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> >> index f1bf418..ce404a5 100644
> >> --- a/arch/arm/kvm/arm.c
> >> +++ b/arch/arm/kvm/arm.c
> >> @@ -125,6 +125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> >>       if (ret)
> >>               goto out_free_stage2_pgd;
> >>
> >> +     kvm_vgic_early_init(kvm);
> >>       kvm_timer_init(kvm);
> >>
> >>       /* Mark the initial VMID generation invalid */
> >> @@ -249,6 +250,7 @@ out:
> >>
> >>  void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
> >>  {
> >> +     kvm_vgic_vcpu_early_init(vcpu);
> >>  }
> >>
> >>  void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
> >> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >> index a881e39..68212a1 100644
> >> --- a/include/kvm/arm_vgic.h
> >> +++ b/include/kvm/arm_vgic.h
> >> @@ -159,6 +159,20 @@ struct vgic_io_device {
> >>       struct kvm_io_device dev;
> >>  };
> >>
> >> +struct irq_phys_map {
> >> +     u32                     virt_irq;
> >> +     u32                     phys_irq;
> >> +     u32                     irq;
> >> +     bool                    deleted;
> >> +     bool                    active;
> >> +};
> >> +
> >> +struct irq_phys_map_entry {
> >> +     struct list_head        entry;
> >> +     struct rcu_head         rcu;
> >> +     struct irq_phys_map     map;
> >> +};
> >> +
> >>  struct vgic_dist {
> >>       spinlock_t              lock;
> >>       bool                    in_kernel;
> >> @@ -256,6 +270,10 @@ struct vgic_dist {
> >>       struct vgic_vm_ops      vm_ops;
> >>       struct vgic_io_device   dist_iodev;
> >>       struct vgic_io_device   *redist_iodevs;
> >> +
> >> +     /* Virtual irq to hwirq mapping */
> >> +     spinlock_t              irq_phys_map_lock;
> >> +     struct list_head        irq_phys_map_list;
> >>  };
> >>
> >>  struct vgic_v2_cpu_if {
> >> @@ -307,6 +325,9 @@ struct vgic_cpu {
> >>               struct vgic_v2_cpu_if   vgic_v2;
> >>               struct vgic_v3_cpu_if   vgic_v3;
> >>       };
> >> +
> >> +     /* Protected by the distributor's irq_phys_map_lock */
> >> +     struct list_head        irq_phys_map_list;
> >>  };
> >>
> >>  #define LR_EMPTY     0xff
> >> @@ -321,8 +342,10 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
> >>  int kvm_vgic_hyp_init(void);
> >>  int kvm_vgic_map_resources(struct kvm *kvm);
> >>  int kvm_vgic_get_max_vcpus(void);
> >> +void kvm_vgic_early_init(struct kvm *kvm);
> >>  int kvm_vgic_create(struct kvm *kvm, u32 type);
> >>  void kvm_vgic_destroy(struct kvm *kvm);
> >> +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
> >>  void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
> >>  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
> >>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
> >> @@ -331,6 +354,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> >>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
> >>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> >>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> >> +struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
> >> +                                        int virt_irq, int irq);
> >> +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
> >>
> >>  #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
> >>  #define vgic_initialized(k)  (!!((k)->arch.vgic.nr_cpus))
> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >> index 5bd1695..c74d929 100644
> >> --- a/virt/kvm/arm/vgic.c
> >> +++ b/virt/kvm/arm/vgic.c
> >> @@ -24,6 +24,7 @@
> >>  #include <linux/of.h>
> >>  #include <linux/of_address.h>
> >>  #include <linux/of_irq.h>
> >> +#include <linux/rculist.h>
> >>  #include <linux/uaccess.h>
> >>
> >>  #include <asm/kvm_emulate.h>
> >> @@ -82,6 +83,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
> >>  static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
> >>  static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
> >>  static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
> >> +static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
> >> +                                             int virt_irq);
> >>
> >>  static const struct vgic_ops *vgic_ops;
> >>  static const struct vgic_params *vgic;
> >> @@ -1583,6 +1586,166 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
> >>       return IRQ_HANDLED;
> >>  }
> >>
> >> +static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
> >> +                                                 int virt_irq)
> >> +{
> >> +     if (virt_irq < VGIC_NR_PRIVATE_IRQS)
> >> +             return &vcpu->arch.vgic_cpu.irq_phys_map_list;
> >> +     else
> >> +             return &vcpu->kvm->arch.vgic.irq_phys_map_list;
> >> +}
> >> +
> >> +/**
> >> + * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
> >> + * @vcpu: The VCPU pointer
> >> + * @virt_irq: The virtual irq number
> >> + * @irq: The Linux IRQ number
> >> + *
> >> + * Establish a mapping between a guest visible irq (@virt_irq) and a
> >> + * Linux irq (@irq). On injection, @virt_irq will be associated with
> >> + * the physical interrupt represented by @irq.
> >> + *
> >> + * Returns a valid pointer on success, and an error pointer otherwise
> >> + */
> >> +struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
> >> +                                        int virt_irq, int irq)
> >> +{
> >> +     struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> >> +     struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
> >> +     struct irq_phys_map *map;
> >> +     struct irq_phys_map_entry *entry;
> >> +     struct irq_desc *desc;
> >> +     struct irq_data *data;
> >> +     int phys_irq;
> >> +
> >> +     desc = irq_to_desc(irq);
> >> +     if (!desc) {
> >> +             kvm_err("kvm_vgic_map_phys_irq: no interrupt descriptor\n");
> > 
> > super over-ridiculous nit:
> > if you respin you could consider "%s: no...", __FUNC__
> 
> Sure.
> 
> > 
> >> +             return ERR_PTR(-EINVAL);
> >> +     }
> >> +
> >> +     data = irq_desc_get_irq_data(desc);
> >> +     while (data->parent_data)
> >> +             data = data->parent_data;
> >> +
> >> +     phys_irq = data->hwirq;
> >> +
> >> +     /* Create a new mapping */
> >> +     entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> >> +     if (!entry)
> >> +             return ERR_PTR(-ENOMEM);
> >> +
> >> +     spin_lock(&dist->irq_phys_map_lock);
> >> +
> >> +     /* Try to match an existing mapping */
> >> +     map = vgic_irq_map_search(vcpu, virt_irq);
> >> +     if (map) {
> >> +             /* Make sure this mapping matches */
> > 
> > why do we need to check that it matches?  Is it ever allowed to have
> > multiple mappings of a virtual interrupt?
> > 
> 
> Multiple *different* mappings, no. What you can have though is issuing
> requests for the *same* mapping repeatedly. This is what happens with
> the timer, for example: kvm_timer_vcpu_reset is going to be called more
> than once over the lifetime of a VM, requesting a mapping each time
> without necessarily destroying the previous one.
> 
> >> +             if (map->phys_irq != phys_irq   ||
> >> +                 map->irq      != irq)
> >> +                     map = ERR_PTR(-EINVAL);
> >> +
> >> +             goto out;
> > 
> > why is this a valid situation?  Shouldn't you return -EEXIST or
> > something instead?  (that would also simplify the error-checking free
> > routine below).
> > 
> > If you want it to be valid to map the same virq to irq multiple times
> > (to free the caller from some bookkeeping, I presume?) then that should
> > be part of the function documentation, and I think we should add a
> > "found existing mapping" comment above the goto out statement.
> 
> Fair enough. I'll add some comments to that effect.
> 

thanks!

> >> +     }
> >> +
> >> +     map           = &entry->map;
> >> +     map->virt_irq = virt_irq;
> >> +     map->phys_irq = phys_irq;
> >> +     map->irq      = irq;
> >> +
> >> +     list_add_tail_rcu(&entry->entry, root);
> >> +
> >> +out:
> >> +     spin_unlock(&dist->irq_phys_map_lock);
> >> +     /* If we've found a hit in the existing list, free the useless
> >> +      * entry */
> >> +     if (IS_ERR(map) || map != &entry->map)
> >> +             kfree(entry);
> >> +     return map;
> >> +}
> >> +
> >> +static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
> >> +                                             int virt_irq)
> >> +{
> >> +     struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
> >> +     struct irq_phys_map_entry *entry;
> >> +     struct irq_phys_map *map;
> >> +
> >> +     rcu_read_lock();
> >> +
> >> +     list_for_each_entry_rcu(entry, root, entry) {
> >> +             map = &entry->map;
> >> +             if (map->virt_irq == virt_irq && !map->deleted) {
> >> +                     rcu_read_unlock();
> >> +                     return map;
> >> +             }
> >> +     }
> >> +
> >> +     rcu_read_unlock();
> >> +
> >> +     return NULL;
> >> +}
> >> +
> >> +static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
> >> +{
> >> +     struct irq_phys_map_entry *entry;
> >> +
> >> +     entry = container_of(rcu, struct irq_phys_map_entry, rcu);
> >> +     kfree(entry);
> >> +}
> >> +
> >> +/**
> >> + * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
> >> + * @vcpu: The VCPU pointer
> >> + * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
> >> + *
> >> + * Remove an existing mapping between virtual and physical interrupts.
> >> + */
> >> +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
> >> +{
> >> +     struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> >> +     struct irq_phys_map_entry *entry;
> >> +     struct list_head *root;
> >> +
> >> +     if (!map)
> >> +             return -EINVAL;
> >> +
> >> +     root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
> >> +
> >> +     spin_lock(&dist->irq_phys_map_lock);
> >> +
> >> +     list_for_each_entry(entry, root, entry) {
> >> +             if (&entry->map == map && !map->deleted) {
> >> +                     map->deleted = true;
> > 
> > why do you need the 'deleted' flag?  You now search the list only after
> > holding the lock, so the race I pointed out before doesn't exist
> > anymore.  Is there an additional issue that the deleted flag is taking
> > care of?
> 
> This could still race with a destroy occuring before we take the lock,
> and before we get get RCU to do the cleanup (the list would still be
> populated).
> 

That's not how I understand list_del_rcu; I think it deletes the entry
immediately with the right memory barriers.  It is only the free
operation that happens on rcu sync and can be/is deferred.

So I'm pretty sure that when you search the list after holding the lock,
there is no race.

> > 
> >> +                     list_del_rcu(&entry->entry);
> >> +                     call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
> >> +                     break;
> >> +             }
> >> +     }
> >> +
> >> +     spin_unlock(&dist->irq_phys_map_lock);
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
> >> +{
> >> +     struct vgic_dist *dist = &kvm->arch.vgic;
> >> +     struct irq_phys_map_entry *entry;
> >> +
> >> +     spin_lock(&dist->irq_phys_map_lock);
> >> +
> >> +     list_for_each_entry(entry, root, entry) {
> >> +             if (!entry->map.deleted) {
> >> +                     entry->map.deleted = true;
> >> +                     list_del_rcu(&entry->entry);
> >> +                     call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
> >> +             }
> >> +     }
> >> +
> >> +     spin_unlock(&dist->irq_phys_map_lock);
> >> +}
> >> +
> >>  void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
> >>  {
> >>       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> @@ -1591,6 +1754,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
> >>       kfree(vgic_cpu->active_shared);
> >>       kfree(vgic_cpu->pend_act_shared);
> >>       kfree(vgic_cpu->vgic_irq_lr_map);
> >> +     vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
> >>       vgic_cpu->pending_shared = NULL;
> >>       vgic_cpu->active_shared = NULL;
> >>       vgic_cpu->pend_act_shared = NULL;
> >> @@ -1628,6 +1792,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
> >>  }
> >>
> >>  /**
> >> + * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
> > 
> > earliest possible... oh how I love the vgic code.
> > 
> 
> You must be the only one! ;-)
> 
> > If you have the energy and context to write a comment in the beginning
> > of this file describing the init flow, I think that would be useful...
> > 
> 
> How about starting with this?
> 
> <quote>
> There are multiple stages to the vgic initialization, both for the
> distributor and the CPU interfaces.
> 
> Distributor:
> 
> - kvm_vgic_early_init(): initialization of static data that doesn't
> depend on any sizing information or emulation type. No allocation is
> allowed there.
> 
> - vgic_init(): allocation and initialization of the generic data
> structures that depend on sizing information (number of CPUs, number of
> interrupts). Also initializes the vcpu specific data structures. Can be
> executed lazily for GICv2. [to be renamed to kvm_vgic_init??]
> 
> CPU Interface:
> 
> - kvm_vgic_cpu_early_init(): initialization of static data that doesn't
> depend on any sizing information or emulation type. No allocation is
> allowed there.
> </quote>
> 
> I'm sure we can expand it as we go...
> 

yes, this is great, it made me feel comfortable with the whole thing.
Thanks!

> >> + *
> >> + * No memory allocation should be performed here, only static init.
> >> + */
> >> +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
> >> +{
> >> +     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> +     INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
> >> +}
> >> +
> >> +/**
> >>   * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
> >>   *
> >>   * The host's GIC naturally limits the maximum amount of VCPUs a guest
> >> @@ -1664,6 +1839,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
> >>       kfree(dist->irq_spi_target);
> >>       kfree(dist->irq_pending_on_cpu);
> >>       kfree(dist->irq_active_on_cpu);
> >> +     vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
> >>       dist->irq_sgi_sources = NULL;
> >>       dist->irq_spi_cpu = NULL;
> >>       dist->irq_spi_target = NULL;
> >> @@ -1787,6 +1963,18 @@ static int init_vgic_model(struct kvm *kvm, int type)
> >>       return 0;
> >>  }
> >>
> >> +/**
> >> + * kvm_vgic_early_init - Earliest possible vgic initialization stage
> >> + *
> >> + * No memory allocation should be performed here, only static init.
> >> + */
> >> +void kvm_vgic_early_init(struct kvm *kvm)
> >> +{
> >> +     spin_lock_init(&kvm->arch.vgic.lock);
> > 
> > so here we're initializing the data structures even before the vgic is
> > created, right?  So therefore it's safe to move the vgic.lock init up
> > here?
> 
> Yes. The distributor structure is part of the struct kvm, hence safe to
> poke early on. This doesn't depend on any form of sizing or emulation
> type either.
> 

ok, slight warm fuzzy feeling.

Thanks,
-Christoffer