[PATCH v3 06/11] KVM: arm/arm64: vgic: Allow dynamic mapping of physical/virtual interrupts
Christoffer Dall
christoffer.dall at linaro.org
Tue Aug 4 10:36:50 PDT 2015
On Tue, Aug 04, 2015 at 04:27:03PM +0100, Marc Zyngier wrote:
> On 04/08/15 14:04, Christoffer Dall wrote:
> > On Fri, Jul 24, 2015 at 04:55:04PM +0100, Marc Zyngier wrote:
> >> In order to be able to feed physical interrupts to a guest, we need
> >> to be able to establish the virtual-physical mapping between the two
> >> worlds.
> >>
> >> The mappings are kept in a set of RCU lists, indexed by virtual interrupts.
> >>
> >> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> >> ---
> >> arch/arm/kvm/arm.c | 2 +
> >> include/kvm/arm_vgic.h | 26 +++++++
> >> virt/kvm/arm/vgic.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >> 3 files changed, 216 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> >> index f1bf418..ce404a5 100644
> >> --- a/arch/arm/kvm/arm.c
> >> +++ b/arch/arm/kvm/arm.c
> >> @@ -125,6 +125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> >> if (ret)
> >> goto out_free_stage2_pgd;
> >>
> >> + kvm_vgic_early_init(kvm);
> >> kvm_timer_init(kvm);
> >>
> >> /* Mark the initial VMID generation invalid */
> >> @@ -249,6 +250,7 @@ out:
> >>
> >> void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
> >> {
> >> + kvm_vgic_vcpu_early_init(vcpu);
> >> }
> >>
> >> void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
> >> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >> index a881e39..68212a1 100644
> >> --- a/include/kvm/arm_vgic.h
> >> +++ b/include/kvm/arm_vgic.h
> >> @@ -159,6 +159,20 @@ struct vgic_io_device {
> >> struct kvm_io_device dev;
> >> };
> >>
> >> +struct irq_phys_map {
> >> + u32 virt_irq;
> >> + u32 phys_irq;
> >> + u32 irq;
> >> + bool deleted;
> >> + bool active;
> >> +};
> >> +
> >> +struct irq_phys_map_entry {
> >> + struct list_head entry;
> >> + struct rcu_head rcu;
> >> + struct irq_phys_map map;
> >> +};
> >> +
> >> struct vgic_dist {
> >> spinlock_t lock;
> >> bool in_kernel;
> >> @@ -256,6 +270,10 @@ struct vgic_dist {
> >> struct vgic_vm_ops vm_ops;
> >> struct vgic_io_device dist_iodev;
> >> struct vgic_io_device *redist_iodevs;
> >> +
> >> + /* Virtual irq to hwirq mapping */
> >> + spinlock_t irq_phys_map_lock;
> >> + struct list_head irq_phys_map_list;
> >> };
> >>
> >> struct vgic_v2_cpu_if {
> >> @@ -307,6 +325,9 @@ struct vgic_cpu {
> >> struct vgic_v2_cpu_if vgic_v2;
> >> struct vgic_v3_cpu_if vgic_v3;
> >> };
> >> +
> >> + /* Protected by the distributor's irq_phys_map_lock */
> >> + struct list_head irq_phys_map_list;
> >> };
> >>
> >> #define LR_EMPTY 0xff
> >> @@ -321,8 +342,10 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
> >> int kvm_vgic_hyp_init(void);
> >> int kvm_vgic_map_resources(struct kvm *kvm);
> >> int kvm_vgic_get_max_vcpus(void);
> >> +void kvm_vgic_early_init(struct kvm *kvm);
> >> int kvm_vgic_create(struct kvm *kvm, u32 type);
> >> void kvm_vgic_destroy(struct kvm *kvm);
> >> +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
> >> void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
> >> void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
> >> void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
> >> @@ -331,6 +354,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> >> void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
> >> int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> >> int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> >> +struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
> >> + int virt_irq, int irq);
> >> +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
> >>
> >> #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
> >> #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >> index 5bd1695..c74d929 100644
> >> --- a/virt/kvm/arm/vgic.c
> >> +++ b/virt/kvm/arm/vgic.c
> >> @@ -24,6 +24,7 @@
> >> #include <linux/of.h>
> >> #include <linux/of_address.h>
> >> #include <linux/of_irq.h>
> >> +#include <linux/rculist.h>
> >> #include <linux/uaccess.h>
> >>
> >> #include <asm/kvm_emulate.h>
> >> @@ -82,6 +83,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
> >> static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
> >> static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
> >> static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
> >> +static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
> >> + int virt_irq);
> >>
> >> static const struct vgic_ops *vgic_ops;
> >> static const struct vgic_params *vgic;
> >> @@ -1583,6 +1586,166 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
> >> return IRQ_HANDLED;
> >> }
> >>
> >> +static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
> >> + int virt_irq)
> >> +{
> >> + if (virt_irq < VGIC_NR_PRIVATE_IRQS)
> >> + return &vcpu->arch.vgic_cpu.irq_phys_map_list;
> >> + else
> >> + return &vcpu->kvm->arch.vgic.irq_phys_map_list;
> >> +}
> >> +
> >> +/**
> >> + * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
> >> + * @vcpu: The VCPU pointer
> >> + * @virt_irq: The virtual irq number
> >> + * @irq: The Linux IRQ number
> >> + *
> >> + * Establish a mapping between a guest visible irq (@virt_irq) and a
> >> + * Linux irq (@irq). On injection, @virt_irq will be associated with
> >> + * the physical interrupt represented by @irq.
> >> + *
> >> + * Returns a valid pointer on success, and an error pointer otherwise
> >> + */
> >> +struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
> >> + int virt_irq, int irq)
> >> +{
> >> + struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> >> + struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
> >> + struct irq_phys_map *map;
> >> + struct irq_phys_map_entry *entry;
> >> + struct irq_desc *desc;
> >> + struct irq_data *data;
> >> + int phys_irq;
> >> +
> >> + desc = irq_to_desc(irq);
> >> + if (!desc) {
> >> + kvm_err("kvm_vgic_map_phys_irq: no interrupt descriptor\n");
> >
> > super over-ridiculous nit:
> > if you respin you could consider "%s: no...", __FUNC__
>
> Sure.
>
> >
> >> + return ERR_PTR(-EINVAL);
> >> + }
> >> +
> >> + data = irq_desc_get_irq_data(desc);
> >> + while (data->parent_data)
> >> + data = data->parent_data;
> >> +
> >> + phys_irq = data->hwirq;
> >> +
> >> + /* Create a new mapping */
> >> + entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> >> + if (!entry)
> >> + return ERR_PTR(-ENOMEM);
> >> +
> >> + spin_lock(&dist->irq_phys_map_lock);
> >> +
> >> + /* Try to match an existing mapping */
> >> + map = vgic_irq_map_search(vcpu, virt_irq);
> >> + if (map) {
> >> + /* Make sure this mapping matches */
> >
> > why do we need to check that it matches? Is it ever allowed to have
> > multiple mappings of a virtual interrupt?
> >
>
> Multiple *different* mappings, no. What you can have though is issuing
> requests for the *same* mapping repeatedly. This is what happens with
> the timer, for example: kvm_timer_vcpu_reset is going to be called more
> than once over the lifetime of a VM, requesting a mapping each time
> without necessarily destroying the previous one.
>
> >> + if (map->phys_irq != phys_irq ||
> >> + map->irq != irq)
> >> + map = ERR_PTR(-EINVAL);
> >> +
> >> + goto out;
> >
> > why is this a valid situation? Shouldn't you return -EEXIST or
> > something instead? (that would also simplify the error-checking free
> > routine below).
> >
> > If you want it to be valid to map the same virq to irq multiple times
> > (to free the caller from some bookkeeping, I presume?) then that should
> > be part of the function documentation, and I think we should add a
> > "found existing mapping" comment above the goto out statement.
>
> Fair enough. I'll add some comments to that effect.
>
thanks!
> >> + }
> >> +
> >> + map = &entry->map;
> >> + map->virt_irq = virt_irq;
> >> + map->phys_irq = phys_irq;
> >> + map->irq = irq;
> >> +
> >> + list_add_tail_rcu(&entry->entry, root);
> >> +
> >> +out:
> >> + spin_unlock(&dist->irq_phys_map_lock);
> >> + /* If we've found a hit in the existing list, free the useless
> >> + * entry */
> >> + if (IS_ERR(map) || map != &entry->map)
> >> + kfree(entry);
> >> + return map;
> >> +}
> >> +
> >> +static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
> >> + int virt_irq)
> >> +{
> >> + struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
> >> + struct irq_phys_map_entry *entry;
> >> + struct irq_phys_map *map;
> >> +
> >> + rcu_read_lock();
> >> +
> >> + list_for_each_entry_rcu(entry, root, entry) {
> >> + map = &entry->map;
> >> + if (map->virt_irq == virt_irq && !map->deleted) {
> >> + rcu_read_unlock();
> >> + return map;
> >> + }
> >> + }
> >> +
> >> + rcu_read_unlock();
> >> +
> >> + return NULL;
> >> +}
> >> +
> >> +static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
> >> +{
> >> + struct irq_phys_map_entry *entry;
> >> +
> >> + entry = container_of(rcu, struct irq_phys_map_entry, rcu);
> >> + kfree(entry);
> >> +}
> >> +
> >> +/**
> >> + * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
> >> + * @vcpu: The VCPU pointer
> >> + * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
> >> + *
> >> + * Remove an existing mapping between virtual and physical interrupts.
> >> + */
> >> +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
> >> +{
> >> + struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> >> + struct irq_phys_map_entry *entry;
> >> + struct list_head *root;
> >> +
> >> + if (!map)
> >> + return -EINVAL;
> >> +
> >> + root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
> >> +
> >> + spin_lock(&dist->irq_phys_map_lock);
> >> +
> >> + list_for_each_entry(entry, root, entry) {
> >> + if (&entry->map == map && !map->deleted) {
> >> + map->deleted = true;
> >
> > why do you need the 'deleted' flag? You now search the list only after
> > holding the lock, so the race I pointed out before doesn't exist
> > anymore. Is there an additional issue that the deleted flag is taking
> > care of?
>
> This could still race with a destroy occuring before we take the lock,
> and before we get get RCU to do the cleanup (the list would still be
> populated).
>
That's not how I understand list_del_rcu; I think it deletes the entry
immediately with the right memory barriers. It is only the free
operation that happens on rcu sync and can be/is deferred.
So I'm pretty sure that when you search the list after holding the lock,
there is no race.
> >
> >> + list_del_rcu(&entry->entry);
> >> + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
> >> + break;
> >> + }
> >> + }
> >> +
> >> + spin_unlock(&dist->irq_phys_map_lock);
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
> >> +{
> >> + struct vgic_dist *dist = &kvm->arch.vgic;
> >> + struct irq_phys_map_entry *entry;
> >> +
> >> + spin_lock(&dist->irq_phys_map_lock);
> >> +
> >> + list_for_each_entry(entry, root, entry) {
> >> + if (!entry->map.deleted) {
> >> + entry->map.deleted = true;
> >> + list_del_rcu(&entry->entry);
> >> + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
> >> + }
> >> + }
> >> +
> >> + spin_unlock(&dist->irq_phys_map_lock);
> >> +}
> >> +
> >> void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
> >> {
> >> struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> @@ -1591,6 +1754,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
> >> kfree(vgic_cpu->active_shared);
> >> kfree(vgic_cpu->pend_act_shared);
> >> kfree(vgic_cpu->vgic_irq_lr_map);
> >> + vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
> >> vgic_cpu->pending_shared = NULL;
> >> vgic_cpu->active_shared = NULL;
> >> vgic_cpu->pend_act_shared = NULL;
> >> @@ -1628,6 +1792,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
> >> }
> >>
> >> /**
> >> + * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
> >
> > earliest possible... oh how I love the vgic code.
> >
>
> You must be the only one! ;-)
>
> > If you have the energy and context to write a comment in the beginning
> > of this file describing the init flow, I think that would be useful...
> >
>
> How about starting with this?
>
> <quote>
> There are multiple stages to the vgic initialization, both for the
> distributor and the CPU interfaces.
>
> Distributor:
>
> - kvm_vgic_early_init(): initialization of static data that doesn't
> depend on any sizing information or emulation type. No allocation is
> allowed there.
>
> - vgic_init(): allocation and initialization of the generic data
> structures that depend on sizing information (number of CPUs, number of
> interrupts). Also initializes the vcpu specific data structures. Can be
> executed lazily for GICv2. [to be renamed to kvm_vgic_init??]
>
> CPU Interface:
>
> - kvm_vgic_cpu_early_init(): initialization of static data that doesn't
> depend on any sizing information or emulation type. No allocation is
> allowed there.
> </quote>
>
> I'm sure we can expand it as we go...
>
yes, this is great, it made me feel comfortable with the whole thing.
Thanks!
> >> + *
> >> + * No memory allocation should be performed here, only static init.
> >> + */
> >> +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
> >> +{
> >> + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> + INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
> >> +}
> >> +
> >> +/**
> >> * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
> >> *
> >> * The host's GIC naturally limits the maximum amount of VCPUs a guest
> >> @@ -1664,6 +1839,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
> >> kfree(dist->irq_spi_target);
> >> kfree(dist->irq_pending_on_cpu);
> >> kfree(dist->irq_active_on_cpu);
> >> + vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
> >> dist->irq_sgi_sources = NULL;
> >> dist->irq_spi_cpu = NULL;
> >> dist->irq_spi_target = NULL;
> >> @@ -1787,6 +1963,18 @@ static int init_vgic_model(struct kvm *kvm, int type)
> >> return 0;
> >> }
> >>
> >> +/**
> >> + * kvm_vgic_early_init - Earliest possible vgic initialization stage
> >> + *
> >> + * No memory allocation should be performed here, only static init.
> >> + */
> >> +void kvm_vgic_early_init(struct kvm *kvm)
> >> +{
> >> + spin_lock_init(&kvm->arch.vgic.lock);
> >
> > so here we're initializing the data structures even before the vgic is
> > created, right? So therefore it's safe to move the vgic.lock init up
> > here?
>
> Yes. The distributor structure is part of the struct kvm, hence safe to
> poke early on. This doesn't depend on any form of sizing or emulation
> type either.
>
ok, slight warm fuzzy feeling.
Thanks,
-Christoffer
More information about the linux-arm-kernel
mailing list