[RFC v5 12/13] KVM: kvm-vfio: generic forwarding control

Alex Williamson alex.williamson at redhat.com
Tue Mar 31 10:20:10 PDT 2015


On Thu, 2015-03-19 at 15:55 +0100, Eric Auger wrote:
> This patch introduces a new KVM_DEV_VFIO_DEVICE group.
> 
> This is a new control channel which enables KVM to cooperate with
> viable VFIO devices.
> 
> The patch introduces 2 attributes for this group:
> KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ.
> Their purpose is to turn a VFIO device IRQ into a forwarded IRQ and
>  respectively unset the feature.
> 
> The generic part introduced here interact with VFIO, genirq, KVM while
> the architecture specific part mostly takes care of the virtual interrupt
> controller programming.
> 
> Architecture specific implementation is enabled when
> __KVM_HAVE_ARCH_KVM_VFIO_FORWARD is set. When not set those
> functions are void.
> 
> Signed-off-by: Eric Auger <eric.auger at linaro.org>
> 
> ---
> 
> v3 -> v4:
> - use new kvm_vfio_dev_irq struct
> - improve error handling according to Alex comments
> - full rework or generic/arch specific split to accomodate for
>   unforward that never fails
> - kvm_vfio_get_vfio_device and kvm_vfio_put_vfio_device removed from
>   that patch file and introduced before (since also used by Feng)
> - guard kvm_vfio_control_irq_forward call with
>   __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> 
> v2 -> v3:
> - add API comments in kvm_host.h
> - improve the commit message
> - create a private kvm_vfio_fwd_irq struct
> - fwd_irq_action replaced by a bool and removal of VFIO_IRQ_CLEANUP. This
>   latter action will be handled in vgic.
> - add a vfio_device handle argument to kvm_arch_set_fwd_state. The goal is
>   to move platform specific stuff in architecture specific code.
> - kvm_arch_set_fwd_state renamed into kvm_arch_vfio_set_forward
> - increment the ref counter each time we do an IRQ forwarding and decrement
>   this latter each time one IRQ forward is unset. Simplifies the whole
>   ref counting.
> - simplification of list handling: create, search, removal
> 
> v1 -> v2:
> - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> - original patch file separated into 2 parts: generic part moved in vfio.c
>   and ARM specific part(kvm_arch_set_fwd_state)
> ---
>  include/linux/kvm_host.h |  47 +++++++
>  virt/kvm/vfio.c          | 311 ++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 355 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f4e1829..8f17f87 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1042,6 +1042,15 @@ struct kvm_device_ops {
>  		      unsigned long arg);
>  };
>  
> +/* internal self-contained structure describing a forwarded IRQ */
> +struct kvm_fwd_irq {
> +	struct kvm *kvm; /* VM to inject the GSI into */
> +	struct vfio_device *vdev; /* vfio device the IRQ belongs to */
> +	__u32 index; /* VFIO device IRQ index */
> +	__u32 subindex; /* VFIO device IRQ subindex */
> +	__u32 gsi; /* gsi, ie. virtual IRQ number */
> +};
> +
>  void kvm_device_get(struct kvm_device *dev);
>  void kvm_device_put(struct kvm_device *dev);
>  struct kvm_device *kvm_device_from_filp(struct file *filp);
> @@ -1065,6 +1074,44 @@ inline void kvm_arch_resume_guest(struct kvm *kvm) {}
>  
>  #endif
>  
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> +/**
> + * kvm_arch_set_forward - Sets forwarding for a given IRQ
> + *
> + * @kvm: handle to the VM
> + * @host_irq: physical IRQ number
> + * @guest_irq: virtual IRQ number
> + * returns 0 on success, < 0 on failure
> + */
> +int kvm_arch_set_forward(struct kvm *kvm,
> +			 unsigned int host_irq, unsigned int guest_irq);
> +
> +/**
> + * kvm_arch_unset_forward - Unsets forwarding for a given IRQ
> + *
> + * @kvm: handle to the VM
> + * @host_irq: physical IRQ number
> + * @guest_irq: virtual IRQ number
> + * @active: returns whether the IRQ is active
> + */
> +void kvm_arch_unset_forward(struct kvm *kvm,
> +			    unsigned int host_irq,
> +			    unsigned int guest_irq,
> +			    bool *active);
> +
> +#else
> +static inline int kvm_arch_set_forward(struct kvm *kvm,
> +				       unsigned int host_irq,
> +				       unsigned int guest_irq)
> +{
> +	return -ENOENT;
> +}
> +static inline void kvm_arch_unset_forward(struct kvm *kvm,
> +					  unsigned int host_irq,
> +					  unsigned int guest_irq,
> +					  bool *active) {}
> +#endif
> +
>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>  
>  static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index c995e51..4847597 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -19,14 +19,30 @@
>  #include <linux/uaccess.h>
>  #include <linux/vfio.h>
>  #include "vfio.h"
> +#include <linux/platform_device.h>
> +#include <linux/irq.h>
> +#include <linux/spinlock.h>
> +#include <linux/interrupt.h>
> +#include <linux/delay.h>
> +
> +#define DEBUG_FORWARD
> +#define DEBUG_UNFORWARD


These shouldn't be here

>  
>  struct kvm_vfio_group {
>  	struct list_head node;
>  	struct vfio_group *vfio_group;
>  };
>  
> +/* private linkable kvm_fwd_irq struct */
> +struct kvm_vfio_fwd_irq_node {
> +	struct list_head link;
> +	struct kvm_fwd_irq fwd_irq;
> +};
> +
>  struct kvm_vfio {
>  	struct list_head group_list;
> +	/* list of registered VFIO forwarded IRQs */
> +	struct list_head fwd_node_list;
>  	struct mutex lock;
>  	bool noncoherent;
>  };
> @@ -320,12 +336,293 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>  	return -ENXIO;
>  }
>  
> +/**
> + * kvm_vfio_find_fwd_irq - checks whether a forwarded IRQ already is
> + * registered in the list of forwarded IRQs
> + *
> + * @kv: handle to the kvm-vfio device
> + * @fwd: handle to the forwarded irq struct
> + * In the positive returns the handle to its node in the kvm-vfio
> + * forwarded IRQ list, returns NULL otherwise.
> + * Must be called with kv->lock hold.
> + */
> +static struct kvm_vfio_fwd_irq_node *kvm_vfio_find_fwd_irq(
> +				struct kvm_vfio *kv,
> +				struct kvm_fwd_irq *fwd)
> +{
> +	struct kvm_vfio_fwd_irq_node *node;
> +
> +	list_for_each_entry(node, &kv->fwd_node_list, link) {
> +		if ((node->fwd_irq.index == fwd->index) &&
> +		    (node->fwd_irq.subindex == fwd->subindex) &&
> +		    (node->fwd_irq.vdev == fwd->vdev))
> +			return node;
> +	}
> +	return NULL;
> +}
> +/**
> + * kvm_vfio_register_fwd_irq - Allocates, populates and registers a
> + * forwarded IRQ
> + *
> + * @kv: handle to the kvm-vfio device
> + * @fwd: handle to the forwarded irq struct
> + * In case of success returns a handle to the new list node,
> + * NULL otherwise.
> + * Must be called with kv->lock hold.
> + */
> +static struct kvm_vfio_fwd_irq_node *kvm_vfio_register_fwd_irq(
> +				struct kvm_vfio *kv,
> +				struct kvm_fwd_irq *fwd)
> +{
> +	struct kvm_vfio_fwd_irq_node *node;
> +
> +	node = kmalloc(sizeof(*node), GFP_KERNEL);
> +	if (!node)
> +		return ERR_PTR(-ENOMEM);
> +
> +	node->fwd_irq = *fwd;
> +
> +	list_add(&node->link, &kv->fwd_node_list);
> +
> +	return node;
> +}
> +
> +/**
> + * kvm_vfio_unregister_fwd_irq - unregisters and frees a forwarded IRQ
> + *
> + * @node: handle to the node struct
> + * Must be called with kv->lock hold.
> + */
> +static void kvm_vfio_unregister_fwd_irq(struct kvm_vfio_fwd_irq_node *node)
> +{
> +	list_del(&node->link);
> +	kvm_vfio_put_vfio_device(node->fwd_irq.vdev);
> +	kfree(node);
> +}
> +
> +static int kvm_vfio_platform_get_irq(struct vfio_device *vdev, int index)
> +{
> +	int hwirq;
> +	struct platform_device *platdev;
> +	struct device *dev = kvm_vfio_external_base_device(vdev);
> +
> +	if (dev->bus == &platform_bus_type) {
> +		platdev = to_platform_device(dev);
> +		hwirq = platform_get_irq(platdev, index);
> +		return hwirq;
> +	} else
> +		return -EINVAL;
> +}

This seems like it was intended to be bus_type agnostic, but it's got
platform in the name.

> +
> +/**
> + * kvm_vfio_set_forward - turns a VFIO device IRQ into a forwarded IRQ
> + * @kv: handle to the kvm-vfio device
> + * @fd: file descriptor of the vfio device the IRQ belongs to
> + * @fwd: handle to the forwarded irq struct
> + *
> + * Registers and turns an IRQ as forwarded. The operation only is allowed
> + * if the IRQ is not in progress (active at interrupt controller level or
> + * auto-masked by the handler). In case the user-space masked the IRQ,
> + * the operation will fail too.
> + * returns:
> + * -EAGAIN if the IRQ is in progress or VFIO masked
> + * -EEXIST if the IRQ is already registered as forwarded
> + * -ENOMEM on memory shortage
> + */
> +static int kvm_vfio_set_forward(struct kvm_vfio *kv, int fd,
> +				struct kvm_fwd_irq *fwd)
> +{
> +	int ret = -EAGAIN;
> +	struct kvm_vfio_fwd_irq_node *node;
> +	struct vfio_platform_device *vpdev = vfio_device_data(fwd->vdev);
> +	int index = fwd->index;
> +	int host_irq = kvm_vfio_platform_get_irq(fwd->vdev, fwd->index);
> +	bool active;
> +
> +	kvm_arch_halt_guest(fwd->kvm);
> +
> +	disable_irq(host_irq);
> +
> +	active = kvm_vfio_external_is_active(vpdev, index);
> +
> +	if (active)
> +		goto out;
> +
> +	node = kvm_vfio_register_fwd_irq(kv, fwd);
> +	if (IS_ERR(node)) {
> +		ret = PTR_ERR(node);
> +		goto out;
> +	}
> +
> +	kvm_vfio_external_set_automasked(vpdev, index, false);
> +
> +	ret = kvm_arch_set_forward(fwd->kvm, host_irq, fwd->gsi);
> +
> +out:
> +	enable_irq(host_irq);
> +
> +	kvm_arch_resume_guest(fwd->kvm);
> +
> +	return ret;


If only we were passing around a vfio_device instead of a
vfio_platform_device and we had abstraction in place for the
kvm_vfio_external_foo callbacks...

> +}
> +
> +/**
> + * kvm_vfio_unset_forward - Sets a VFIO device IRQ as non forwarded
> + * @kv: handle to the kvm-vfio device
> + * @node: handle to the node to unset
> + *
> + * Calls the architecture specific implementation of set_forward and
> + * unregisters the IRQ from the forwarded IRQ list.
> + */
> +static void kvm_vfio_unset_forward(struct kvm_vfio *kv,
> +				   struct kvm_vfio_fwd_irq_node *node)
> +{
> +	struct kvm_fwd_irq fwd = node->fwd_irq;
> +	int host_irq = kvm_vfio_platform_get_irq(fwd.vdev, fwd.index);
> +	int index = fwd.index;
> +	struct vfio_platform_device *vpdev = vfio_device_data(fwd.vdev);
> +	bool active = false;
> +
> +	kvm_arch_halt_guest(fwd.kvm);
> +
> +	disable_irq(host_irq);
> +
> +	kvm_arch_unset_forward(fwd.kvm, host_irq, fwd.gsi, &active);
> +
> +	if (active)
> +		kvm_vfio_external_mask(vpdev, index);
> +
> +	kvm_vfio_external_set_automasked(vpdev, index, true);
> +	enable_irq(host_irq);
> +
> +	kvm_arch_resume_guest(fwd.kvm);
> +
> +	kvm_vfio_unregister_fwd_irq(node);

Same

> +}
> +
> +static int kvm_vfio_control_irq_forward(struct kvm_device *kdev, long attr,
> +					int32_t __user *argp)
> +{
> +	struct kvm_vfio_dev_irq user_fwd_irq;
> +	struct kvm_fwd_irq fwd;
> +	struct vfio_device *vdev;
> +	struct kvm_vfio *kv = kdev->private;
> +	struct kvm_vfio_fwd_irq_node *node;
> +	unsigned long minsz;
> +	int ret = 0;
> +	u32 *gsi = NULL;
> +	size_t size;
> +
> +	minsz = offsetofend(struct kvm_vfio_dev_irq, count);
> +
> +	if (copy_from_user(&user_fwd_irq, argp, minsz))
> +		return -EFAULT;
> +
> +	if (user_fwd_irq.argsz < minsz)
> +		return -EINVAL;
> +
> +	vdev = kvm_vfio_get_vfio_device(user_fwd_irq.fd);
> +	if (IS_ERR(vdev))
> +		return PTR_ERR(vdev);
> +
> +	ret = kvm_vfio_platform_get_irq(vdev, user_fwd_irq.index);
> +	if (ret < 0)
> +		goto error;
> +
> +	size = sizeof(__u32);
> +	if (user_fwd_irq.argsz - minsz < size) {
> +		ret = -EINVAL;
> +		goto error;
> +	}
> +
> +	gsi = memdup_user((void __user *)((unsigned long)argp + minsz), size);
> +	if (IS_ERR(gsi)) {
> +		ret = PTR_ERR(gsi);
> +		goto error;
> +	}
> +
> +	fwd.vdev =  vdev;
> +	fwd.kvm =  kdev->kvm;
> +	fwd.index = user_fwd_irq.index;
> +	fwd.subindex = 0;
> +	fwd.gsi = *gsi;
> +
> +	node = kvm_vfio_find_fwd_irq(kv, &fwd);
> +
> +	switch (attr) {
> +	case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> +		if (node) {
> +			ret = -EEXIST;
> +			goto error;
> +		}
> +		mutex_lock(&kv->lock);
> +		ret = kvm_vfio_set_forward(kv, user_fwd_irq.fd, &fwd);
> +		mutex_unlock(&kv->lock);
> +		break;
> +	case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> +		if (!node) {
> +			ret = -ENOENT;
> +			goto error;
> +		}
> +		mutex_lock(&kv->lock);
> +		kvm_vfio_unset_forward(kv, node);
> +		mutex_unlock(&kv->lock);
> +		kvm_vfio_put_vfio_device(vdev);
> +		ret = 0;
> +		break;
> +	}
> +
> +	kfree(gsi);
> +error:
> +	if (ret < 0)
> +		kvm_vfio_put_vfio_device(vdev);
> +	return ret;
> +}
> +
> +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
> +{
> +	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
> +	int ret;
> +
> +	switch (attr) {
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> +	case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> +	case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> +		ret = kvm_vfio_control_irq_forward(kdev, attr, argp);
> +		break;
> +#endif
> +	default:
> +		ret = -ENXIO;
> +	}
> +	return ret;
> +}
> +
> +/**
> + * kvm_vfio_clean_fwd_irq - Unset forwarding state of all
> + * registered forwarded IRQs and free their list nodes.
> + * @kv: kvm-vfio device
> + *
> + * Loop on all registered device/IRQ combos, reset the non forwarded state,
> + * void the lists and release the reference
> + */
> +static int kvm_vfio_clean_fwd_irq(struct kvm_vfio *kv)
> +{
> +	struct kvm_vfio_fwd_irq_node *node, *tmp;
> +
> +	list_for_each_entry_safe(node, tmp, &kv->fwd_node_list, link) {
> +		kvm_vfio_unset_forward(kv, node);
> +	}
> +	return 0;
> +}
> +
>  static int kvm_vfio_set_attr(struct kvm_device *dev,
>  			     struct kvm_device_attr *attr)
>  {
>  	switch (attr->group) {
>  	case KVM_DEV_VFIO_GROUP:
>  		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
> +	case KVM_DEV_VFIO_DEVICE:
> +		return kvm_vfio_set_device(dev, attr->attr, attr->addr);
>  	}
>  
>  	return -ENXIO;
> @@ -341,10 +638,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>  		case KVM_DEV_VFIO_GROUP_DEL:
>  			return 0;
>  		}
> -
>  		break;
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> +	case KVM_DEV_VFIO_DEVICE:
> +		switch (attr->attr) {
> +		case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> +		case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> +			return 0;
> +		}
> +		break;
> +#endif
>  	}
> -
>  	return -ENXIO;
>  }
>  
> @@ -358,7 +662,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
>  		list_del(&kvg->node);
>  		kfree(kvg);
>  	}
> -
> +	kvm_vfio_clean_fwd_irq(kv);
>  	kvm_vfio_update_coherency(dev);
>  
>  	kfree(kv);
> @@ -390,6 +694,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
>  		return -ENOMEM;
>  
>  	INIT_LIST_HEAD(&kv->group_list);
> +	INIT_LIST_HEAD(&kv->fwd_node_list);
>  	mutex_init(&kv->lock);
>  
>  	dev->private = kv;

It's so close to not tainting the kvm-vfio device with anything platform
specific, can't we provide that last bit of abstraction somehow?
Thanks,

Alex




More information about the linux-arm-kernel mailing list