[RFC v2 8/9] KVM: KVM-VFIO: generic KVM_DEV_VFIO_DEVICE command and IRQ forwarding control

Eric Auger eric.auger at linaro.org
Thu Sep 11 02:35:56 PDT 2014


On 09/11/2014 05:10 AM, Christoffer Dall wrote:
> On Mon, Sep 01, 2014 at 02:52:47PM +0200, Eric Auger wrote:
>> This patch introduces a new KVM_DEV_VFIO_DEVICE attribute.
>>
>> This is a new control channel which enables KVM to cooperate with
>> viable VFIO devices.
>>
>> The kvm-vfio device now holds a list of devices (kvm_vfio_device)
>> in addition to a list of groups (kvm_vfio_group). The new
>> infrastructure enables to check the validity of the VFIO device
>> file descriptor, get and hold a reference to it.
>>
>> The first concrete implemented command is IRQ forward control:
>> KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ.
>>
>> It consists in programing the VFIO driver and KVM in a consistent manner
>> so that an optimized IRQ injection/completion is set up. Each
>> kvm_vfio_device holds a list of forwarded IRQ. When putting a
>> kvm_vfio_device, the implementation makes sure the forwarded IRQs
>> are set again in the normal handling state (non forwarded).
> 
> 'putting a kvm_vfio_device' sounds to like you're golf'ing :)
> 
> When a kvm_vfio_device is released?
sure
> 
>>
>> The forwarding programmming is architecture specific, embodied by the
>> kvm_arch_set_fwd_state function. Its implementation is given in a
>> separate patch file.
> 
> I would drop the last sentence and instead indicate that this is handled
> properly when the architecture does not support such a feature.
ok
> 
>>
>> The forwarding control modality is enabled by the
>> __KVM_HAVE_ARCH_KVM_VFIO_FORWARD define.
>>
>> Signed-off-by: Eric Auger <eric.auger at linaro.org>
>>
>> ---
>>
>> v1 -> v2:
>> - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
>> - original patch file separated into 2 parts: generic part moved in vfio.c
>>   and ARM specific part(kvm_arch_set_fwd_state)
>> ---
>>  include/linux/kvm_host.h |  27 +++
>>  virt/kvm/vfio.c          | 452 ++++++++++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 477 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index a4c33b3..24350dc 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -1065,6 +1065,21 @@ struct kvm_device_ops {
>>  		      unsigned long arg);
>>  };
>>  
>> +enum kvm_fwd_irq_action {
>> +	KVM_VFIO_IRQ_SET_FORWARD,
>> +	KVM_VFIO_IRQ_SET_NORMAL,
>> +	KVM_VFIO_IRQ_CLEANUP,
> 
> This is KVM internal API, so it would probably be good to document this.
> Especially the CLEANUP bit worries me, see below.
I will document it
> 
>> +};
>> +
>> +/* internal structure describing a forwarded IRQ */
>> +struct kvm_fwd_irq {
>> +	struct list_head link;
> 
> this list entry is local to the kvm vfio device, right? that means you
> probably want a struct with just the below fields, and then have a
> containing struct in the generic device file, private to it's logic.
I will introduce 2 separate structs
> 
>> +	__u32 index; /* platform device irq index */
>> +	__u32 hwirq; /*physical IRQ */
>> +	__u32 gsi; /* virtual IRQ */
>> +	struct kvm_vcpu *vcpu; /* vcpu to inject into*/
>> +};
>> +
>>  void kvm_device_get(struct kvm_device *dev);
>>  void kvm_device_put(struct kvm_device *dev);
>>  struct kvm_device *kvm_device_from_filp(struct file *filp);
>> @@ -1075,6 +1090,18 @@ extern struct kvm_device_ops kvm_vfio_ops;
>>  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
>>  extern struct kvm_device_ops kvm_flic_ops;
>>  
>> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
>> +int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd,
> 
> what's the 'p' in pfwd?
will rename
> 
>> +			   enum kvm_fwd_irq_action action);
>> +
>> +#else
>> +static inline int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd,
>> +					 enum kvm_fwd_irq_action action)
>> +{
>> +	return 0;
>> +}
>> +#endif
>> +
>>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>>  
>>  static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
>> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
>> index 76dc7a1..e4a81c4 100644
>> --- a/virt/kvm/vfio.c
>> +++ b/virt/kvm/vfio.c
>> @@ -18,14 +18,24 @@
>>  #include <linux/slab.h>
>>  #include <linux/uaccess.h>
>>  #include <linux/vfio.h>
>> +#include <linux/platform_device.h>
>>  
>>  struct kvm_vfio_group {
>>  	struct list_head node;
>>  	struct vfio_group *vfio_group;
>>  };
>>  
>> +struct kvm_vfio_device {
>> +	struct list_head node;
>> +	struct vfio_device *vfio_device;
>> +	/* list of forwarded IRQs for that VFIO device */
>> +	struct list_head fwd_irq_list;
>> +	int fd;
>> +};
>> +
>>  struct kvm_vfio {
>>  	struct list_head group_list;
>> +	struct list_head device_list;
>>  	struct mutex lock;
>>  	bool noncoherent;
>>  };
>> @@ -246,12 +256,441 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>>  	return -ENXIO;
>>  }
>>  
>> +/**
>> + * get_vfio_device - returns the vfio-device corresponding to this fd
>> + * @fd:fd of the vfio platform device
>> + *
>> + * checks it is a vfio device
>> + * increment its ref counter
> 
> why the short lines?  Just write this out in proper English.
OK
> 
>> + */
>> +static struct vfio_device *kvm_vfio_get_vfio_device(int fd)
>> +{
>> +	struct fd f;
>> +	struct vfio_device *vdev;
>> +
>> +	f = fdget(fd);
>> +	if (!f.file)
>> +		return NULL;
>> +	vdev = kvm_vfio_device_get_external_user(f.file);
>> +	fdput(f);
>> +	return vdev;
>> +}
>> +
>> +/**
>> + * put_vfio_device: put the vfio platform device
>> + * @vdev: vfio_device to put
>> + *
>> + * decrement the ref counter
>> + */
>> +static void kvm_vfio_put_vfio_device(struct vfio_device *vdev)
>> +{
>> +	kvm_vfio_device_put_external_user(vdev);
>> +}
>> +
>> +/**
>> + * kvm_vfio_find_device - look for the device in the assigned
>> + * device list
>> + * @kv: the kvm-vfio device
>> + * @vdev: the vfio_device to look for
>> + *
>> + * returns the associated kvm_vfio_device if the device is known,
>> + * meaning at least 1 IRQ is forwarded for this device.
>> + * in the device is not registered, returns NULL.
>> + */
> 
> are these functions meant to be exported?  Otherwise they should be
> static, and the documentation on these simple list iteration wrappers
> seems like overkill imho.
could be static indeed
> 
>> +struct kvm_vfio_device *kvm_vfio_find_device(struct kvm_vfio *kv,
>> +					     struct vfio_device *vdev)
>> +{
>> +	struct kvm_vfio_device *kvm_vdev_iter;
>> +
>> +	list_for_each_entry(kvm_vdev_iter, &kv->device_list, node) {
>> +		if (kvm_vdev_iter->vfio_device == vdev)
>> +			return kvm_vdev_iter;
>> +	}
>> +	return NULL;
>> +}
>> +
>> +/**
>> + * kvm_vfio_find_irq - look for a an irq in the device IRQ list
>> + * @kvm_vdev: the kvm_vfio_device
>> + * @irq_index: irq index
>> + *
>> + * returns the forwarded irq struct if it exists, NULL in the negative
>> + */
>> +struct kvm_fwd_irq *kvm_vfio_find_irq(struct kvm_vfio_device *kvm_vdev,
>> +				      int irq_index)
>> +{
>> +	struct kvm_fwd_irq *fwd_irq_iter;
>> +
>> +	list_for_each_entry(fwd_irq_iter, &kvm_vdev->fwd_irq_list, link) {
>> +		if (fwd_irq_iter->index == irq_index)
>> +			return fwd_irq_iter;
>> +	}
>> +	return NULL;
>> +}
>> +
>> +/**
>> + * validate_forward - checks whether forwarding a given IRQ is meaningful
>> + * @vdev:  vfio_device the IRQ belongs to
>> + * @fwd_irq: user struct containing the irq_index to forward
>> + * @kvm_vdev: if a forwarded IRQ already exists for that VFIO device,
>> + * kvm_vfio_device that holds it
>> + * @hwirq: irq numberthe irq index corresponds to
>> + *
>> + * checks the vfio-device is a platform vfio device
>> + * checks the irq_index corresponds to an actual hwirq and
>> + * checks this hwirq is not already forwarded
>> + * returns < 0 on following errors:
>> + * not a platform device, bad irq index, already forwarded
>> + */
>> +static int kvm_vfio_validate_forward(struct kvm_vfio *kv,
>> +			    struct vfio_device *vdev,
>> +			    struct kvm_arch_forwarded_irq *fwd_irq,
>> +			    struct kvm_vfio_device **kvm_vdev,
>> +			    int *hwirq)
>> +{
>> +	struct device *dev = kvm_vfio_external_base_device(vdev);
>> +	struct platform_device *platdev;
>> +
>> +	*hwirq = -1;
>> +	*kvm_vdev = NULL;
>> +	if (strcmp(dev->bus->name, "platform") == 0) {
>> +		platdev = to_platform_device(dev);
>> +		*hwirq = platform_get_irq(platdev, fwd_irq->index);
>> +		if (*hwirq < 0) {
>> +			kvm_err("%s incorrect index\n", __func__);
>> +			return -EINVAL;
>> +		}
>> +	} else {
>> +		kvm_err("%s not a platform device\n", __func__);
>> +		return -EINVAL;
>> +	}
> 
> need some spaceing here, also, I would turn this around, first check if
> the strcmp fails, and then error out, then do you next check etc., to
> avoid so many nested statements.
ok
> 
>> +	/* is a ref to this device already owned by the KVM-VFIO device? */
> 
> this comment is not particularly helpful in its current form, it would
> be helpful if you specified that we're checking whether that particular
> device/irq combo is already registered.
> 
>> +	*kvm_vdev = kvm_vfio_find_device(kv, vdev);
>> +	if (*kvm_vdev) {
>> +		if (kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index)) {
>> +			kvm_err("%s irq %d already forwarded\n",
>> +				__func__, *hwirq);
> 
> don't flood the kernel log because of a user error, just allocate an
> error code for this purpose and document it in the ABI, -EEXIST or
> something.
ok
> 
>> +			return -EINVAL;
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>> +/**
>> + * validate_unforward: check a deassignment is meaningful
>> + * @kv: the kvm_vfio device
>> + * @vdev: the vfio_device whose irq to deassign belongs to
>> + * @fwd_irq: the user struct that contains the fd and irq_index of the irq
>> + * @kvm_vdev: the kvm_vfio_device the forwarded irq belongs to, if
>> + * it exists
>> + *
>> + * returns 0 if the provided irq effectively is forwarded
>> + * (a ref to this vfio_device is hold and this irq belongs to
>                                     held
>> + * the forwarded irq of this device)
>> + * returns -EINVAL in the negative
> 
>                ENOENT should be returned if you don't have an entry.
> 	       EINVAL could be used if you supply an fd that isn't a
> 	       VFIO device file descriptor, for example.  Again,
> 	       consider documenting all this in the API.
> 
>> + */
>> +static int kvm_vfio_validate_unforward(struct kvm_vfio *kv,
>> +			      struct vfio_device *vdev,
>> +			      struct kvm_arch_forwarded_irq *fwd_irq,
>> +			      struct kvm_vfio_device **kvm_vdev)
>> +{
>> +	struct kvm_fwd_irq *pfwd;
>> +
>> +	*kvm_vdev = kvm_vfio_find_device(kv, vdev);
>> +	if (!kvm_vdev) {
>> +		kvm_err("%s no forwarded irq for this device\n", __func__);
> 
> don't flood the kernel log
ok
> 
>> +		return -EINVAL;
>> +	}
>> +	pfwd = kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index);
>> +	if (!pfwd) {
>> +		kvm_err("%s irq %d is not forwarded\n", __func__, fwd_irq->fd);
> 
> 
>> +		return -EINVAL;
> 
> same here
ok
> 
>> +	}
>> +	return 0;
>> +}
>> +
>> +/**
>> + * kvm_vfio_forward - set a forwarded IRQ
>> + * @kdev: the kvm device
>> + * @vdev: the vfio device the IRQ belongs to
>> + * @fwd_irq: the user struct containing the irq_index and guest irq
>> + * @must_put: tells the caller whether the vfio_device must be put after
>> + * the call (ref must be released in case a ref onto this device was
>> + * already hold or in case of new device and failure)
>> + *
>> + * validate the injection, activate forward and store the information
>       Validate
>> + * about which irq and which device is concerned so that on deassign or
>> + * kvm-vfio destruction everuthing can be cleaned up.
>                            everything
> 
> I'm not sure I understand this explanation.  Do we have concerned
> devices?
> 
> I think you want to say something along the lines of: If userspace passed
> a valid vfio device and irq handle and the architecture supports
> forwarding this combination, register the vfio_device and irq
> combination in the ....
ok
> 
>> + */
>> +static int kvm_vfio_forward(struct kvm_device *kdev,
>> +			    struct vfio_device *vdev,
>> +			    struct kvm_arch_forwarded_irq *fwd_irq,
>> +			    bool *must_put)
>> +{
>> +	int ret;
>> +	struct kvm_fwd_irq *pfwd = NULL;
>> +	struct kvm_vfio_device *kvm_vdev = NULL;
>> +	struct kvm_vfio *kv = kdev->private;
>> +	int hwirq;
>> +
>> +	*must_put = true;
>> +	ret = kvm_vfio_validate_forward(kv, vdev, fwd_irq,
>> +					&kvm_vdev, &hwirq);
>> +	if (ret < 0)
>> +		return -EINVAL;
>> +
>> +	pfwd = kzalloc(sizeof(*pfwd), GFP_KERNEL);
> 
> seems a bit pointless to zero-out the memory if you're setting all
> fields below.
ok
> 
>> +	if (!pfwd)
>> +		return -ENOMEM;
>> +	pfwd->index = fwd_irq->index;
>> +	pfwd->gsi = fwd_irq->gsi;
>> +	pfwd->hwirq = hwirq;
>> +	pfwd->vcpu = kvm_get_vcpu(kdev->kvm, 0);
>> +	ret = kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_SET_FORWARD);
>> +	if (ret < 0) {
>> +		kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_CLEANUP);
> 
> this whole thing feels incredibly broken to me.  Setting a forward
> should either work or not work, not something in between that leaves
> something to be cleaned up.  Why this two-stage thingy here?
I wanted to exploit the return value of vgic_map_phys_irq which is
likely to fail if the phys/virt mapping exists at VGIC level.

I already validated the injection from a KVM_VFIO_DEVICE point of view
(the device/irq is not known internally). But what if another external
component - which does not exist yet - maps the IRQ at VGIC level? Maybe
I need to replace the existing validation check by querying the VGIC at
low level instead of checking KVM-VFIO local variables.
> 
>> +		kfree(pfwd);
> 
> probably want to move your free-and-return-error to the end of the
> function.
ok
> 
>> +		return ret;
>> +	}
>> +
>> +	if (!kvm_vdev) {
>> +		/* create & insert the new device and keep the ref */
>> +		kvm_vdev = kzalloc(sizeof(*kvm_vdev), GFP_KERNEL);
> 
> again, no need for zeroing out the memory.
ok
> 
>> +		if (!kvm_vdev) {
>> +			kvm_arch_set_fwd_state(pfwd, false);
>> +			kfree(pfwd);
>> +			return -ENOMEM;
>> +		}
>> +
>> +		kvm_vdev->vfio_device = vdev;
>> +		kvm_vdev->fd = fwd_irq->fd;
>> +		INIT_LIST_HEAD(&kvm_vdev->fwd_irq_list);
>> +		list_add(&kvm_vdev->node, &kv->device_list);
>> +		/*
>> +		 * the only case where we keep the ref:
>> +		 * new device and forward setting successful
>> +		 */
>> +		*must_put = false;
>> +	}
>> +
>> +	list_add(&pfwd->link, &kvm_vdev->fwd_irq_list);
>> +
>> +	kvm_debug("forwarding set for fd=%d, hwirq=%d, gsi=%d\n",
>> +	fwd_irq->fd, hwirq, fwd_irq->gsi);
> 
> please indent this to align with the opening parenthesis.
ok
> 
>> +
>> +	return 0;
>> +}
>> +
>> +/**
>> + * remove_assigned_device - put a given device from the list
> 
> this isn't a 'put', at least not *just* a put.
correct, I will rephrase
> 
>> + * @kv: the kvm-vfio device
>> + * @vdev: the vfio-device to remove
>> + *
>> + * change the state of all forwarded IRQs, free the forwarded IRQ list,
>> + * remove the corresponding kvm_vfio_device from the assigned device
>> + * list.
>> + * returns true if the device could be removed, false in the negative
>> + */
>> +bool remove_assigned_device(struct kvm_vfio *kv,
>> +			    struct vfio_device *vdev)
>> +{
>> +	struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev;
>> +	struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq;
>> +	bool removed = false;
>> +	int ret;
>> +
>> +	list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev,
>> +				 &kv->device_list, node) {
>> +		if (kvm_vdev_iter->vfio_device == vdev) {
>> +			/* loop on all its forwarded IRQ */
>> +			list_for_each_entry_safe(fwd_irq_iter, tmp_irq,
>> +						 &kvm_vdev_iter->fwd_irq_list,
>> +						 link) {
> 
> hmm, seems this function is only called when you have no more forwarded
> IRQs, so isn't all of this completely dead (and unnecessary) code?
yep I can simplify all that cleanup
> 
>> +				ret = kvm_arch_set_fwd_state(fwd_irq_iter,
>> +						KVM_VFIO_IRQ_SET_NORMAL);
>> +				if (ret < 0)
>> +					return ret;
> 
> you're returning an error code to a bool function, which means you'll
> return true when there was an error.  Is this your intention? ;)
definitively not!
> 
> if we have an error here, this would be a very very bad situation wouldn't it?
sure. I will simplify this, transform kvm_arch_set_fwd_state into a void
function
> 
>> +				list_del(&fwd_irq_iter->link);
>> +				kfree(fwd_irq_iter);
>> +			}
>> +			/* all IRQs could be deassigned */
>> +			list_del(&kvm_vdev_iter->node);
>> +			kvm_vfio_device_put_external_user(
>> +				kvm_vdev_iter->vfio_device);
>> +			kfree(kvm_vdev_iter);
>> +			removed = true;
>> +			break;
>> +		}
>> +	}
>> +	return removed;
>> +}
>> +
>> +
>> +/**
>> + * remove_fwd_irq - remove a forwarded irq
>> + *
>> + * @kv: kvm-vfio device
>> + * kvm_vdev: the kvm_vfio_device the IRQ belongs to
>> + * irq_index: the index of the IRQ
>> + *
>> + * change the forwarded state of the IRQ, remove the IRQ from
>> + * the device forwarded IRQ list. In case it is the last one,
>> + * put the device
>> + */
>> +int remove_fwd_irq(struct kvm_vfio *kv,
>> +		   struct kvm_vfio_device *kvm_vdev,
>> +		   int irq_index)
>> +{
>> +	struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq;
>> +	int ret = -1;
>> +
>> +	list_for_each_entry_safe(fwd_irq_iter, tmp_irq,
>> +				 &kvm_vdev->fwd_irq_list, link) {
> 
> hmmm, you can only forward one irq for a specific device once, right?
> And you already have a lookup function, so why not call that, and then
> remove it?
> 
> I'm confused.
will fix that
> 
>> +		if (fwd_irq_iter->index == irq_index) {
>> +			ret = kvm_arch_set_fwd_state(fwd_irq_iter,
>> +						KVM_VFIO_IRQ_SET_NORMAL);
>> +			if (ret < 0)
>> +				break;
>> +			list_del(&fwd_irq_iter->link);
>> +			kfree(fwd_irq_iter);
>> +			ret = 0;
>> +			break;
>> +		}
>> +	}
>> +	if (list_empty(&kvm_vdev->fwd_irq_list))
>> +		remove_assigned_device(kv, kvm_vdev->vfio_device);
>> +
>> +	return ret;
>> +}
>> +
>> +/**
>> + * kvm_vfio_unforward - remove a forwarded IRQ
>> + * @kdev: the kvm device
>> + * @vdev: the vfio_device
>> + * @fwd_irq: user struct
>> + * after checking this IRQ effectively is forwarded, change its state,
>> + * remove it from the corresponding kvm_vfio_device list
>> + */
>> +static int kvm_vfio_unforward(struct kvm_device *kdev,
>> +				     struct vfio_device *vdev,
>> +				     struct kvm_arch_forwarded_irq *fwd_irq)
>> +{
>> +	struct kvm_vfio *kv = kdev->private;
>> +	struct kvm_vfio_device *kvm_vdev;
>> +	int ret;
>> +
>> +	ret = kvm_vfio_validate_unforward(kv, vdev, fwd_irq, &kvm_vdev);
>> +	if (ret < 0)
>> +		return -EINVAL;
> 
> why do you override the return value?  Propagate it.
ok
> 
>> +
>> +	ret = remove_fwd_irq(kv, kvm_vdev, fwd_irq->index);
>> +	if (ret < 0)
>> +		kvm_err("%s fail unforwarding (fd=%d, index=%d)\n",
>> +			__func__, fwd_irq->fd, fwd_irq->index);
>> +	else
>> +		kvm_debug("%s unforwarding IRQ (fd=%d, index=%d)\n",
>> +			  __func__, fwd_irq->fd, fwd_irq->index);
> 
> again with the kernel log here.
ok
> 
> 
> 
>> +	return ret;
>> +}
>> +
>> +
>> +
>> +
>> +/**
>> + * kvm_vfio_set_device - the top function for interracting with a vfio
> 
>                                 top?             interacting
> 
>> + * device
>> + */
> 
> probably just skip this comment
ok
> 
>> +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
>> +{
>> +	struct kvm_vfio *kv = kdev->private;
>> +	struct vfio_device *vdev;
>> +	struct kvm_arch_forwarded_irq fwd_irq; /* user struct */
>> +	int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
>> +
>> +	switch (attr) {
>> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
>> +	case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:{
>> +		bool must_put;
>> +		int ret;
>> +
>> +		if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq)))
>> +			return -EFAULT;
>> +		vdev = kvm_vfio_get_vfio_device(fwd_irq.fd);
>> +		if (IS_ERR(vdev))
>> +			return PTR_ERR(vdev);
> 
> seems like this whole block of code is replicated below, needs
> refactoring.
ok
> 
>> +		mutex_lock(&kv->lock);
>> +		ret = kvm_vfio_forward(kdev, vdev, &fwd_irq, &must_put);
>> +		if (must_put)
>> +			kvm_vfio_put_vfio_device(vdev);
> 
> this must_put looks plain weird.  I think you want to balance your
> get/put's always; can't you just get an extra reference in
> kvm_vfio_forward() ?
I will investigate that. Makes sense
> 
>> +		mutex_unlock(&kv->lock);
>> +		return ret;
>> +		}
>> +	case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: {
>> +		int ret;
>> +
>> +		if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq)))
>> +			return -EFAULT;
>> +		vdev = kvm_vfio_get_vfio_device(fwd_irq.fd);
>> +		if (IS_ERR(vdev))
>> +			return PTR_ERR(vdev);
>> +
>> +		kvm_vfio_device_put_external_user(vdev);
> 
> you're dropping the reference to the device but referencing it in your
> unfoward call below?
thanks for identifying that bug.
> 
>> +		mutex_lock(&kv->lock);
>> +		ret = kvm_vfio_unforward(kdev, vdev, &fwd_irq);
>> +		mutex_unlock(&kv->lock);
>> +		return ret;
>> +	}
>> +#endif
>> +	default:
>> +		return -ENXIO;
>> +	}
>> +}
>> +
>> +/**
>> + * kvm_vfio_put_all_devices - cancel forwarded IRQs and put all devices
>> + * @kv: kvm-vfio device
>> + *
>> + * loop on all got devices and their associated forwarded IRQs
> 
> 'loop on all got' ?
> 
> Restore the non-forwarded state for all registered devices and ...
ok
> 
>> + * restore the non forwarded state, remove IRQs and their devices from
>> + * the respective list, put the vfio platform devices
>> + *
>> + * When this function is called, the vcpu already are destroyed. No
>                                     the VPUCs are already destroyed.
>> + * vgic manipulation can happen hence the KVM_VFIO_IRQ_CLEANUP
>> + * kvm_arch_set_fwd_state action
> 
> this last bit didn't make any sense to me.  Also, why are we referring
> to the vgic in generic code?
doesn't make sense anymore indeed. I wanted to emphasize the fact that
VGIC KVM device is destroyed before the KVM VFIO device and this
explains why I need a special CLEANUP cmd (besides the fact I need to
call chip->irq_eoi(d) for the forwarded IRQs);


> 
>> + */
>> +int kvm_vfio_put_all_devices(struct kvm_vfio *kv)
>> +{
>> +	struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq;
>> +	struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev;
>> +
>> +	/* loop on all the assigned devices */
> 
> unnecessary comment
ok
> 
>> +	list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev,
>> +				 &kv->device_list, node) {
>> +
>> +		/* loop on all its forwarded IRQ */
> 
> same
ok

Thanks for the detailed review

Best Regards

Eric
> 
>> +		list_for_each_entry_safe(fwd_irq_iter, tmp_irq,
>> +					 &kvm_vdev_iter->fwd_irq_list, link) {
>> +			kvm_arch_set_fwd_state(fwd_irq_iter,
>> +						KVM_VFIO_IRQ_CLEANUP);
>> +			list_del(&fwd_irq_iter->link);
>> +			kfree(fwd_irq_iter);
>> +		}
>> +		list_del(&kvm_vdev_iter->node);
>> +		kvm_vfio_device_put_external_user(kvm_vdev_iter->vfio_device);
>> +		kfree(kvm_vdev_iter);
>> +	}
>> +	return 0;
>> +}
>> +
>> +
>>  static int kvm_vfio_set_attr(struct kvm_device *dev,
>>  			     struct kvm_device_attr *attr)
>>  {
>>  	switch (attr->group) {
>>  	case KVM_DEV_VFIO_GROUP:
>>  		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
>> +	case KVM_DEV_VFIO_DEVICE:
>> +		return kvm_vfio_set_device(dev, attr->attr, attr->addr);
>>  	}
>>  
>>  	return -ENXIO;
>> @@ -267,10 +706,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>>  		case KVM_DEV_VFIO_GROUP_DEL:
>>  			return 0;
>>  		}
>> -
>>  		break;
>> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
>> +	case KVM_DEV_VFIO_DEVICE:
>> +		switch (attr->attr) {
>> +		case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
>> +		case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
>> +			return 0;
>> +		}
>> +		break;
>> +#endif
>>  	}
>> -
>>  	return -ENXIO;
>>  }
>>  
>> @@ -284,6 +730,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
>>  		list_del(&kvg->node);
>>  		kfree(kvg);
>>  	}
>> +	kvm_vfio_put_all_devices(kv);
>>  
>>  	kvm_vfio_update_coherency(dev);
>>  
>> @@ -306,6 +753,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
>>  		return -ENOMEM;
>>  
>>  	INIT_LIST_HEAD(&kv->group_list);
>> +	INIT_LIST_HEAD(&kv->device_list);
>>  	mutex_init(&kv->lock);
>>  
>>  	dev->private = kv;
>> -- 
>> 1.9.1
>>




More information about the linux-arm-kernel mailing list