[PATCH 07/13] pci: Provide sensible irq vector alloc/free routines

Alexander Gordeev agordeev at redhat.com
Thu Jun 23 04:16:10 PDT 2016


On Tue, Jun 14, 2016 at 09:59:00PM +0200, Christoph Hellwig wrote:
> Add a helper to allocate a range of interrupt vectors, which will
> transparently use MSI-X and MSI if available or fallback to legacy
> vectors.  The interrupts are available in a core managed array
> in the pci_dev structure, and can also be released using a similar
> helper.
> 
> The next patch will also add automatic spreading of MSI / MSI-X
> vectors to this function.
> 
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
>  drivers/pci/msi.c   | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci.h |  18 +++++++++

New APIs should be documented in Documentation/PCI/MSI-HOWTO.txt, I guess.

>  2 files changed, 128 insertions(+)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index a080f44..a33adec 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -4,6 +4,7 @@
>   *
>   * Copyright (C) 2003-2004 Intel
>   * Copyright (C) Tom Long Nguyen (tom.l.nguyen at intel.com)
> + * Copyright (c) 2016 Christoph Hellwig.
>   */
>  
>  #include <linux/err.h>
> @@ -1120,6 +1121,115 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>  }
>  EXPORT_SYMBOL(pci_enable_msix_range);
>  
> +static unsigned int pci_nr_irq_vectors(struct pci_dev *pdev)
> +{
> +	int nr_entries;
> +
> +	nr_entries = pci_msix_vec_count(pdev);
> +	if (nr_entries <= 0 && pci_msi_supported(pdev, 1))
> +		nr_entries = pci_msi_vec_count(pdev);
> +	if (nr_entries <= 0)
> +		nr_entries = 1;
> +	return nr_entries;
> +}

This function is strange, because it:
  (a) does not consider the PCI_IRQ_NOMSIX flag;
  (b) only calls pci_msi_supported() for the MSI case;
  (c) calls pci_msi_supported() with just one vector;
  (d) might return a suboptimal number of vectors (the MSI-X count being
      used later for MSI, or vice versa)

Overall, I would suggest simply returning the maximum of the MSI-X and
MSI vector counts and letting the rest of the code (i.e. the two range
functions) handle (a)-(d).

> +static int pci_enable_msix_range_wrapper(struct pci_dev *pdev, u32 *irqs,
> +		unsigned int min_vecs, unsigned int max_vecs)
> +{
> +	struct msix_entry *msix_entries;
> +	int vecs, i;
> +
> +	msix_entries = kcalloc(max_vecs, sizeof(struct msix_entry), GFP_KERNEL);
> +	if (!msix_entries)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < max_vecs; i++)
> +		msix_entries[i].entry = i;
> +
> +	vecs = pci_enable_msix_range(pdev, msix_entries, min_vecs, max_vecs);
> +	if (vecs > 0) {

This condition check is unneeded: the loop below does not execute
when vecs <= 0.

> +		for (i = 0; i < vecs; i++)
> +			irqs[i] = msix_entries[i].vector;
> +	}
> +
> +	kfree(msix_entries);
> +	return vecs;
> +}
> +
> +/**
> + * pci_alloc_irq_vectors - allocate multiple IRQs for a device
> + * @dev:		PCI device to operate on
> + * @min_vecs:		minimum number of vectors required (must be >= 1)
> + * @max_vecs:		maximum (desired) number of vectors
> + * @flags:		flags or quirks for the allocation
> + *
> + * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
> + * vectors if available, and fall back to a single legacy vector
> + * if neither is available.  Return the number of vectors allocated,
> + * (which might be smaller than @max_vecs) if successful, or a negative
> + * error code on error.  The Linux irq numbers for the allocated
> + * vectors are stored in pdev->irqs.  If less than @min_vecs interrupt
> + * vectors are available for @dev the function will fail with -ENOSPC.
> + */
> +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
> +		unsigned int max_vecs, unsigned int flags)
> +{
> +	unsigned int vecs, i;
> +	u32 *irqs;
> +
> +	max_vecs = min(max_vecs, pci_nr_irq_vectors(dev));

Optionally, you could move this assignment into pci_nr_irq_vectors() and
simply let it determine the number of vectors to request.

> +	irqs = kcalloc(max_vecs, sizeof(u32), GFP_KERNEL);
> +	if (!irqs)
> +		return -ENOMEM;
> +
> +	if (!(flags & PCI_IRQ_NOMSIX)) {
> +		vecs = pci_enable_msix_range_wrapper(dev, irqs, min_vecs,
> +				max_vecs);
> +		if (vecs > 0)
> +			goto done;
> +	}
> +
> +	vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
> +	if (vecs > 0) {
> +		for (i = 0; i < vecs; i++)
> +			irqs[i] = dev->irq + i;
> +		goto done;
> +	}
> +
> +	if (min_vecs > 1)
> +		return -ENOSPC;

irqs is leaked if (min_vecs > 1)

You can get rid of this check altogether if you reorganize your code,
e.g. like this:

	...

	vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
	if (vecs < 0)
		goto legacy;

	for (i = 0; i < vecs; i++)
		irqs[i] = dev->irq + i;

done:
	...


legacy:
	...

> +
> +	/* use legacy irq */
> +	kfree(irqs);
> +	dev->irqs = &dev->irq;
> +	return 1;
> +
> +done:
> +	dev->irqs = irqs;
> +	return vecs;
> +}
> +EXPORT_SYMBOL(pci_alloc_irq_vectors);
> +
> +/**
> + * pci_free_irq_vectors - free previously allocated IRQs for a device
> + * @dev:		PCI device to operate on
> + *
> + * Undoes the allocations and enabling in pci_alloc_irq_vectors().
> + */
> +void pci_free_irq_vectors(struct pci_dev *dev)
> +{
> +	if (dev->msix_enabled)
> +		pci_disable_msix(dev);
> +	else if (dev->msi_enabled)
> +		pci_disable_msi(dev);

The checks are probably either redundant or incomplete. Redundant,
because pci_disable_msi()/pci_disable_msix() perform them anyway:

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

Incomplete, because the two other conditions (!pci_msi_enable and !dev)
are not checked.

> +	if (dev->irqs != &dev->irq)
> +		kfree(dev->irqs);

Unset dev->irqs?

BTW, since (dev->irqs == &dev->irq) effectively indicates that neither
MSI nor MSI-X was enabled, this function could bail out early in that case.

> +}
> +EXPORT_SYMBOL(pci_free_irq_vectors);
> +
> +
>  struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
>  {
>  	return to_pci_dev(desc->dev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index b67e4df..84a20fc 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -320,6 +320,7 @@ struct pci_dev {
>  	 * directly, use the values stored here. They might be different!
>  	 */
>  	unsigned int	irq;
> +	unsigned int	*irqs;
>  	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
>  
>  	bool match_driver;		/* Skip attaching driver */
> @@ -1237,6 +1238,8 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
>  int pci_set_vga_state(struct pci_dev *pdev, bool decode,
>  		      unsigned int command_bits, u32 flags);
>  
> +#define PCI_IRQ_NOMSIX		(1 << 0) /* don't try to use MSI-X interrupts */

BTW, why PCI_IRQ_NOMSIX only and no PCI_IRQ_NOMSI?

>  /* kmem_cache style wrapper around pci_alloc_consistent() */
>  
>  #include <linux/pci-dma.h>
> @@ -1284,6 +1287,9 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
>  		return rc;
>  	return 0;
>  }
> +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
> +		unsigned int max_vecs, unsigned int flags);
> +void pci_free_irq_vectors(struct pci_dev *dev);
>  #else
>  static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
>  static inline void pci_msi_shutdown(struct pci_dev *dev) { }
> @@ -1307,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev *dev,
>  static inline int pci_enable_msix_exact(struct pci_dev *dev,
>  		      struct msix_entry *entries, int nvec)
>  { return -ENOSYS; }
> +static inline int pci_alloc_irq_vectors(struct pci_dev *dev,
> +		unsigned int min_vecs, unsigned int max_vecs,
> +		unsigned int flags)
> +{
> +	if (min_vecs > 1)
> +		return -ENOSPC;
> +	dev->irqs = &dev->irq;
> +	return 1;
> +}
> +static inline void pci_free_irq_vectors(struct pci_dev *dev)
> +{

Unset dev->irqs?

> +}
>  #endif
>  
>  #ifdef CONFIG_PCIEPORTBUS
> -- 
> 2.1.4
> 



More information about the Linux-nvme mailing list