[PATCH] genirq/msi: Make sure early activation of all PCI MSIs

Marc Zyngier maz at kernel.org
Thu Jan 21 16:25:26 EST 2021


Hi Shameer,

On Thu, 21 Jan 2021 11:02:47 +0000,
Shameer Kolothum <shameerali.kolothum.thodi at huawei.com> wrote:
> 
> We currently do early activation of MSI irqs for PCI/MSI based on
> the MSI_FLAG_ACTIVATE_EARLY flag. Though this activates all the
> allocated MSIs in the case of MSI-X, it only does so for the
> base irq in the case of MSI. This is because, for MSI, there
> is only one msi_desc entry for all the 32 irqs it can support
> and the current implementation iterates over the msi entries and
> ends up activating the base irq only.
> 
> The above creates an issue on platforms where the msi controller
> supports direct injection of vLPIs(eg: ARM GICv4 ITS). On these
> platforms, upon irq activation, ITS driver maps the event to an
> ITT entry. And for Guest pass-through to work, early mapping of
> all the dev MSI vectors is required. Otherwise, the vfio irq
> bypass manager registration will fail. eg, On a HiSilicon D06
> platform with GICv4 enabled, Guest boot with zip dev pass-through
> reports,
> 
> "vfio-pci 0000:75:00.1: irq bypass producer (token 0000000006e5176a)
> registration fails: 66311"
> 
> and Guest boot fails.
> 
> This is traced to,
>    kvm_arch_irq_bypass_add_producer
>      kvm_vgic_v4_set_forwarding
>        vgic_its_resolve_lpi --> returns E_ITS_INT_UNMAPPED_INTERRUPT
> 
> Hence make sure we activate all the irqs for both MSI and MSI-x cases.
> 
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>
> ---
> It is unclear to me whether not performing the early activation of all
> MSI irqs was deliberate and has consequences on any other platforms.
> Please let me know.

Probably just an oversight.

> 
> Thanks,
> Shameer 
> ---
>  kernel/irq/msi.c | 114 +++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 90 insertions(+), 24 deletions(-)
> 
> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> index 2c0c4d6d0f83..eec187fc32a9 100644
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -395,6 +395,78 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
>  	return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
>  }
>  
> +static void msi_domain_deactivate_irq(struct irq_domain *domain, int irq)
> +{
> +	struct irq_data *irqd;
> +
> +	irqd = irq_domain_get_irq_data(domain, irq);
> +	if (irqd_is_activated(irqd))
> +		irq_domain_deactivate_irq(irqd);
> +}
> +
> +static int msi_domain_activate_irq(struct irq_domain *domain,
> +				   int irq, bool can_reserve)
> +{
> +	struct irq_data *irqd;
> +
> +	irqd = irq_domain_get_irq_data(domain, irq);
> +	if (!can_reserve) {
> +		irqd_clr_can_reserve(irqd);
> +		if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
> +			irqd_set_msi_nomask_quirk(irqd);
> +	}
> +	return irq_domain_activate_irq(irqd, can_reserve);
> +}
> +
> +static int msi_domain_activate_msix_irqs(struct irq_domain *domain,
> +					 struct device *dev, bool can_reserve)
> +{
> +	struct msi_desc *desc;
> +	int ret, irq;
> +
> +	for_each_msi_entry(desc, dev) {
> +		irq = desc->irq;
> +		ret = msi_domain_activate_irq(domain, irq, can_reserve);
> +		if (ret)
> +			goto out;
> +	}
> +	return 0;
> +
> +out:
> +	for_each_msi_entry(desc, dev) {
> +		if (irq == desc->irq)
> +			break;
> +		msi_domain_deactivate_irq(domain, desc->irq);
> +	}
> +	return ret;
> +}
> +
> +static int msi_domain_activate_msi_irqs(struct irq_domain *domain,
> +					struct device *dev, bool can_reserve)
> +{
> +	struct msi_desc *desc;
> +	int i, ret, base, irq;
> +
> +	desc = first_msi_entry(dev);
> +	base = desc->irq;
> +
> +	for (i = 0; i < desc->nvec_used; i++) {
> +		irq = base + i;
> +		ret = msi_domain_activate_irq(domain, irq, can_reserve);
> +		if (ret)
> +			goto out;
> +	}
> +	return 0;
> +
> +out:
> +	for (i = 0; i < desc->nvec_used; i++) {
> +		if (irq == base + i)
> +			break;
> +		msi_domain_deactivate_irq(domain, base + i);
> +	}
> +	return ret;
> +}
> +
>  int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
>  			    int nvec)
>  {
> @@ -443,21 +515,25 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
>  		else
>  			dev_dbg(dev, "irq [%d-%d] for MSI\n",
>  				virq, virq + desc->nvec_used - 1);
> -		/*
> -		 * This flag is set by the PCI layer as we need to activate
> -		 * the MSI entries before the PCI layer enables MSI in the
> -		 * card. Otherwise the card latches a random msi message.
> -		 */
> -		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
> -			continue;
> +	}
>  
> -		irq_data = irq_domain_get_irq_data(domain, desc->irq);
> -		if (!can_reserve) {
> -			irqd_clr_can_reserve(irq_data);
> -			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
> -				irqd_set_msi_nomask_quirk(irq_data);
> -		}
> -		ret = irq_domain_activate_irq(irq_data, can_reserve);
> +	/*
> +	 * This flag is set by the PCI layer as we need to activate
> +	 * the MSI entries before the PCI layer enables MSI in the
> +	 * card. Otherwise the card latches a random msi message.
> +	 * Early activation is also required when the msi controller
> +	 * supports direct injection of virtual LPIs(eg. ARM GICv4 ITS).
> +	 * Otherwise, the DevID/EventID -> LPI translation for pass-through
> +	 * devices will fail. Make sure we do activate all the allocated
> +	 * irqs for MSI and MSI-X cases.
> +	 */
> +	if ((info->flags & MSI_FLAG_ACTIVATE_EARLY)) {
> +		desc = first_msi_entry(dev);
> +
> +		if (desc->msi_attrib.is_msix)
> +			ret = msi_domain_activate_msix_irqs(domain, dev, can_reserve);
> +		else
> +			ret = msi_domain_activate_msi_irqs(domain, dev, can_reserve);
>  		if (ret)
>  			goto cleanup;
>  	}
> @@ -475,16 +551,6 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
>  	return 0;
>  
>  cleanup:
> -	for_each_msi_entry(desc, dev) {
> -		struct irq_data *irqd;
> -
> -		if (desc->irq == virq)
> -			break;
> -
> -		irqd = irq_domain_get_irq_data(domain, desc->irq);
> -		if (irqd_is_activated(irqd))
> -			irq_domain_deactivate_irq(irqd);
> -	}
>  	msi_domain_free_irqs(domain, dev);
>  	return ret;
>  }
> -- 
> 2.17.1
> 
> 

I find this pretty complicated, and the I'd like to avoid injecting
the PCI MSI-vs-MSI-X concept in something that is supposed to be
bus-agnostic.

What's wrong with the following (untested) patch, which looks much
simpler?

Thanks,

	M.

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2c0c4d6d0f83..a930d03c2c67 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -451,15 +451,17 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 		if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
 			continue;
 
-		irq_data = irq_domain_get_irq_data(domain, desc->irq);
-		if (!can_reserve) {
-			irqd_clr_can_reserve(irq_data);
-			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
-				irqd_set_msi_nomask_quirk(irq_data);
+		for (i = 0; i < desc->nvec_used; i++) {
+			irq_data = irq_domain_get_irq_data(domain, desc->irq + i);
+			if (!can_reserve) {
+				irqd_clr_can_reserve(irq_data);
+				if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+					irqd_set_msi_nomask_quirk(irq_data);
+			}
+			ret = irq_domain_activate_irq(irq_data, can_reserve);
+			if (ret)
+				goto cleanup;
 		}
-		ret = irq_domain_activate_irq(irq_data, can_reserve);
-		if (ret)
-			goto cleanup;
 	}
 
 	/*
@@ -478,12 +480,14 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 	for_each_msi_entry(desc, dev) {
 		struct irq_data *irqd;
 
+		for (i = 0; i < desc->nvec_used; i++) {
+			irqd = irq_domain_get_irq_data(domain, desc->irq + i);
+			if (irqd_is_activated(irqd))
+				irq_domain_deactivate_irq(irqd);
+		}
+
 		if (desc->irq == virq)
 			break;
-
-		irqd = irq_domain_get_irq_data(domain, desc->irq);
-		if (irqd_is_activated(irqd))
-			irq_domain_deactivate_irq(irqd);
 	}
 	msi_domain_free_irqs(domain, dev);
 	return ret;

-- 
Without deviation from the norm, progress is not possible.



More information about the linux-arm-kernel mailing list