[PATCH 14/16] PCI: hv: Switch to msi_create_parent_irq_domain()
Michael Kelley
mhklinux at outlook.com
Fri Jul 4 20:51:48 PDT 2025
From: Nam Cao <namcao at linutronix.de> Sent: Thursday, June 26, 2025 7:48 AM
>
> Move away from the legacy MSI domain setup, switch to use
> msi_create_parent_irq_domain().
With the additional tweak to this patch that you supplied separately,
everything in my testing on both x86 and arm64 seems to work OK. So
that's all good.
On arm64, I did notice the following IRQ domain information from
/sys/kernel/debug/irq/domains:
# cat HV-PCI-MSIX-1e03\:00\:00.0-12
name: HV-PCI-MSIX-1e03:00:00.0-12
size: 0
mapped: 7
flags: 0x00000213
IRQ_DOMAIN_FLAG_HIERARCHY
IRQ_DOMAIN_NAME_ALLOCATED
IRQ_DOMAIN_FLAG_MSI
IRQ_DOMAIN_FLAG_MSI_DEVICE
parent: 5D202AA8-1E03-4F0F-A786-390A0D2749E9-3
name: 5D202AA8-1E03-4F0F-A786-390A0D2749E9-3
size: 0
mapped: 7
flags: 0x00000103
IRQ_DOMAIN_FLAG_HIERARCHY
IRQ_DOMAIN_NAME_ALLOCATED
IRQ_DOMAIN_FLAG_MSI_PARENT
parent: hv_vpci_arm64
name: hv_vpci_arm64
size: 956
mapped: 31
flags: 0x00000003
IRQ_DOMAIN_FLAG_HIERARCHY
IRQ_DOMAIN_NAME_ALLOCATED
parent: irqchip at 0x00000000ffff0000-1
name: irqchip at 0x00000000ffff0000-1
size: 0
mapped: 47
flags: 0x00000003
IRQ_DOMAIN_FLAG_HIERARCHY
IRQ_DOMAIN_NAME_ALLOCATED
The 5D202AA8-1E03-4F0F-A786-390A0D2749E9-3 domain has
IRQ_DOMAIN_FLAG_MSI_PARENT set. But the hv_vpci_arm64
and irqchip at ... domains do not. Is that a problem? On x86,
the output is this, with IRQ_DOMAIN_FLAG_MSI_PARENT set
in the next level up VECTOR domain:
# cat HV-PCI-MSIX-6b71\:00\:02.0-12
name: HV-PCI-MSIX-6b71:00:02.0-12
size: 0
mapped: 17
flags: 0x00000213
IRQ_DOMAIN_FLAG_HIERARCHY
IRQ_DOMAIN_NAME_ALLOCATED
IRQ_DOMAIN_FLAG_MSI
IRQ_DOMAIN_FLAG_MSI_DEVICE
parent: 8564CB14-6B71-477C-B189-F175118E6FF0-3
name: 8564CB14-6B71-477C-B189-F175118E6FF0-3
size: 0
mapped: 17
flags: 0x00000103
IRQ_DOMAIN_FLAG_HIERARCHY
IRQ_DOMAIN_NAME_ALLOCATED
IRQ_DOMAIN_FLAG_MSI_PARENT
parent: VECTOR
name: VECTOR
size: 0
mapped: 67
flags: 0x00000103
IRQ_DOMAIN_FLAG_HIERARCHY
IRQ_DOMAIN_NAME_ALLOCATED
IRQ_DOMAIN_FLAG_MSI_PARENT
Finally, I've noted a couple of code review comments below. These
comments may reflect my lack of fully understanding the MSI
IRQ handling, in which case, please set me straight. Thanks,
Michael
>
> Signed-off-by: Nam Cao <namcao at linutronix.de>
> ---
> Cc: K. Y. Srinivasan <kys at microsoft.com>
> Cc: Haiyang Zhang <haiyangz at microsoft.com>
> Cc: Wei Liu <wei.liu at kernel.org>
> Cc: Dexuan Cui <decui at microsoft.com>
> Cc: linux-hyperv at vger.kernel.org
> ---
> drivers/pci/Kconfig | 1 +
> drivers/pci/controller/pci-hyperv.c | 98 +++++++++++++++++++++++------
> 2 files changed, 80 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index 9c0e4aaf4e8cb..9a249c65aedcd 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -223,6 +223,7 @@ config PCI_HYPERV
> tristate "Hyper-V PCI Frontend"
> depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && SYSFS
> select PCI_HYPERV_INTERFACE
> + select IRQ_MSI_LIB
> help
> The PCI device frontend driver allows the kernel to import arbitrary
> PCI devices from a PCI backend to support PCI driver domains.
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index ef5d655a0052c..3a24fadddb83b 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -44,6 +44,7 @@
> #include <linux/delay.h>
> #include <linux/semaphore.h>
> #include <linux/irq.h>
> +#include <linux/irqchip/irq-msi-lib.h>
> #include <linux/msi.h>
> #include <linux/hyperv.h>
> #include <linux/refcount.h>
> @@ -508,7 +509,6 @@ struct hv_pcibus_device {
> struct list_head children;
> struct list_head dr_list;
>
> - struct msi_domain_info msi_info;
> struct irq_domain *irq_domain;
>
> struct workqueue_struct *wq;
> @@ -1687,7 +1687,7 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
> struct msi_desc *msi = irq_data_get_msi_desc(irq_data);
>
> pdev = msi_desc_to_pci_dev(msi);
> - hbus = info->data;
> + hbus = domain->host_data;
> int_desc = irq_data_get_irq_chip_data(irq_data);
> if (!int_desc)
> return;
> @@ -1705,7 +1705,6 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
>
> static void hv_irq_mask(struct irq_data *data)
> {
> - pci_msi_mask_irq(data);
> if (data->parent_data->chip->irq_mask)
> irq_chip_mask_parent(data);
> }
> @@ -1716,7 +1715,6 @@ static void hv_irq_unmask(struct irq_data *data)
>
> if (data->parent_data->chip->irq_unmask)
> irq_chip_unmask_parent(data);
> - pci_msi_unmask_irq(data);
> }
>
> struct compose_comp_ctxt {
> @@ -2101,6 +2099,44 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
> msg->data = 0;
> }
>
> +static bool hv_pcie_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
> + struct irq_domain *real_parent, struct msi_domain_info *info)
> +{
> + struct irq_chip *chip = info->chip;
> +
> + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
> + return false;
> +
> + info->ops->msi_prepare = hv_msi_prepare;
> +
> + chip->irq_set_affinity = irq_chip_set_affinity_parent;
> +
> + if (IS_ENABLED(CONFIG_X86))
> + chip->flags |= IRQCHIP_MOVE_DEFERRED;
> +
> + return true;
> +}
> +
> +#define HV_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
> + MSI_FLAG_USE_DEF_CHIP_OPS | \
> + MSI_FLAG_PCI_MSI_MASK_PARENT)
> +#define HV_PCIE_MSI_FLAGS_SUPPORTED (MSI_FLAG_MULTI_PCI_MSI | \
> + MSI_FLAG_PCI_MSIX | \
> + MSI_GENERIC_FLAGS_MASK)
> +
> +static const struct msi_parent_ops hv_pcie_msi_parent_ops = {
> + .required_flags = HV_PCIE_MSI_FLAGS_REQUIRED,
> + .supported_flags = HV_PCIE_MSI_FLAGS_SUPPORTED,
> + .bus_select_token = DOMAIN_BUS_PCI_MSI,
> +#ifdef CONFIG_X86
> + .chip_flags = MSI_CHIP_FLAG_SET_ACK,
> +#elif defined(CONFIG_ARM64)
> + .chip_flags = MSI_CHIP_FLAG_SET_EOI,
> +#endif
> + .prefix = "HV-",
> + .init_dev_msi_info = hv_pcie_init_dev_msi_info,
> +};
> +
> /* HW Interrupt Chip Descriptor */
> static struct irq_chip hv_msi_irq_chip = {
> .name = "Hyper-V PCIe MSI",
> @@ -2108,7 +2144,6 @@ static struct irq_chip hv_msi_irq_chip = {
> .irq_set_affinity = irq_chip_set_affinity_parent,
> #ifdef CONFIG_X86
> .irq_ack = irq_chip_ack_parent,
> - .flags = IRQCHIP_MOVE_DEFERRED,
> #elif defined(CONFIG_ARM64)
> .irq_eoi = irq_chip_eoi_parent,
> #endif
Would it work to drop the #ifdef's and always set both .irq_ack
and .irq_eoi on x86 and on ARM64? Is which one gets called
controlled by the child HV-PCI-MSIX- ... domain, based on
the .chip_flags? I'm trying to reduce the #ifdef clutter. I
tested without the #ifdefs on both x86 and arm64, and
everything works, but I know that doesn't prove that it's
OK.
If the #ifdefs can go away, then I'd like to see a tweak to the way
.chip_flags is set. Rather than do an #ifdef inline for struct
msi_parent_ops hv_pcie_msi_parent_ops, add a #define
HV_MSI_CHIP_FLAGS in the existing #ifdef X86 and #ifdef ARM64
sections respectively near the top of this source file, and then
use HV_MSI_CHIP_FLAGS in struct msi_parent_ops
hv_pcie_msi_parent_ops. As much as is reasonable, I'd like to
not clutter the code with #ifdef X86 #elseif ARM64, but instead
group all the differences under the existing #ifdefs near the top.
There are some places where this isn't practical, but this seems
like a place that is practical.
> @@ -2116,9 +2151,37 @@ static struct irq_chip hv_msi_irq_chip = {
> .irq_unmask = hv_irq_unmask,
> };
>
> -static struct msi_domain_ops hv_msi_ops = {
> - .msi_prepare = hv_msi_prepare,
> - .msi_free = hv_msi_free,
> +static int hv_pcie_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs,
> + void *arg)
> +{
> + /* TODO: move the content of hv_compose_msi_msg() in here */
Could you elaborate on this TODO? Is the idea to loop through all the IRQs and
generate the MSI message for each one? What is the advantage to doing it here?
I noticed in Patch 3 of the series, the Aardvark controller has
advk_msi_irq_compose_msi_msg(), but you had not moved it into the domain
allocation path.
Also, is there some point in the time in the future where the "TODO" is likely to
become a "MUST DO"?
> + int ret;
> +
> + ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
> + if (ret < 0)
> + return ret;
> +
> + for (int i = 0; i < nr_irqs; i++) {
> + irq_domain_set_info(d, virq + i, 0, &hv_msi_irq_chip, NULL, FLOW_HANDLER, NULL,
> + FLOW_NAME);
> + }
> +
> + return 0;
> +}
> +
> +static void hv_pcie_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
> +{
> + struct msi_domain_info *info = d->host_data;
> +
> + for (int i = 0; i < nr_irqs; i++)
> + hv_msi_free(d, info, virq + i);
> +
> + irq_domain_free_irqs_top(d, virq, nr_irqs);
> +}
> +
> +static const struct irq_domain_ops hv_pcie_domain_ops = {
> + .alloc = hv_pcie_domain_alloc,
> + .free = hv_pcie_domain_free,
> };
>
> /**
> @@ -2136,17 +2199,14 @@ static struct msi_domain_ops hv_msi_ops = {
> */
> static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
> {
> - hbus->msi_info.chip = &hv_msi_irq_chip;
> - hbus->msi_info.ops = &hv_msi_ops;
> - hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
> - MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
> - MSI_FLAG_PCI_MSIX);
> - hbus->msi_info.handler = FLOW_HANDLER;
> - hbus->msi_info.handler_name = FLOW_NAME;
> - hbus->msi_info.data = hbus;
> - hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
> - &hbus->msi_info,
> - hv_pci_get_root_domain());
> + struct irq_domain_info info = {
> + .fwnode = hbus->fwnode,
> + .ops = &hv_pcie_domain_ops,
> + .host_data = hbus,
> + .parent = hv_pci_get_root_domain(),
> + };
> +
> + hbus->irq_domain = msi_create_parent_irq_domain(&info, &hv_pcie_msi_parent_ops);
> if (!hbus->irq_domain) {
> dev_err(&hbus->hdev->device,
> "Failed to build an MSI IRQ domain\n");
> --
> 2.39.5
>
More information about the Linux-mediatek
mailing list