[v8,5/7] PCI: mediatek-gen3: Add MSI support

Jianjun Wang jianjun.wang at mediatek.com
Wed Mar 10 06:48:49 GMT 2021


Hi Marc,

Thanks for your review.

On Tue, 2021-03-09 at 11:23 +0000, Marc Zyngier wrote:
> On Wed, 24 Feb 2021 06:11:30 +0000,
> Jianjun Wang <jianjun.wang at mediatek.com> wrote:
> > 
> > Add MSI support for MediaTek Gen3 PCIe controller.
> > 
> > This PCIe controller supports up to 256 MSI vectors, the MSI hardware
> > block diagram is as follows:
> > 
> >                   +-----+
> >                   | GIC |
> >                   +-----+
> >                      ^
> >                      |
> >                  port->irq
> >                      |
> >              +-+-+-+-+-+-+-+-+
> >              |0|1|2|3|4|5|6|7| (PCIe intc)
> >              +-+-+-+-+-+-+-+-+
> >               ^ ^           ^
> >               | |    ...    |
> >       +-------+ +------+    +-----------+
> >       |                |                |
> > +-+-+---+--+--+  +-+-+---+--+--+  +-+-+---+--+--+
> > |0|1|...|30|31|  |0|1|...|30|31|  |0|1|...|30|31| (MSI sets)
> > +-+-+---+--+--+  +-+-+---+--+--+  +-+-+---+--+--+
> >  ^ ^      ^  ^    ^ ^      ^  ^    ^ ^      ^  ^
> >  | |      |  |    | |      |  |    | |      |  |  (MSI vectors)
> >  | |      |  |    | |      |  |    | |      |  |
> > 
> >   (MSI SET0)       (MSI SET1)  ...   (MSI SET7)
> > 
> > With 256 MSI vectors supported, the MSI vectors are composed of 8 sets,
> > each set has its own address for MSI message, and supports 32 MSI vectors
> > to generate interrupt.
> > 
> > Signed-off-by: Jianjun Wang <jianjun.wang at mediatek.com>
> > Acked-by: Ryder Lee <ryder.lee at mediatek.com>
> > ---
> >  drivers/pci/controller/pcie-mediatek-gen3.c | 277 ++++++++++++++++++++
> >  1 file changed, 277 insertions(+)
> > 
> > diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
> > index 8b3b5f838b69..3cbec22ece0c 100644
> > --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> > +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> > @@ -14,6 +14,7 @@
> >  #include <linux/irqdomain.h>
> >  #include <linux/kernel.h>
> >  #include <linux/module.h>
> > +#include <linux/msi.h>
> >  #include <linux/pci.h>
> >  #include <linux/phy/phy.h>
> >  #include <linux/platform_device.h>
> > @@ -48,12 +49,29 @@
> >  #define PCIE_LINK_STATUS_REG		0x154
> >  #define PCIE_PORT_LINKUP		BIT(8)
> >  
> > +#define PCIE_MSI_SET_NUM		8
> > +#define PCIE_MSI_IRQS_PER_SET		32
> > +#define PCIE_MSI_IRQS_NUM \
> > +	(PCIE_MSI_IRQS_PER_SET * PCIE_MSI_SET_NUM)
> > +
> >  #define PCIE_INT_ENABLE_REG		0x180
> > +#define PCIE_MSI_ENABLE			GENMASK(PCIE_MSI_SET_NUM + 8 - 1, 8)
> > +#define PCIE_MSI_SHIFT			8
> >  #define PCIE_INTX_SHIFT			24
> >  #define PCIE_INTX_ENABLE \
> >  	GENMASK(PCIE_INTX_SHIFT + PCI_NUM_INTX - 1, PCIE_INTX_SHIFT)
> >  
> >  #define PCIE_INT_STATUS_REG		0x184
> > +#define PCIE_MSI_SET_ENABLE_REG		0x190
> > +#define PCIE_MSI_SET_ENABLE		GENMASK(PCIE_MSI_SET_NUM - 1, 0)
> > +
> > +#define PCIE_MSI_SET_BASE_REG		0xc00
> > +#define PCIE_MSI_SET_OFFSET		0x10
> > +#define PCIE_MSI_SET_STATUS_OFFSET	0x04
> > +#define PCIE_MSI_SET_ENABLE_OFFSET	0x08
> > +
> > +#define PCIE_MSI_SET_ADDR_HI_BASE	0xc80
> > +#define PCIE_MSI_SET_ADDR_HI_OFFSET	0x04
> >  
> >  #define PCIE_TRANS_TABLE_BASE_REG	0x800
> >  #define PCIE_ATR_SRC_ADDR_MSB_OFFSET	0x4
> > @@ -73,6 +91,16 @@
> >  #define PCIE_ATR_TLP_TYPE_MEM		PCIE_ATR_TLP_TYPE(0)
> >  #define PCIE_ATR_TLP_TYPE_IO		PCIE_ATR_TLP_TYPE(2)
> >  
> > +/**
> > + * struct mtk_pcie_msi - MSI information for each set
> > + * @base: IO mapped register base
> > + * @msg_addr: MSI message address
> > + */
> > +struct mtk_msi_set {
> > +	void __iomem *base;
> > +	phys_addr_t msg_addr;
> > +};
> > +
> >  /**
> >   * struct mtk_pcie_port - PCIe port information
> >   * @dev: pointer to PCIe device
> > @@ -86,6 +114,11 @@
> >   * @irq: PCIe controller interrupt number
> >   * @irq_lock: lock protecting IRQ register access
> >   * @intx_domain: legacy INTx IRQ domain
> > + * @msi_domain: MSI IRQ domain
> > + * @msi_bottom_domain: MSI IRQ bottom domain
> > + * @msi_sets: MSI sets information
> > + * @lock: lock protecting IRQ bit map
> > + * @msi_irq_in_use: bit map for assigned MSI IRQ
> >   */
> >  struct mtk_pcie_port {
> >  	struct device *dev;
> > @@ -100,6 +133,11 @@ struct mtk_pcie_port {
> >  	int irq;
> >  	raw_spinlock_t irq_lock;
> >  	struct irq_domain *intx_domain;
> > +	struct irq_domain *msi_domain;
> > +	struct irq_domain *msi_bottom_domain;
> > +	struct mtk_msi_set msi_sets[PCIE_MSI_SET_NUM];
> > +	struct mutex lock;
> > +	DECLARE_BITMAP(msi_irq_in_use, PCIE_MSI_IRQS_NUM);
> >  };
> >  
> >  /**
> > @@ -197,6 +235,35 @@ static int mtk_pcie_set_trans_table(struct mtk_pcie_port *port,
> >  	return 0;
> >  }
> >  
> > +static void mtk_pcie_enable_msi(struct mtk_pcie_port *port)
> > +{
> > +	int i;
> > +	u32 val;
> > +
> > +	val = readl_relaxed(port->base + PCIE_MSI_SET_ENABLE_REG);
> > +	val |= PCIE_MSI_SET_ENABLE;
> > +	writel_relaxed(val, port->base + PCIE_MSI_SET_ENABLE_REG);
> > +
> > +	val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
> > +	val |= PCIE_MSI_ENABLE;
> > +	writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
> 
> Shouldn't you configure the capture addresses *before* enabling
> things? Is there any need for locking here, given that you are
> modifying global registers?

Yes, I will move these codes to the back of the configure capture
address in the next version.

I think the lock may not be needed because this function is only
executed once when driver probe.

> 
> > +
> > +	for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
> > +		struct mtk_msi_set *msi_set = &port->msi_sets[i];
> > +
> > +		msi_set->base = port->base + PCIE_MSI_SET_BASE_REG +
> > +				i * PCIE_MSI_SET_OFFSET;
> > +		msi_set->msg_addr = port->reg_base + PCIE_MSI_SET_BASE_REG +
> > +				    i * PCIE_MSI_SET_OFFSET;
> > +
> > +		/* Configure the MSI capture address */
> > +		writel_relaxed(lower_32_bits(msi_set->msg_addr), msi_set->base);
> > +		writel_relaxed(upper_32_bits(msi_set->msg_addr),
> > +			       port->base + PCIE_MSI_SET_ADDR_HI_BASE +
> > +			       i * PCIE_MSI_SET_ADDR_HI_OFFSET);
> > +	}
> > +}
> > +
> >  static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
> >  {
> >  	struct resource_entry *entry;
> > @@ -247,6 +314,8 @@ static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
> >  		return err;
> >  	}
> >  
> > +	mtk_pcie_enable_msi(port);
> > +
> >  	/* Set PCIe translation windows */
> >  	resource_list_for_each_entry(entry, &host->windows) {
> >  		struct resource *res = entry->res;
> > @@ -290,6 +359,148 @@ static int mtk_pcie_set_affinity(struct irq_data *data,
> >  	return -EINVAL;
> >  }
> >  
> > +static void mtk_pcie_irq_mask(struct irq_data *data)
> 
> It'd be good if you used _msi_ in function names that deal with MSIs.

OK, I will fix it in the next version.

> 
> > +{
> > +	pci_msi_mask_irq(data);
> > +	irq_chip_mask_parent(data);
> > +}
> > +
> > +static void mtk_pcie_irq_unmask(struct irq_data *data)
> > +{
> > +	pci_msi_unmask_irq(data);
> > +	irq_chip_unmask_parent(data);
> > +}
> > +
> > +static struct irq_chip mtk_msi_irq_chip = {
> > +	.name = "MSI",
> > +	.irq_enable = mtk_pcie_irq_unmask,
> > +	.irq_disable = mtk_pcie_irq_mask,
> 
> Same comment as for the previous patch: enable/disable serve no
> purpose here.

Replied in the previous patch, the enable/disable callback is used when
the system suspend/resume.

> 
> > +	.irq_ack = irq_chip_ack_parent,
> > +	.irq_mask = mtk_pcie_irq_mask,
> > +	.irq_unmask = mtk_pcie_irq_unmask,
> > +};
> > +
> > +static struct msi_domain_info mtk_msi_domain_info = {
> > +	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_PCI_MSIX |
> > +		   MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI),
> 
> minor nit: keep the *_OPS flag on one line, and the *_PCI_* flags on
> another.

Sure, I will fix it in the next version.

> 
> > +	.chip	= &mtk_msi_irq_chip,
> > +};
> > +
> > +static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
> > +{
> > +	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
> > +	struct mtk_pcie_port *port = data->domain->host_data;
> > +	unsigned long hwirq;
> > +
> > +	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
> > +
> > +	msg->address_hi = upper_32_bits(msi_set->msg_addr);
> > +	msg->address_lo = lower_32_bits(msi_set->msg_addr);
> > +	msg->data = hwirq;
> > +	dev_dbg(port->dev, "msi#%#lx address_hi %#x address_lo %#x data %d\n",
> > +		hwirq, msg->address_hi, msg->address_lo, msg->data);
> > +}
> > +
> > +static void mtk_msi_bottom_irq_ack(struct irq_data *data)
> > +{
> > +	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
> > +	unsigned long hwirq;
> > +
> > +	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
> > +
> > +	writel_relaxed(BIT(hwirq), msi_set->base + PCIE_MSI_SET_STATUS_OFFSET);
> > +}
> > +
> > +static void mtk_msi_bottom_irq_mask(struct irq_data *data)
> > +{
> > +	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
> > +	struct mtk_pcie_port *port = data->domain->host_data;
> > +	unsigned long hwirq, flags;
> > +	u32 val;
> > +
> > +	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
> > +
> > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > +	val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
> > +	val &= ~BIT(hwirq);
> > +	writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
> > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > +}
> > +
> > +static void mtk_msi_bottom_irq_unmask(struct irq_data *data)
> > +{
> > +	struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
> > +	struct mtk_pcie_port *port = data->domain->host_data;
> > +	unsigned long hwirq, flags;
> > +	u32 val;
> > +
> > +	hwirq =	data->hwirq % PCIE_MSI_IRQS_PER_SET;
> > +
> > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > +	val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
> > +	val |= BIT(hwirq);
> > +	writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
> > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > +}
> > +
> > +static struct irq_chip mtk_msi_bottom_irq_chip = {
> > +	.irq_ack		= mtk_msi_bottom_irq_ack,
> > +	.irq_mask		= mtk_msi_bottom_irq_mask,
> > +	.irq_unmask		= mtk_msi_bottom_irq_unmask,
> > +	.irq_compose_msi_msg	= mtk_compose_msi_msg,
> > +	.irq_set_affinity	= mtk_pcie_set_affinity,
> > +	.name			= "MSI",
> > +};
> > +
> > +static int mtk_msi_bottom_domain_alloc(struct irq_domain *domain,
> > +				       unsigned int virq, unsigned int nr_irqs,
> > +				       void *arg)
> > +{
> > +	struct mtk_pcie_port *port = domain->host_data;
> > +	struct mtk_msi_set *msi_set;
> > +	int i, hwirq, set_idx;
> > +
> > +	mutex_lock(&port->lock);
> > +
> > +	hwirq = bitmap_find_free_region(port->msi_irq_in_use, PCIE_MSI_IRQS_NUM,
> > +					order_base_2(nr_irqs));
> > +
> > +	mutex_unlock(&port->lock);
> > +
> > +	if (hwirq < 0)
> > +		return -ENOSPC;
> > +
> > +	set_idx = hwirq / PCIE_MSI_IRQS_PER_SET;
> > +	msi_set = &port->msi_sets[set_idx];
> > +
> > +	for (i = 0; i < nr_irqs; i++)
> > +		irq_domain_set_info(domain, virq + i, hwirq + i,
> > +				    &mtk_msi_bottom_irq_chip, msi_set,
> > +				    handle_edge_irq, NULL, NULL);
> > +
> > +	return 0;
> > +}
> > +
> > +static void mtk_msi_bottom_domain_free(struct irq_domain *domain,
> > +				       unsigned int virq, unsigned int nr_irqs)
> > +{
> > +	struct mtk_pcie_port *port = domain->host_data;
> > +	struct irq_data *data = irq_domain_get_irq_data(domain, virq);
> > +
> > +	mutex_lock(&port->lock);
> > +
> > +	bitmap_clear(port->msi_irq_in_use, data->hwirq, nr_irqs);
> > +
> > +	mutex_unlock(&port->lock);
> > +
> > +	irq_domain_free_irqs_common(domain, virq, nr_irqs);
> > +}
> > +
> > +static const struct irq_domain_ops mtk_msi_bottom_domain_ops = {
> > +	.alloc = mtk_msi_bottom_domain_alloc,
> > +	.free = mtk_msi_bottom_domain_free,
> > +};
> > +
> >  static void mtk_intx_mask(struct irq_data *data)
> >  {
> >  	struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
> > @@ -360,6 +571,7 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
> >  {
> >  	struct device *dev = port->dev;
> >  	struct device_node *intc_node, *node = dev->of_node;
> > +	int ret;
> >  
> >  	raw_spin_lock_init(&port->irq_lock);
> >  
> > @@ -377,7 +589,34 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
> >  		return -ENODEV;
> >  	}
> >  
> > +	/* Setup MSI */
> > +	mutex_init(&port->lock);
> > +
> > +	port->msi_bottom_domain = irq_domain_add_linear(node, PCIE_MSI_IRQS_NUM,
> > +				  &mtk_msi_bottom_domain_ops, port);
> > +	if (!port->msi_bottom_domain) {
> > +		dev_info(dev, "failed to create MSI bottom domain\n");
> > +		ret = -ENODEV;
> > +		goto err_msi_bottom_domain;
> > +	}
> > +
> > +	port->msi_domain = pci_msi_create_irq_domain(dev->fwnode,
> > +						     &mtk_msi_domain_info,
> > +						     port->msi_bottom_domain);
> > +	if (!port->msi_domain) {
> > +		dev_info(dev, "failed to create MSI domain\n");
> > +		ret = -ENODEV;
> > +		goto err_msi_domain;
> > +	}
> > +
> >  	return 0;
> > +
> > +err_msi_domain:
> > +	irq_domain_remove(port->msi_bottom_domain);
> > +err_msi_bottom_domain:
> > +	irq_domain_remove(port->intx_domain);
> > +
> > +	return ret;
> >  }
> >  
> >  static void mtk_pcie_irq_teardown(struct mtk_pcie_port *port)
> > @@ -387,9 +626,39 @@ static void mtk_pcie_irq_teardown(struct mtk_pcie_port *port)
> >  	if (port->intx_domain)
> >  		irq_domain_remove(port->intx_domain);
> >  
> > +	if (port->msi_domain)
> > +		irq_domain_remove(port->msi_domain);
> > +
> > +	if (port->msi_bottom_domain)
> > +		irq_domain_remove(port->msi_bottom_domain);
> > +
> >  	irq_dispose_mapping(port->irq);
> >  }
> >  
> > +static void mtk_pcie_msi_handler(struct mtk_pcie_port *port, int set_idx)
> > +{
> > +	struct mtk_msi_set *msi_set = &port->msi_sets[set_idx];
> > +	unsigned long msi_enable, msi_status;
> > +	unsigned int virq;
> > +	irq_hw_number_t bit, hwirq;
> > +
> > +	msi_enable = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
> > +
> > +	do {
> > +		msi_status = readl_relaxed(msi_set->base +
> > +					   PCIE_MSI_SET_STATUS_OFFSET);
> > +		msi_status &= msi_enable;
> > +		if (!msi_status)
> > +			break;
> > +
> > +		for_each_set_bit(bit, &msi_status, PCIE_MSI_IRQS_PER_SET) {
> > +			hwirq = bit + set_idx * PCIE_MSI_IRQS_PER_SET;
> > +			virq = irq_find_mapping(port->msi_bottom_domain, hwirq);
> > +			generic_handle_irq(virq);
> > +		}
> > +	} while (true);
> > +}
> > +
> >  static void mtk_pcie_irq_handler(struct irq_desc *desc)
> >  {
> >  	struct mtk_pcie_port *port = irq_desc_get_handler_data(desc);
> > @@ -408,6 +677,14 @@ static void mtk_pcie_irq_handler(struct irq_desc *desc)
> >  		generic_handle_irq(virq);
> >  	}
> >  
> > +	irq_bit = PCIE_MSI_SHIFT;
> > +	for_each_set_bit_from(irq_bit, &status, PCIE_MSI_SET_NUM +
> > +			      PCIE_MSI_SHIFT) {
> > +		mtk_pcie_msi_handler(port, irq_bit - PCIE_MSI_SHIFT);
> > +
> > +		writel_relaxed(BIT(irq_bit), port->base + PCIE_INT_STATUS_REG);
> 
> Isn't this write the same thing you have for EOI in the INTx case?
> While I could understand your description in that case (this is a
> resampling operation), I don't get what this does here. Either this is
> also an EOI, but your initial description doesn't make sense, or it is
> an Ack, and it should be moved to the right place.
> 
> Which one is it?

I think it should be an EOI which used to clear the interrupt status of
a single set in the PCIe intc field, maybe I should move it to the end
of the mtk_pcie_msi_handler() function.

                  +-----+
                  | GIC |
                  +-----+
                     ^
                     |
                 port->irq
                     |
             +-+-+-+-+-+-+-+-+
             |0|1|2|3|4|5|6|7| (PCIe intc)
             +-+-+-+-+-+-+-+-+
              ^ ^           ^
              | |    ...    |
      +-------+ +------+    +-----------+
      |                |                |
+-+-+---+--+--+  +-+-+---+--+--+  +-+-+---+--+--+
|0|1|...|30|31|  |0|1|...|30|31|  |0|1|...|30|31| (MSI sets)
+-+-+---+--+--+  +-+-+---+--+--+  +-+-+---+--+--+
 ^ ^      ^  ^    ^ ^      ^  ^    ^ ^      ^  ^
 | |      |  |    | |      |  |    | |      |  |  (MSI vectors)
 | |      |  |    | |      |  |    | |      |  |

  (MSI SET0)       (MSI SET1)  ...   (MSI SET7)

I would like to ask another question. In this interrupt architecture, we
cannot implement an affinity for PCIe interrupts, so we return a
negative value in the mtk_pcie_set_affinity callback as follows: 

+static int mtk_pcie_set_affinity(struct irq_data *data,
+                                const struct cpumask *mask, bool force)
+{
+       return -EINVAL;
+}

But there will always be error logs when hotplug a CPU:

~ # echo 0 > /sys/devices/system/cpu/cpu1/online
[   93.633059] IRQ255: set affinity failed(-22).
[   93.633624] IRQ256: set affinity failed(-22).
[   93.634222] CPU1: shutdown
[   93.634586] psci: CPU1 killed (polled 0 ms)

Or when the system suspends:

~ # echo mem > /sys/power/state
[   93.635145] cpuhp: cpu_off cluster=0, cpu=1
[  169.835653] PM: suspend entry (deep)
[  169.836717] Filesystems sync: 0.000 seconds
[  169.837924] Freezing user space processes ... (elapsed 0.001 seconds)
done.
[  169.839922] OOM killer disabled.
[  169.840336] Freezing remaining freezable tasks ... (elapsed 0.001
seconds) done.
[  169.844715] Disabling non-boot CPUs ...
[  169.846443] IRQ255: set affinity failed(-22).
[  169.847002] IRQ256: set affinity failed(-22).
[  169.847586] CPU2: shutdown
[  169.847943] psci: CPU2 killed (polled 0 ms)
[  169.848489] cpuhp: cpu_off cluster=0, cpu=2
[  169.850285] IRQ255: set affinity failed(-22).
[  169.851369] IRQ256: set affinity failed(-22).
...

Sometimes this can cause misunderstandings to users, do we have a chance
to prevent this error log?

> 
> Thanks,
> 
> 	M.
> 

Thanks.


More information about the linux-arm-kernel mailing list