[PATCH v2 09/13] PCI: xgene-msi: Sanitise MSI allocation and affinity setting

Lorenzo Pieralisi lpieralisi at kernel.org
Fri Jul 11 03:11:02 PDT 2025


On Fri, Jul 11, 2025 at 11:55:17AM +0200, Lorenzo Pieralisi wrote:
> On Tue, Jul 08, 2025 at 06:34:00PM +0100, Marc Zyngier wrote:

[...]

> We could use MSInRx_HWIRQ_MASK, I can update it.
> 
> More importantly, what code would set data->hwirq[6:4] (and
> data->hwirq[7:7] below) ?

Forget it. It is the hwirq allocation itself that sets those bits:
there are 256 HWIRQs, and you use the effective CPU affinity to steer
the frame. It makes sense now.

I can update the code to use the mask above and merge it.

Sorry for the noise,
Lorenzo

> > +	frame	= FIELD_PREP(BIT(3), FIELD_GET(BIT(7), data->hwirq)) | cpu;
> >  
> > -/*
> > - * X-Gene v1 only has 16 MSI GIC IRQs for 2048 MSI vectors.  To maintain
> > - * the expected behaviour of .set_affinity for each MSI interrupt, the 16
> > - * MSI GIC IRQs are statically allocated to 8 X-Gene v1 cores (2 GIC IRQs
> > - * for each core).  The MSI vector is moved from 1 MSI GIC IRQ to another
> > - * MSI GIC IRQ to steer its MSI interrupt to correct X-Gene v1 core.  As a
> > - * consequence, the total MSI vectors that X-Gene v1 supports will be
> > - * reduced to 256 (2048/8) vectors.
> > - */
> > -static int hwirq_to_cpu(unsigned long hwirq)
> > -{
> > -	return (hwirq % num_possible_cpus());
> > -}
> > +	target_addr = msi->msi_addr;
> > +	target_addr += (FIELD_PREP(MSI_GROUP_MASK, frame) |
> > +			FIELD_PREP(MSI_INTR_MASK, msir));
> >  
> > -static unsigned long hwirq_to_canonical_hwirq(unsigned long hwirq)
> > -{
> > -	return (hwirq - hwirq_to_cpu(hwirq));
> > +	msg->address_hi = upper_32_bits(target_addr);
> > +	msg->address_lo = lower_32_bits(target_addr);
> > +	msg->data = FIELD_GET(DATA_HWIRQ_MASK, data->hwirq);
> >  }
> >  
> >  static int xgene_msi_set_affinity(struct irq_data *irqdata,
> >  				  const struct cpumask *mask, bool force)
> >  {
> >  	int target_cpu = cpumask_first(mask);
> > -	int curr_cpu;
> > -
> > -	curr_cpu = hwirq_to_cpu(irqdata->hwirq);
> > -	if (curr_cpu == target_cpu)
> > -		return IRQ_SET_MASK_OK_DONE;
> >  
> > -	/* Update MSI number to target the new CPU */
> > -	irqdata->hwirq = hwirq_to_canonical_hwirq(irqdata->hwirq) + target_cpu;
> > +	irq_data_update_effective_affinity(irqdata, cpumask_of(target_cpu));
> >  
> > +	/* Force the core code to regenerate the message */
> >  	return IRQ_SET_MASK_OK;
> >  }
> >  
> > @@ -173,23 +167,20 @@ static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
> >  				  unsigned int nr_irqs, void *args)
> >  {
> >  	struct xgene_msi *msi = domain->host_data;
> > -	int msi_irq;
> > +	irq_hw_number_t hwirq;
> >  
> >  	mutex_lock(&msi->bitmap_lock);
> >  
> > -	msi_irq = bitmap_find_next_zero_area(msi->bitmap, NR_MSI_VEC, 0,
> > -					     num_possible_cpus(), 0);
> > -	if (msi_irq < NR_MSI_VEC)
> > -		bitmap_set(msi->bitmap, msi_irq, num_possible_cpus());
> > -	else
> > -		msi_irq = -ENOSPC;
> > +	hwirq = find_first_zero_bit(msi->bitmap, NR_MSI_VEC);
> > +	if (hwirq < NR_MSI_VEC)
> > +		set_bit(hwirq, msi->bitmap);
> >  
> >  	mutex_unlock(&msi->bitmap_lock);
> >  
> > -	if (msi_irq < 0)
> > -		return msi_irq;
> > +	if (hwirq >= NR_MSI_VEC)
> > +		return -ENOSPC;
> >  
> > -	irq_domain_set_info(domain, virq, msi_irq,
> > +	irq_domain_set_info(domain, virq, hwirq,
> >  			    &xgene_msi_bottom_irq_chip, domain->host_data,
> >  			    handle_simple_irq, NULL, NULL);
> 
> This is something I don't get. We allocate an MSI, set a bit in the bitmap
> and set the hwirq to that value. Then, when we handle the IRQ below in
> 
> xgene_msi_isr()
> 
> hwirq = compute_hwirq(msi_grp, msir_idx, intr_idx);
> ret = generic_handle_domain_irq(xgene_msi->inner_domain, hwirq);
> 
> imagining that we changed the affinity for the IRQ so that the computed
> HWIRQ does not have zeros in bits[7:4], how would the domain HWIRQ
> matching work ?
> 
> Actually, how would an IRQ fire causing the hwirq[7:4] bits to be != 0
> in the first place ?
> 
> Forgive me if I am missing something obvious; the *current* MSI handling
> is very hard to grok, so it is certain I misunderstood it entirely.
> 
> Thanks,
> Lorenzo
> 
> > @@ -201,12 +192,10 @@ static void xgene_irq_domain_free(struct irq_domain *domain,
> >  {
> >  	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
> >  	struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
> > -	u32 hwirq;
> >  
> >  	mutex_lock(&msi->bitmap_lock);
> >  
> > -	hwirq = hwirq_to_canonical_hwirq(d->hwirq);
> > -	bitmap_clear(msi->bitmap, hwirq, num_possible_cpus());
> > +	clear_bit(d->hwirq, msi->bitmap);
> >  
> >  	mutex_unlock(&msi->bitmap_lock);
> >  
> > @@ -263,55 +252,30 @@ static void xgene_msi_isr(struct irq_desc *desc)
> >  	unsigned int *irqp = irq_desc_get_handler_data(desc);
> >  	struct irq_chip *chip = irq_desc_get_chip(desc);
> >  	struct xgene_msi *xgene_msi = xgene_msi_ctrl;
> > -	int msir_index, msir_val, hw_irq, ret;
> > -	u32 intr_index, grp_select, msi_grp;
> > +	unsigned long grp_pending;
> > +	int msir_idx;
> > +	u32 msi_grp;
> >  
> >  	chained_irq_enter(chip, desc);
> >  
> >  	msi_grp = irqp - xgene_msi->gic_irq;
> >  
> > -	/*
> > -	 * MSIINTn (n is 0..F) indicates if there is a pending MSI interrupt
> > -	 * If bit x of this register is set (x is 0..7), one or more interrupts
> > -	 * corresponding to MSInIRx is set.
> > -	 */
> > -	grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
> > -	while (grp_select) {
> > -		msir_index = ffs(grp_select) - 1;
> > -		/*
> > -		 * Calculate MSInIRx address to read to check for interrupts
> > -		 * (refer to termination address and data assignment
> > -		 * described in xgene_compose_msi_msg() )
> > -		 */
> > -		msir_val = xgene_msi_ir_read(xgene_msi, msi_grp, msir_index);
> > -		while (msir_val) {
> > -			intr_index = ffs(msir_val) - 1;
> > -			/*
> > -			 * Calculate MSI vector number (refer to the termination
> > -			 * address and data assignment described in
> > -			 * xgene_compose_msi_msg function)
> > -			 */
> > -			hw_irq = (((msir_index * IRQS_PER_IDX) + intr_index) *
> > -				 NR_HW_IRQS) + msi_grp;
> > -			/*
> > -			 * As we have multiple hw_irq that maps to single MSI,
> > -			 * always look up the virq using the hw_irq as seen from
> > -			 * CPU0
> > -			 */
> > -			hw_irq = hwirq_to_canonical_hwirq(hw_irq);
> > -			ret = generic_handle_domain_irq(xgene_msi->inner_domain, hw_irq);
> > +	grp_pending = xgene_msi_int_read(xgene_msi, msi_grp);
> > +
> > +	for_each_set_bit(msir_idx, &grp_pending, IDX_PER_GROUP) {
> > +		unsigned long msir;
> > +		int intr_idx;
> > +
> > +		msir = xgene_msi_ir_read(xgene_msi, msi_grp, msir_idx);
> > +
> > +		for_each_set_bit(intr_idx, &msir, IRQS_PER_IDX) {
> > +			irq_hw_number_t hwirq;
> > +			int ret;
> > +
> > +			hwirq = compute_hwirq(msi_grp, msir_idx, intr_idx);
> > +			ret = generic_handle_domain_irq(xgene_msi->inner_domain,
> > +							hwirq);
> >  			WARN_ON_ONCE(ret);
> > -			msir_val &= ~(1 << intr_index);
> > -		}
> > -		grp_select &= ~(1 << msir_index);
> > -
> > -		if (!grp_select) {
> > -			/*
> > -			 * We handled all interrupts happened in this group,
> > -			 * resample this group MSI_INTx register in case
> > -			 * something else has been made pending in the meantime
> > -			 */
> > -			grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
> >  		}
> >  	}
> >  
> > -- 
> > 2.39.2
> > 



More information about the linux-arm-kernel mailing list