[PATCH v2 3/7] irqchip/apple-aic: Add Fast IPI support
Marc Zyngier
maz at kernel.org
Fri Feb 25 06:39:24 PST 2022
On Thu, 24 Feb 2022 13:07:37 +0000,
Hector Martin <marcan at marcan.st> wrote:
>
> The newer AICv2 present in t600x SoCs does not have legacy IPI support
> at all. Since t8103 also supports Fast IPIs, implement support for this
> first. The legacy IPI code is left as a fallback, so it can be
> potentially used by older SoCs in the future.
>
> The vIPI code is shared; only the IPI firing/acking bits change for Fast
> IPIs.
>
> Signed-off-by: Hector Martin <marcan at marcan.st>
> ---
> drivers/irqchip/irq-apple-aic.c | 122 ++++++++++++++++++++++++++++----
> 1 file changed, 109 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
> index 38091ebb9403..613e0ebdabdc 100644
> --- a/drivers/irqchip/irq-apple-aic.c
> +++ b/drivers/irqchip/irq-apple-aic.c
> @@ -24,7 +24,7 @@
> * - Default "this CPU" register view and explicit per-CPU views
> *
> * In addition, this driver also handles FIQs, as these are routed to the same
> - * IRQ vector. These are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and
> + * IRQ vector. These are used for Fast IPIs, the ARMv8 timer IRQs, and
> * performance counters (TODO).
> *
> * Implementation notes:
> @@ -52,9 +52,11 @@
> #include <linux/irqchip.h>
> #include <linux/irqchip/arm-vgic-info.h>
> #include <linux/irqdomain.h>
> +#include <linux/jump_label.h>
> #include <linux/limits.h>
> #include <linux/of_address.h>
> #include <linux/slab.h>
> +#include <asm/cputype.h>
> #include <asm/exception.h>
> #include <asm/sysreg.h>
> #include <asm/virt.h>
> @@ -106,7 +108,6 @@
>
> /*
> * IMP-DEF sysregs that control FIQ sources
> - * Note: sysreg-based IPIs are not supported yet.
> */
>
> /* Core PMC control register */
> @@ -155,6 +156,10 @@
> #define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4)
> #define UPMSR_IACT BIT(0)
>
> +/* MPIDR fields */
> +#define MPIDR_CPU(x) MPIDR_AFFINITY_LEVEL(x, 0)
> +#define MPIDR_CLUSTER(x) MPIDR_AFFINITY_LEVEL(x, 1)
> +
> #define AIC_NR_FIQ 4
> #define AIC_NR_SWIPI 32
>
> @@ -173,11 +178,44 @@
> #define AIC_TMR_EL02_PHYS AIC_TMR_GUEST_PHYS
> #define AIC_TMR_EL02_VIRT AIC_TMR_GUEST_VIRT
>
> +DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
> +
> +struct aic_info {
> + int version;
> +
> + /* Features */
> + bool fast_ipi;
> +};
> +
> +static const struct aic_info aic1_info = {
> + .version = 1,
> +};
> +
> +static const struct aic_info aic1_fipi_info = {
> + .version = 1,
> +
> + .fast_ipi = true,
> +};
> +
> +static const struct of_device_id aic_info_match[] = {
> + {
> + .compatible = "apple,t8103-aic",
> + .data = &aic1_fipi_info,
> + },
> + {
> + .compatible = "apple,aic",
> + .data = &aic1_info,
> + },
> + {}
> +};
> +
> struct aic_irq_chip {
> void __iomem *base;
> struct irq_domain *hw_domain;
> struct irq_domain *ipi_domain;
> int nr_hw;
> +
> + struct aic_info info;
> };
>
> static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked);
> @@ -386,8 +424,12 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
> */
>
> if (read_sysreg_s(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {
> - pr_err_ratelimited("Fast IPI fired. Acking.\n");
> - write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
> + if (static_branch_likely(&use_fast_ipi)) {
> + aic_handle_ipi(regs);
> + } else {
> + pr_err_ratelimited("Fast IPI fired. Acking.\n");
> + write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
> + }
> }
>
> if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
> @@ -563,6 +605,22 @@ static const struct irq_domain_ops aic_irq_domain_ops = {
> * IPI irqchip
> */
>
> +static void aic_ipi_send_fast(int cpu)
> +{
> + u64 mpidr = cpu_logical_map(cpu);
> + u64 my_mpidr = read_cpuid_mpidr();
> + u64 cluster = MPIDR_CLUSTER(mpidr);
> + u64 idx = MPIDR_CPU(mpidr);
> +
> + if (MPIDR_CLUSTER(my_mpidr) == cluster)
> + write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx),
> + SYS_IMP_APL_IPI_RR_LOCAL_EL1);
> + else
> + write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx) | FIELD_PREP(IPI_RR_CLUSTER, cluster),
> + SYS_IMP_APL_IPI_RR_GLOBAL_EL1);
> + isb();
I think you have an ordering issue with this, see below.
> +}
> +
> static void aic_ipi_mask(struct irq_data *d)
> {
> u32 irq_bit = BIT(irqd_to_hwirq(d));
> @@ -588,8 +646,12 @@ static void aic_ipi_unmask(struct irq_data *d)
> * If a pending vIPI was unmasked, raise a HW IPI to ourselves.
> * No barriers needed here since this is a self-IPI.
> */
> - if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit)
> - aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
> + if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit) {
> + if (static_branch_likely(&use_fast_ipi))
> + aic_ipi_send_fast(smp_processor_id());
> + else
> + aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
> + }
> }
>
> static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
> @@ -617,8 +679,12 @@ static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
> smp_mb__after_atomic();
>
> if (!(pending & irq_bit) &&
> - (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit))
> - send |= AIC_IPI_SEND_CPU(cpu);
> + (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit)) {
> + if (static_branch_likely(&use_fast_ipi))
> + aic_ipi_send_fast(cpu);
OK, this is suffering from the same issue that GICv3 has, which is
that memory barriers don't provide order against sysregs. You need a
DSB for that, which is a pain. Something like this:
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 602c8b274170..579f22b72339 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -736,6 +736,15 @@ static void aic_ipi_send_fast(int cpu)
u64 cluster = MPIDR_CLUSTER(mpidr);
u64 idx = MPIDR_CPU(mpidr);
+ /*
+ * A DSB is required here in to order prior writes to memory
+ * with system register accesses having a side effect
+ * (matching the behaviour of the IPI registers). Make sure we
+ * only order stores with in the IS domain, keeping as light
+ * as possible.
+ */
+ dsb(ishst);
+
if (MPIDR_CLUSTER(my_mpidr) == cluster)
write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx),
SYS_IMP_APL_IPI_RR_LOCAL_EL1);
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
More information about the linux-arm-kernel
mailing list