[PATCH 3/3] arm_pmu: rework ACPI probing

Mark Rutland <mark.rutland@arm.com>
Fri Sep 30 04:18:44 PDT 2022


The current ACPI PMU probing logic tries to associate PMUs with CPUs
when each CPU is first brought online, in order to handle late hotplug.
However, PMUs are only registered during early boot, and so for
late-hotplugged CPUs this can only ever associate the CPU with an
existing PMU.

We tried to be clever and have the arm_pmu_acpi_cpu_starting()
callback allocate a struct arm_pmu when no matching instance is found,
in order to avoid duplication of logic. However, as above, this doesn't
do anything useful for late-hotplugged CPUs, and it requires us to
allocate memory in an atomic context, which is especially problematic
for PREEMPT_RT, as reported by Valentin and Pierre.
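
For reference, the root of the problem is that arm_pmu_acpi_cpu_starting()
runs in the STARTING phase of CPU hotplug, on the incoming CPU with
interrupts disabled, so any allocation made there must be atomic. A
minimal sketch of the old flow removed below:

	/*
	 * Old flow, from the STARTING hotplug callback: if no
	 * already-registered PMU matches this CPU's MIDR, fall back to
	 * an atomic allocation. The GFP_ATOMIC allocations made here
	 * are what PREEMPT_RT cannot tolerate.
	 */
	pmu = arm_pmu_acpi_find_pmu();
	if (!pmu)
		pmu = armpmu_alloc_atomic();	/* kzalloc(..., GFP_ATOMIC) */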

This patch reworks the probing to detect PMUs for all online CPUs in the
arm_pmu_acpi_probe() function, which is more aligned with how DT probing
works. The arm_pmu_acpi_cpu_starting() callback only tries to associate
CPUs with an existing arm_pmu instance, avoiding the problem of
allocating in atomic context.
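
Concretely, association remains keyed on the MIDR value recorded in
pmu->acpi_cpuid; a minimal sketch of the test applied by the new
probe-time loop (see arm_pmu_acpi_probe_matching_cpus() below):

	/* Associate a CPU with a PMU iff their MIDR values match. */
	unsigned long cpu_cpuid = per_cpu(cpu_data, cpu).reg_midr;

	if (cpu_cpuid == pmu->acpi_cpuid)
		arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);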

Note that as we didn't previously register PMUs for late-hotplugged
CPUs, this change doesn't result in a loss of existing functionality,
though we will now warn when we cannot associate a CPU with a PMU.

This change allows us to pull the hotplug callback registration into
the arm_pmu_acpi_probe() function: since we no longer need the callback
to be invoked immediately after probing the boot CPUs, we can register
it with cpuhp_setup_state_nocalls(), i.e. without invoking the callback
for the CPUs which are already online.
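
For illustration, the only difference at the registration site is the
helper used; the _nocalls variant installs the callback for future
hotplug events without invoking it for already-online CPUs:

	/* Old: runs arm_pmu_acpi_cpu_starting() on each online CPU. */
	ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
				"perf/arm/pmu_acpi:starting",
				arm_pmu_acpi_cpu_starting, NULL);

	/*
	 * New: registers the callback only; the probe loop handles the
	 * CPUs which are already online.
	 */
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_ACPI_STARTING,
					"perf/arm/pmu_acpi:starting",
					arm_pmu_acpi_cpu_starting, NULL);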

For the moment the arm_pmu_acpi_init() initcall remains to register the
SPE PMU, though in future this should probably be moved elsewhere (e.g.
the arm64 ACPI init code), since this doesn't need to be tied to the
regular CPU PMU code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lore.kernel.org/r/20210810134127.1394269-2-valentin.schneider@arm.com/
Reported-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/linux-arm-kernel/20220912155105.1443303-1-pierre.gondois@arm.com/
Cc: Pierre Gondois <pierre.gondois@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c       | 17 ++-----
 drivers/perf/arm_pmu_acpi.c  | 96 ++++++++++++++++++++-----------
 include/linux/perf/arm_pmu.h |  1 -
 3 files changed, 53 insertions(+), 61 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 3f07df5a7e95..82a6d22e8ee2 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -861,16 +861,16 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 					    &cpu_pmu->node);
 }
 
-static struct arm_pmu *__armpmu_alloc(gfp_t flags)
+struct arm_pmu *armpmu_alloc(void)
 {
 	struct arm_pmu *pmu;
 	int cpu;
 
-	pmu = kzalloc(sizeof(*pmu), flags);
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
 	if (!pmu)
 		goto out;
 
-	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
+	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
 	if (!pmu->hw_events) {
 		pr_info("failed to allocate per-cpu PMU data.\n");
 		goto out_free_pmu;
@@ -916,17 +916,6 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
 	return NULL;
 }
 
-struct arm_pmu *armpmu_alloc(void)
-{
-	return __armpmu_alloc(GFP_KERNEL);
-}
-
-struct arm_pmu *armpmu_alloc_atomic(void)
-{
-	return __armpmu_alloc(GFP_ATOMIC);
-}
-
-
 void armpmu_free(struct arm_pmu *pmu)
 {
 	free_percpu(pmu->hw_events);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 99abea3b2cc9..a085e45b509e 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -13,6 +13,7 @@
 #include <linux/percpu.h>
 #include <linux/perf/arm_pmu.h>
 
+#include <asm/cpu.h>
 #include <asm/cputype.h>
 
 static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
@@ -204,26 +205,6 @@ static struct arm_pmu *arm_pmu_acpi_find_pmu(void)
 	return NULL;
 }
 
-static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
-{
-	struct arm_pmu *pmu;
-
-	pmu = arm_pmu_acpi_find_pmu();
-	if (pmu)
-		return pmu;
-
-	pmu = armpmu_alloc_atomic();
-	if (!pmu) {
-		pr_warn("Unable to allocate PMU for CPU%d\n",
-			smp_processor_id());
-		return NULL;
-	}
-
-	pmu->acpi_cpuid = read_cpuid_id();
-
-	return pmu;
-}
-
 /*
  * Check whether the new IRQ is compatible with those already associated with
  * the PMU (e.g. we don't have mismatched PPIs).
@@ -286,26 +267,45 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
 	if (per_cpu(probed_pmus, cpu))
 		return 0;
 
-	pmu = arm_pmu_acpi_find_alloc_pmu();
-	if (!pmu)
-		return -ENOMEM;
+	pmu = arm_pmu_acpi_find_pmu();
+	if (!pmu) {
+		pr_warn_ratelimited("Unable to associate CPU%d with a PMU\n",
+				    cpu);
+		return 0;
+	}
 
 	arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
-
-	/*
-	 * Ideally, we'd probe the PMU here when we find the first matching
-	 * CPU. We can't do that for several reasons; see the comment in
-	 * arm_pmu_acpi_init().
-	 *
-	 * So for the time being, we're done.
-	 */
 	return 0;
 }
 
+static void arm_pmu_acpi_probe_matching_cpus(struct arm_pmu *pmu,
+					     unsigned long cpuid)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		unsigned long cpu_cpuid = per_cpu(cpu_data, cpu).reg_midr;
+
+		if (cpu_cpuid == cpuid)
+			arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+	}
+}
+
 int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 {
 	int pmu_idx = 0;
-	int cpu, ret;
+	unsigned int cpu;
+	int ret;
+
+	ret = arm_pmu_acpi_parse_irqs();
+	if (ret)
+		return ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+					"perf/arm/pmu_acpi:starting",
+					arm_pmu_acpi_cpu_starting, NULL);
+	if (ret)
+		return ret;
 
 	/*
 	 * Initialise and register the set of PMUs which we know about right
@@ -320,13 +320,27 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 	 * For the moment, as with the platform/DT case, we need at least one
 	 * of a PMU's CPUs to be online at probe time.
 	 */
-	for_each_possible_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+		unsigned long cpuid;
 		char *base_name;
 
-		if (!pmu || pmu->name)
+		/* If we've already probed this CPU, we have nothing to do */
+		if (pmu)
 			continue;
 
+		pmu = armpmu_alloc();
+		if (!pmu) {
+			pr_warn("Unable to allocate PMU for CPU%d\n",
+				cpu);
+			return -ENOMEM;
+		}
+
+		cpuid = per_cpu(cpu_data, cpu).reg_midr;
+		pmu->acpi_cpuid = cpuid;
+
+		arm_pmu_acpi_probe_matching_cpus(pmu, cpuid);
+
 		ret = init_fn(pmu);
 		if (ret == -ENODEV) {
 			/* PMU not handled by this driver, or not present */
@@ -351,26 +364,16 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 		}
 	}
 
-	return 0;
+	return ret;
 }
 
 static int arm_pmu_acpi_init(void)
 {
-	int ret;
-
 	if (acpi_disabled)
 		return 0;
 
 	arm_spe_acpi_register_device();
 
-	ret = arm_pmu_acpi_parse_irqs();
-	if (ret)
-		return ret;
-
-	ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
-				"perf/arm/pmu_acpi:starting",
-				arm_pmu_acpi_cpu_starting, NULL);
-
-	return ret;
+	return 0;
 }
 subsys_initcall(arm_pmu_acpi_init)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 0407a38b470a..049908af3595 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -173,7 +173,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
 
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
-struct arm_pmu *armpmu_alloc_atomic(void);
 void armpmu_free(struct arm_pmu *pmu);
 int armpmu_register(struct arm_pmu *pmu);
 int armpmu_request_irq(int irq, int cpu);
-- 
2.30.2