[RFC PATCH] arch_topology: Pre-allocate cacheinfo from primary CPU

Radu Rendec rrendec at redhat.com
Thu Mar 23 15:42:42 PDT 2023


Commit 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
builds the cacheinfo from the primary CPU before the secondary CPUs
boot, provided that the DT/ACPI description contains cache information.
However, if such information is not present, it still falls back to the
old behavior, which allocates the cacheinfo memory on each secondary
CPU. On RT kernels, this triggers a "BUG: sleeping function called from
invalid context" splat because the allocation is done before preemption
is first enabled on the secondary CPU.

The proper solution is to add the cache information to the DT/ACPI
description, but at least on aarch64 systems the need for it can be
avoided by leveraging automatic detection (through the CLIDR_EL1
register). Automatic detection is already implemented, but it currently
doesn't work on RT kernels for the reason described above.

This patch attempts to enable automatic detection on RT kernels when no
DT/ACPI cache information is available, by pre-allocating the cacheinfo
memory on the primary CPU. The size of that allocation depends on the
number of cache leaves, which is unknown at that point precisely
because the DT/ACPI information is missing. The patch therefore guesses
the number of leaves, using the leaf count of the primary CPU, on the
assumption that most systems have identical CPUs and the secondary CPUs
have the same number of leaves as the primary. If the guess turns out
to be too small when automatic detection eventually runs on a secondary
CPU (for example, on an asymmetric system where a secondary CPU
implements more cache levels than the primary), the memory is
reallocated on that CPU. That path effectively reverts to the original
behavior and still triggers a splat on RT kernels.

If the DT/ACPI cache information is present, the previous behavior of
pre-allocating memory through init_cpu_topology() is preserved.

With this patch applied, automatic detection should work on RT kernels
for all systems with identical CPUs, without requiring the DT/ACPI
description to be modified to include the cache information.

Signed-off-by: Radu Rendec <rrendec at redhat.com>
---
 arch/arm64/kernel/smp.c   |  4 ++++
 drivers/base/cacheinfo.c  | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/cacheinfo.h |  1 +
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 4e8327264255..7ee2c38185d4 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -33,6 +33,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/kexec.h>
 #include <linux/kvm_host.h>
+#include <linux/cacheinfo.h>
 
 #include <asm/alternative.h>
 #include <asm/atomic.h>
@@ -730,6 +731,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	numa_store_cpu_info(this_cpu);
 	numa_add_cpu(this_cpu);
 
+	/* Guess and pre-allocate cacheinfo before any secondary CPU boots. */
+	pre_alloc_cache_info();
+
 	/*
 	 * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
 	 * secondary CPUs present.
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index f6573c335f4c..c7d691ef7839 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -28,6 +28,9 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
 #define per_cpu_cacheinfo_idx(cpu, idx)		\
 				(per_cpu_cacheinfo(cpu) + (idx))
 
+static DEFINE_PER_CPU(struct cacheinfo *, pre_alloc_ci_list);
+static unsigned int pre_alloc_ci_leaves;
+
 struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
 {
 	return ci_cacheinfo(cpu);
@@ -408,9 +411,69 @@
 	return -ENOENT;
 }
 
-static inline
-int allocate_cache_info(int cpu)
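+/*
+ * Pre-allocate the cacheinfo memory for all possible CPUs from the primary
+ * CPU, guessing that each secondary CPU has the same number of cache leaves
+ * as the primary. On RT kernels this avoids a sleeping allocation on the
+ * early secondary bring-up path, where preemption is not yet enabled.
+ */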
+void pre_alloc_cache_info(void)
 {
+	unsigned int leaves = cache_leaves(smp_processor_id());
+	unsigned int cpu;
+	struct cacheinfo *ci;
+
+	if (!leaves)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu_cacheinfo(cpu))
+			/*
+			 * Early allocation through init_cpu_topology() was
+			 * successful, so there is no point in pre-allocating.
+			 */
+			continue;
+
+		ci = kcalloc(leaves, sizeof(struct cacheinfo), GFP_ATOMIC);
+		if (!ci) {
+			/*
+			 * Unwind: free everything allocated so far and clear
+			 * the per-CPU pointers, so that allocate_cache_info()
+			 * never sees a stale pointer to freed memory.
+			 */
+			for_each_possible_cpu(cpu) {
+				kfree(per_cpu(pre_alloc_ci_list, cpu));
+				per_cpu(pre_alloc_ci_list, cpu) = NULL;
+			}
+			return;
+		}
+
+		per_cpu(pre_alloc_ci_list, cpu) = ci;
+	}
+
+	pre_alloc_ci_leaves = leaves;
+}
+
+static int allocate_cache_info(int cpu)
+{
+	struct cacheinfo *ci = per_cpu(pre_alloc_ci_list, cpu);
+
+	if (ci) {
+		per_cpu(pre_alloc_ci_list, cpu) = NULL;
+
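+		/*
+		 * The guess was good enough if the actual number of leaves
+		 * fits in the pre-allocated buffer; otherwise fall through
+		 * to a fresh allocation, which may still splat on RT kernels.
+		 */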
+		if (cache_leaves(cpu) <= pre_alloc_ci_leaves) {
+			per_cpu_cacheinfo(cpu) = ci;
+			return 0;
+		}
+
+		kfree(ci);
+	}
+
 	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
 					 sizeof(struct cacheinfo), GFP_ATOMIC);
 	if (!per_cpu_cacheinfo(cpu)) {
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 908e19d17f49..23f9dac61d67 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -85,6 +85,7 @@ int populate_cache_leaves(unsigned int cpu);
 int cache_setup_acpi(unsigned int cpu);
 bool last_level_cache_is_valid(unsigned int cpu);
 bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
+void pre_alloc_cache_info(void);
 int fetch_cache_info(unsigned int cpu);
 int detect_cache_attributes(unsigned int cpu);
 #ifndef CONFIG_ACPI_PPTT
-- 
2.39.2



