[PATCH] sched: topology: make cache topology separate from cpu topology

Qing Wang wangqing at vivo.com
Thu Mar 10 04:58:44 PST 2022


From: Wang Qing <wangqing at vivo.com>

On some architectures (e.g. ARM64), caches are implemented as below:
cluster:              ****** cluster 0 *****      ****** cluster 1 *****
core:                 0      1      2      3      4      5      6      7
cache(Leveln):        **cache0**   **cache1**     **cache2**   **cache3**
sd_llc_id(current):   0      0      0      0      4      4      4      4
sd_llc_id(should be): 0      0      2      2      4      4      6      6

Caches and CPUs have different topologies here, so cpus_share_cache()
returns the wrong value, which in turn misleads CPU load balancing.

Cache topology should therefore be kept separate from CPU topology; it
can preferentially be obtained from the "next-level-cache" property in
the DT.

Signed-off-by: Wang Qing <wangqing at vivo.com>
---
 arch/arm64/kernel/smp.c       |  1 +
 drivers/base/arch_topology.c  | 23 +++++++++++++++++++++++
 include/linux/arch_topology.h |  3 +++
 kernel/sched/topology.c       | 33 +++++++++++++++++++++++++++++++--
 4 files changed, 58 insertions(+), 2 deletions(-)
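
Note for reviewers: sd_llc_id is what cpus_share_cache() ultimately
compares, so computing a per-cache-group id/size directly changes its
result. For reference, a sketch of that consumer as it appears in
kernel/sched/core.c (simplified here):

	bool cpus_share_cache(int this_cpu, int that_cpu)
	{
		if (this_cpu == that_cpu)
			return true;

		return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
	}

With the topology in the commit message, cpus_share_cache(1, 2)
currently returns true even though CPUs 1 and 2 sit under different
caches; after this patch their sd_llc_id values become 0 and 2
respectively, so it returns false.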

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 27df5c1..94cf649
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -723,6 +723,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	unsigned int this_cpu;
 
 	init_cpu_topology();
+	init_cpu_cache_topology();
 
 	this_cpu = smp_processor_id();
 	store_cpu_topology(this_cpu);
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 9761541..613213f
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -613,6 +613,7 @@ static int __init parse_dt_topology(void)
  */
 struct cpu_topology cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
+struct device_node *cache_topology[NR_CPUS][MAX_CPU_CACHE_LEVEL];
 
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
@@ -738,4 +739,26 @@ void __init init_cpu_topology(void)
 	else if (of_have_populated_dt() && parse_dt_topology())
 		reset_cpu_topology();
 }
+
+void __init init_cpu_cache_topology(void)
+{
+	struct device_node *node_cpu, *node_cache;
+	int cpu, level;
+
+	for_each_possible_cpu(cpu) {
+		node_cpu = of_get_cpu_node(cpu, NULL);
+		if (!node_cpu)
+			continue;
+
+		level = 0;
+		node_cache = node_cpu;
+		while (level < MAX_CPU_CACHE_LEVEL) {
+			node_cache = of_parse_phandle(node_cache, "next-level-cache", 0);
+			if (!node_cache)
+				break;
+			cache_topology[cpu][level++] = node_cache;
+		}
+		of_node_put(node_cpu);
+	}
+}
 #endif
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index cce6136b..d37f47d
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -72,6 +72,8 @@ struct cpu_topology {
 };
 
 #ifdef CONFIG_GENERIC_ARCH_TOPOLOGY
+#define MAX_CPU_CACHE_LEVEL 7
+extern struct device_node *cache_topology[NR_CPUS][MAX_CPU_CACHE_LEVEL];
 extern struct cpu_topology cpu_topology[NR_CPUS];
 
 #define topology_physical_package_id(cpu)	(cpu_topology[cpu].package_id)
@@ -82,6 +84,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
 #define topology_cluster_cpumask(cpu)	(&cpu_topology[cpu].cluster_sibling)
 #define topology_llc_cpumask(cpu)	(&cpu_topology[cpu].llc_sibling)
 void init_cpu_topology(void);
+void init_cpu_cache_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 const struct cpumask *cpu_clustergroup_mask(int cpu);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d201a70..10850d6
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -650,6 +650,36 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
 DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
 
+static void set_sd_llc(int cpu, struct sched_domain *sd, int *first_cpu, int *cpu_num)
+{
+	int cache_level, cpu_id;
+	int first, last;
+	int id = cpumask_first(sched_domain_span(sd));
+	int size = cpumask_weight(sched_domain_span(sd));
+
+	*first_cpu = id;
+	*cpu_num = size;
+
+	for (cache_level = 0; cache_level < MAX_CPU_CACHE_LEVEL; cache_level++) {
+		if (!cache_topology[cpu][cache_level])
+			break;
+
+		first = -1;
+		last = id;
+		for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
+			if (cache_topology[cpu][cache_level] == cache_topology[cpu_id][cache_level]) {
+				if (cpu_id >= id && cpu_id < id + size) {
+					first = (first == -1) ? cpu_id : first;
+					last = cpu_id;
+				} else
+					return;
+			}
+		}
+		*first_cpu = first;
+		*cpu_num = last - first + 1;
+	}
+}
+
 static void update_top_cache_domain(int cpu)
 {
 	struct sched_domain_shared *sds = NULL;
@@ -659,8 +689,7 @@ static void update_top_cache_domain(int cpu)
 
 	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
 	if (sd) {
-		id = cpumask_first(sched_domain_span(sd));
-		size = cpumask_weight(sched_domain_span(sd));
+		set_sd_llc(cpu, sd, &id, &size);
 		sds = sd->shared;
 	}
 
-- 
2.7.4