[PATCH v3 04/11] ARC: Add CPU topology
Noam Camus
noamca at mellanox.com
Thu Jun 15 01:43:54 PDT 2017
From: Noam Camus <noamca at mellanox.com>
CPU topology is currently used only by the NPS SoC, which is a multi-core
of 256 cores with SMT of 16 HW threads per core.
With topology information the scheduler is much more efficient at
creating scheduling domains and later using them.
Signed-off-by: Noam Camus <noamca at mellanox.com>
---
arch/arc/Kconfig | 27 ++++++++
arch/arc/include/asm/Kbuild | 1 -
arch/arc/include/asm/topology.h | 34 +++++++++++
arch/arc/kernel/Makefile | 1 +
arch/arc/kernel/setup.c | 4 +-
arch/arc/kernel/smp.c | 5 ++
arch/arc/kernel/topology.c | 125 +++++++++++++++++++++++++++++++++++++++
7 files changed, 194 insertions(+), 3 deletions(-)
create mode 100644 arch/arc/include/asm/topology.h
create mode 100644 arch/arc/kernel/topology.c
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index f464f97..08a9003 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -202,6 +202,33 @@ config ARC_SMP_HALT_ON_RESET
at designated entry point. For other case, all jump to common
entry point and spin wait for Master's signal.
+config NPS_CPU_TOPOLOGY
+ bool "Support cpu topology definition"
+ depends on EZNPS_MTM_EXT
+ default y
+ help
+ Support NPS CPU topology definition.
+ An NPS400 has 16 clusters of cores.
+ Each NPS400 cluster has 16 cores.
+ Each NPS core has 16 symmetrical threads.
+ In total there are 4096 such threads (NR_CPUS=4096).
+
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on NPS_CPU_TOPOLOGY
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+ bool "SMT scheduler support"
+ depends on NPS_CPU_TOPOLOGY
+ help
+ Improves the CPU scheduler's decision making when dealing with
+ MultiThreading at a cost of slightly increased overhead in some
+ places. If unsure say N here.
+
endif #SMP
config ARC_MCIP
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 7bee4e4..d8cb607 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -43,7 +43,6 @@ generic-y += stat.h
generic-y += statfs.h
generic-y += termbits.h
generic-y += termios.h
-generic-y += topology.h
generic-y += trace_clock.h
generic-y += types.h
generic-y += ucontext.h
diff --git a/arch/arc/include/asm/topology.h b/arch/arc/include/asm/topology.h
new file mode 100644
index 0000000..a9be3f8
--- /dev/null
+++ b/arch/arc/include/asm/topology.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_ARC_TOPOLOGY_H
+#define _ASM_ARC_TOPOLOGY_H
+
+#ifdef CONFIG_NPS_CPU_TOPOLOGY
+
+#include <linux/cpumask.h>
+
+struct cputopo_nps { /* one entry per CPU in cpu_topology[] */
+ int thread_id; /* -1 until store_cpu_topology() fills it in */
+ int core_id; /* -1 means "not stored yet" */
+ cpumask_t thread_sibling; /* CPUs recorded with the same core_id */
+ cpumask_t core_sibling; /* CPUs in the same cluster */
+};
+
+extern struct cputopo_nps cpu_topology[NR_CPUS]; /* indexed by CPU number */
+
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id) /* int; -1 until stored */
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) /* same-cluster CPUs */
+#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) /* same-core CPUs */
+
+void init_cpu_topology(void); /* resets the table and sets the scheduler topology */
+void store_cpu_topology(unsigned int cpuid); /* fills in the entry for cpuid once */
+const struct cpumask *cpu_coregroup_mask(int cpu); /* &cpu_topology[cpu].core_sibling */
+
+#else
+
+static inline void init_cpu_topology(void) { } /* no-op when CONFIG_NPS_CPU_TOPOLOGY is off */
+static inline void store_cpu_topology(unsigned int cpuid) { } /* likewise a no-op */
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARC_TOPOLOGY_H */
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 8942c5c..46af80a 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_NPS_CPU_TOPOLOGY) += topology.o
obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o
CFLAGS_fpu.o += -mdpfp
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index de29ea9..379ebda 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -571,14 +571,14 @@ static void c_stop(struct seq_file *m, void *v)
.show = show_cpuinfo
};
-static DEFINE_PER_CPU(struct cpu, cpu_topology);
+static DEFINE_PER_CPU(struct cpu, cpu_topo_info);
static int __init topology_init(void)
{
int cpu;
for_each_present_cpu(cpu)
- register_cpu(&per_cpu(cpu_topology, cpu), cpu);
+ register_cpu(&per_cpu(cpu_topo_info, cpu), cpu);
return 0;
}
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index f462671..91668c5 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -67,6 +67,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
{
int i;
+ init_cpu_topology();
+ store_cpu_topology(smp_processor_id());
+
/*
* if platform didn't set the present map already, do it now
* boot cpu is set to present already by init/main.c
@@ -151,6 +154,8 @@ void start_kernel_secondary(void)
if (machine_desc->init_per_cpu)
machine_desc->init_per_cpu(cpu);
+ store_cpu_topology(cpu);
+
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
diff --git a/arch/arc/kernel/topology.c b/arch/arc/kernel/topology.c
new file mode 100644
index 0000000..3feb7c9
--- /dev/null
+++ b/arch/arc/kernel/topology.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched/topology.h>
+#include <plat/smp.h>
+
+/*
+ * CPU topology table, indexed by CPU number.
+ * Entries are reset by init_cpu_topology() and filled in, one CPU at a
+ * time, by store_cpu_topology().
+ */
+struct cputopo_nps cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+/*
+ * Return the core_sibling mask recorded for @cpu in cpu_topology[].
+ * This is the mask callback of the MC level in nps_topology[].
+ */
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	const struct cputopo_nps *topo = &cpu_topology[cpu];
+
+	return &topo->core_sibling;
+}
+
+/*
+ * Link @cpuid into the sibling masks of cpu_topology[].
+ *
+ * Every possible CPU in the same cluster as @cpuid becomes a mutual
+ * core sibling; among those, every CPU whose stored core_id equals the
+ * one of @cpuid also becomes a mutual thread sibling.
+ *
+ * NOTE(review): struct global_id comes from <plat/smp.h>; assigning
+ * .value and reading .cluster presumably decodes the CPU number into
+ * its cluster field - confirm against that header.
+ */
+static void update_siblings_masks(unsigned int cpuid)
+{
+	struct cputopo_nps *self = &cpu_topology[cpuid];
+	struct global_id self_gid, other_gid;
+	int other;
+
+	self_gid.value = cpuid;
+
+	for_each_possible_cpu(other) {
+		struct cputopo_nps *peer = &cpu_topology[other];
+
+		other_gid.value = other;
+
+		/* CPUs of another cluster share nothing with @cpuid */
+		if (self_gid.cluster != other_gid.cluster)
+			continue;
+
+		/* same cluster: record the pair in both core masks */
+		cpumask_set_cpu(cpuid, &peer->core_sibling);
+		if (other != cpuid)
+			cpumask_set_cpu(other, &self->core_sibling);
+
+		/* same core as well: record the pair in both thread masks */
+		if (self->core_id == peer->core_id) {
+			cpumask_set_cpu(cpuid, &peer->thread_sibling);
+			if (other != cpuid)
+				cpumask_set_cpu(other, &self->thread_sibling);
+		}
+	}
+
+	/* Do not proceed before masks are written */
+	smp_wmb();
+}
+
+/*
+ * Fill in the cpu_topology[] entry of @cpuid (thread_id, core_id) and
+ * link it into the sibling masks. An entry whose core_id is no longer
+ * -1 has already been stored and is left untouched.
+ *
+ * store_cpu_topology is called at boot when only one cpu is running,
+ * and with the mutex cpu_hotplug.lock locked when several cpus have
+ * booted, which prevents simultaneous write access to the cpu_topology
+ * array.
+ */
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cputopo_nps *topo = &cpu_topology[cpuid];
+	struct global_id id;
+
+	/* only an entry that is still unset (-1) gets populated */
+	if (topo->core_id == -1) {
+		id.value = cpuid;
+
+		topo->thread_id = id.thread;
+		/* core_id combines the cluster (shifted left by 4) with the core */
+		topo->core_id = (id.cluster << 4) | id.core;
+
+		update_siblings_masks(cpuid);
+
+		pr_debug("CPU%u: thread %d, core %d\n",
+			 cpuid, topo->thread_id, topo->core_id);
+	}
+}
+
+static struct sched_domain_topology_level nps_topology[] = { /* passed to set_sched_topology() */
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, /* SMT level, only with SCHED_SMT */
+#endif
+#ifdef CONFIG_SCHED_MC
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, /* MC level: core_sibling mask */
+#endif
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, /* DIE level, always present */
+ { NULL, }, /* presumably the end-of-table marker; confirm against set_sched_topology() */
+};
+
+/*
+ * Reset the cpu_topology[] entry of every possible CPU (both ids to -1,
+ * both sibling masks cleared) and hand nps_topology[] to the scheduler.
+ *
+ * init_cpu_topology is called at boot when only one cpu is running,
+ * which prevents simultaneous write access to the cpu_topology array.
+ */
+void __init init_cpu_topology(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		struct cputopo_nps *entry = &cpu_topology[i];
+
+		/* empty masks and -1 ids mark the entry as "not stored yet" */
+		cpumask_clear(&entry->thread_sibling);
+		cpumask_clear(&entry->core_sibling);
+		entry->core_id = -1;
+		entry->thread_id = -1;
+	}
+
+	/* Do not proceed before masks are written */
+	smp_wmb();
+
+	/* Set scheduler topology descriptor */
+	set_sched_topology(nps_topology);
+}
--
1.7.1
More information about the linux-snps-arc
mailing list