[PATCH] arch_topology: Support SMT control on arm64

Dietmar Eggemann dietmar.eggemann at arm.com
Wed Sep 20 10:08:28 PDT 2023


On 19/09/2023 14:33, Yicong Yang wrote:
> From: Yicong Yang <yangyicong at hisilicon.com>
> 
> The core CPU control framework supports runtime SMT control which
> is not yet supported on arm64. Besides the general vulnerabilities
> concerns we want this runtime control on our arm64 server for:
> 
> - better single CPU performance in some cases
> - saving overall power consumption
> 
> This patch implements it in the following aspects:
> 
> - implement the callbacks of the core

I see only 1 function here: topology_is_primary_thread() ?

> - update the SMT status after the topology enumerated on arm64

That's the call init_cpu_topology()
                 topology_smt_set_num_threads()
                  cpu_smt_set_num_threads()

> - select HOTPLUG_SMT for arm64
> 
> For disabling SMT we'll offline all the secondary threads and

Does `disabling SMT` here mean setting cpu_smt_control=CPU_SMT_DISABLED?

> only leave the primary thread. Since we don't have restriction
> for primary thread selection, the first thread is chosen as the
> primary thread in this implementation.
> 
> Tests have been done on our ACPI based arm64 server and on
> ACPI/OF based QEMU VMs.
> 
> Signed-off-by: Yicong Yang <yangyicong at hisilicon.com>
> ---
>  arch/arm64/Kconfig            |  1 +
>  drivers/base/arch_topology.c  | 63 +++++++++++++++++++++++++++++++++++
>  include/linux/arch_topology.h | 11 ++++++
>  3 files changed, 75 insertions(+)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index b10515c0200b..531a71c7f499 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -233,6 +233,7 @@ config ARM64
>  	select HAVE_KRETPROBES
>  	select HAVE_GENERIC_VDSO
>  	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
> +	select HOTPLUG_SMT if SMP
>  	select IRQ_DOMAIN
>  	select IRQ_FORCED_THREADING
>  	select KASAN_VMALLOC if KASAN
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index b741b5ba82bd..75a693834fff 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -729,6 +729,63 @@ const struct cpumask *cpu_clustergroup_mask(int cpu)
>  	return &cpu_topology[cpu].cluster_sibling;
>  }
>  
> +#ifdef CONFIG_HOTPLUG_SMT
> +static int topology_smt_num_threads = 1;
> +
> +void __init topology_smt_set_num_threads(void)
> +{
> +	int cpu, sibling, threads;
> +
> +	/*
> +	 * Walk all the CPUs to find the largest thread number, in case we're
> +	 * on a heterogeneous platform with only part of the CPU cores support
> +	 * SMT.
> +	 *
> +	 * Get the thread number by checking the CPUs with same core id
> +	 * rather than checking the topology_sibling_cpumask(), since the
> +	 * sibling mask will not cover all the CPUs if there's CPU offline.
> +	 */
> +	for_each_possible_cpu(cpu) {
> +		threads = 1;
> +
> +		/* Invalid thread id, this CPU is not in a SMT core */
> +		if (cpu_topology[cpu].thread_id == -1)
> +			continue;
> +
> +		for_each_possible_cpu(sibling) {
> +			if (sibling == cpu || cpu_topology[sibling].thread_id == -1)
> +				continue;
> +
> +			if (cpu_topology[cpu].core_id == cpu_topology[sibling].core_id)
> +				threads++;
> +		}
> +
> +		if (threads > topology_smt_num_threads)
> +			topology_smt_num_threads = threads;
> +	}
> +
> +	/*
> +	 * We don't support CONFIG_SMT_NUM_THREADS_DYNAMIC so make the
> +	 * max_threads == num_threads.
> +	 */
> +	cpu_smt_set_num_threads(topology_smt_num_threads, topology_smt_num_threads);
> +}
> +
> +/*
> + * On SMT Hotplug the primary thread of the SMT won't be disabled. For x86 they
> + * seem to have a primary thread for special purpose. For other architectures
> + * like arm64 there's no such restriction for a primary thread, so make the
> + * first thread in the SMT as the primary thread.
> + */
> +bool topology_is_primary_thread(unsigned int cpu)
> +{
> +	if (cpu == cpumask_first(topology_sibling_cpumask(cpu)))
> +		return true;
> +
> +	return false;
> +}
> +#endif
> +
>  void update_siblings_masks(unsigned int cpuid)
>  {
>  	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
> @@ -841,6 +898,12 @@ void __init init_cpu_topology(void)
>  		reset_cpu_topology();
>  	}
>  
> +	/*
> +	 * By this stage we get to know whether we support SMT or not, update
> +	 * the information for the core.
> +	 */
> +	topology_smt_set_num_threads();
> +

So this would be the diff between x86 and arm64:

start_kernel() [init/main.c]

 arch_cpu_finalize_init() [arch/x86/kernel/cpu/common.c] <- x86

  identify_boot_cpu() [arch/x86/kernel/cpu/common.c]

   detect_ht() [arch/x86/kernel/cpu/common.c]

    detect_ht_early() [arch/x86/kernel/cpu/common.c]

  cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings) <- (1)
   

 arch_call_rest_init() [init/main.c] <- arm64

  rest_init() [init/main.c]

   kernel_init() [init/main.c]

    kernel_init_freeable() [init/main.c]

     smp_prepare_cpus() [arch/arm64/kernel/smp.c]

      init_cpu_topology() [drivers/base/arch_topology.c]

       topology_smt_set_num_threads()

        cpu_smt_set_num_threads(topology_smt_num_threads, topology_smt_num_threads) <- (1) 

[...]

Did some rough testing with your patch on an SMT4 Arm64 server with 256
CPUs:

(1) CPU hp out all secondaries from the thread_siblings masks

for i in {32..255}; do echo 0 > /sys/devices/system/cpu/cpu$i/online; done

(2) Check thread_siblings cpumasks

cat /sys/devices/system/cpu/cpu*/topology/thread_siblings
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000002
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000400
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000800
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00001000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00002000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00004000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00008000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00010000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00020000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00040000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00080000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000004
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00100000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00200000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00400000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00800000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,01000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,02000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,04000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,08000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,10000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,20000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000008
00000000,00000000,00000000,00000000,00000000,00000000,00000000,40000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,80000000
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000010
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000020
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000040
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000080
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000100
00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000200

(3) CPU hp out and back in CPU31

echo 0 > /sys/devices/system/cpu/cpu31/online
echo 1 > /sys/devices/system/cpu/cpu31/online

cpu_smt_control is still CPU_SMT_ENABLED in cpu_smt_allowed() so
topology_is_primary_thread() isn't called? 



More information about the linux-arm-kernel mailing list