[PATCH v5 12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

Ganapatrao Kulkarni gpkulkarni at gmail.com
Wed Apr 20 01:31:03 PDT 2016


On Wed, Apr 20, 2016 at 1:11 PM, Dennis Chen <dennis.chen at linaro.org> wrote:
> On 20 April 2016 at 09:40, David Daney <ddaney.cavm at gmail.com> wrote:
>> From: Hanjun Guo <hanjun.guo at linaro.org>
>>
>> Introduce a new file to hold ACPI based NUMA information parsing from
>> SRAT and SLIT.
>>
>> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
>> ranges to Proximity Domain mapping.  SLIT has the information of inter
>> node distances(relative number for access latency).
>>
>> Signed-off-by: Hanjun Guo <hanjun.guo at linaro.org>
>> Signed-off-by: Ganapatrao Kulkarni <gkulkarni at caviumnetworks.com>
>> [rrichter at cavium.com Reworked for numa v10 series ]
>> Signed-off-by: Robert Richter <rrichter at cavium.com>
>> [david.daney at cavium.com reordered and combined with other patches in Hanjun Guo's original set]
>> Signed-off-by: David Daney <david.daney at cavium.com>
>> ---
>>  arch/arm64/include/asm/acpi.h |   8 +++
>>  arch/arm64/include/asm/numa.h |   2 +
>>  arch/arm64/kernel/Makefile    |   1 +
>>  arch/arm64/kernel/acpi_numa.c | 149 ++++++++++++++++++++++++++++++++++++++++++
>>  arch/arm64/kernel/smp.c       |   2 +
>>  arch/arm64/mm/numa.c          |   5 +-
>>  6 files changed, 166 insertions(+), 1 deletion(-)
>>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>>
>> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
>> index aee323b..4b13ecd 100644
>> --- a/arch/arm64/include/asm/acpi.h
>> +++ b/arch/arm64/include/asm/acpi.h
>> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>>  #endif
>>
>> +#ifdef CONFIG_ACPI_NUMA
>> +int arm64_acpi_numa_init(void);
>> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
>> +#else
>> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
>> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; }
>> +#endif /* CONFIG_ACPI_NUMA */
>> +
>>  #endif /*_ASM_ACPI_H*/
>> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
>> index e9b4f29..600887e 100644
>> --- a/arch/arm64/include/asm/numa.h
>> +++ b/arch/arm64/include/asm/numa.h
>> @@ -5,6 +5,8 @@
>>
>>  #ifdef CONFIG_NUMA
>>
>> +#define NR_NODE_MEMBLKS                (MAX_NUMNODES * 2)
>> +
>>  /* currently, arm64 implements flat NUMA topology */
>>  #define parent_node(node)      (node)
>>
>> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
>> index 3793003..69569c6 100644
>> --- a/arch/arm64/kernel/Makefile
>> +++ b/arch/arm64/kernel/Makefile
>> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)                       += efi.o efi-entry.stub.o
>>  arm64-obj-$(CONFIG_PCI)                        += pci.o
>>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>>  arm64-obj-$(CONFIG_ACPI)               += acpi.o
>> +arm64-obj-$(CONFIG_ACPI_NUMA)          += acpi_numa.o
>>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)        += acpi_parking_protocol.o
>>  arm64-obj-$(CONFIG_PARAVIRT)           += paravirt.o
>>  arm64-obj-$(CONFIG_RANDOMIZE_BASE)     += kaslr.o
>> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
>> new file mode 100644
>> index 0000000..fd72070
>> --- /dev/null
>> +++ b/arch/arm64/kernel/acpi_numa.c
>> @@ -0,0 +1,149 @@
>> +/*
>> + * ACPI 5.1 based NUMA setup for ARM64
>> + * Lots of code was borrowed from arch/x86/mm/srat.c
>> + *
>> + * Copyright 2004 Andi Kleen, SuSE Labs.
>> + * Copyright (C) 2013-2016, Linaro Ltd.
>> + *             Author: Hanjun Guo <hanjun.guo at linaro.org>
>> + *
>> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
>> + *
>> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
>> + * Assumes all memory regions belonging to a single proximity domain
>> + * are in one chunk. Holes between them will be included in the node.
>> + */
>> +
>> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
>> +
>> +#include <linux/acpi.h>
>> +#include <linux/bitmap.h>
>> +#include <linux/bootmem.h>
>> +#include <linux/kernel.h>
>> +#include <linux/mm.h>
>> +#include <linux/memblock.h>
>> +#include <linux/mmzone.h>
>> +#include <linux/module.h>
>> +#include <linux/topology.h>
>> +
>> +#include <acpi/processor.h>
>> +#include <asm/numa.h>
>> +
>> +static int cpus_in_srat;
>> +
>> +struct __node_cpu_hwid {
>> +       u32 node_id;    /* logical node containing this CPU */
>> +       u64 cpu_hwid;   /* MPIDR for this CPU */
>> +};
>> +
>> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
>> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
>> +
>> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
>> +{
>> +       int i;
>> +
>> +       for (i = 0; i < cpus_in_srat; i++) {
>> +               if (hwid == early_node_cpu_hwid[i].cpu_hwid)
>> +                       return early_node_cpu_hwid[i].node_id;
>> +       }
>> +
>> +       return NUMA_NO_NODE;
>> +}
>> +
>> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
>> +{
>> +       unsigned long madt_end, entry;
>> +       struct acpi_table_madt *madt;
>> +       acpi_size tbl_size;
>> +
>> +       if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
>> +                       (struct acpi_table_header **)&madt, &tbl_size)))
>> +               return -ENODEV;
>> +
>> +       entry = (unsigned long)madt;
>> +       madt_end = entry + madt->header.length;
>> +
>> +       /* Parse all entries looking for a match. */
>> +       entry += sizeof(struct acpi_table_madt);
>> +       while (entry + sizeof(struct acpi_subtable_header) < madt_end) {
>> +               struct acpi_subtable_header *header =
>> +                       (struct acpi_subtable_header *)entry;
>> +
>> +               if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
>> +                       struct acpi_madt_generic_interrupt *gicc =
>> +                               container_of(header,
>> +                               struct acpi_madt_generic_interrupt, header);
>> +
>> +                       if ((gicc->flags & ACPI_MADT_ENABLED) &&
>> +                           (gicc->uid == acpi_id)) {
>> +                               *mpidr = gicc->arm_mpidr;
>> +                               early_acpi_os_unmap_memory(madt, tbl_size);
>> +                               return 0;
>> +                       }
>> +               }
>> +               entry += header->length;
>> +       }
>> +
>> +       early_acpi_os_unmap_memory(madt, tbl_size);
>> +       return -ENODEV;
>> +}
>> +
>> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
>> +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
>> +{
>> +       int pxm, node;
>> +       u64 mpidr;
>> +
>> +       if (srat_disabled())
>> +               return;
>> +
>> +       if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) {
>> +               pr_err("SRAT: Invalid SRAT header length: %d\n",
>> +                       pa->header.length);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
>> +               return;
>> +
>> +       if (cpus_in_srat >= NR_CPUS) {
>> +               pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n",
>> +                            NR_CPUS);
>> +               return;
>> +       }
>> +
>> +       pxm = pa->proximity_domain;
>> +       node = acpi_map_pxm_to_node(pxm);
>> +
>> +       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
>> +               pr_err("SRAT: Too many proximity domains %d\n", pxm);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       if (get_mpidr_in_madt(pa->acpi_processor_uid, &mpidr)) {
>> +               pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n",
>> +                       pxm, pa->acpi_processor_uid);
>> +               bad_srat();
>> +               return;
>> +       }
>> +
>> +       early_node_cpu_hwid[cpus_in_srat].node_id = node;
>> +       early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
>> +       node_set(node, numa_nodes_parsed);
>> +       cpus_in_srat++;
>> +       pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
>> +               pxm, mpidr, node, cpus_in_srat);
>> +}
>> +
>> +int __init arm64_acpi_numa_init(void)
>> +{
>> +       int ret;
>> +
>> +       ret = acpi_numa_init();
>> +       if (ret)
>> +               return ret;
>> +
>> +       return srat_disabled() ? -EINVAL : 0;
>> +}
>> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
>> index bebc4c6..6c7ef8f 100644
>> --- a/arch/arm64/kernel/smp.c
>> +++ b/arch/arm64/kernel/smp.c
>> @@ -524,6 +524,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
>>          */
>>         acpi_set_mailbox_entry(cpu_count, processor);
>>
>> +       early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid));
>> +
>>         cpu_count++;
>>  }
>>
>> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
>> index 6cb03f9..fc15186 100644
>> --- a/arch/arm64/mm/numa.c
>> +++ b/arch/arm64/mm/numa.c
>> @@ -17,6 +17,7 @@
>>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>>   */
>>
>> +#include <linux/acpi.h>
>>  #include <linux/bootmem.h>
>>  #include <linux/memblock.h>
>>  #include <linux/module.h>
>> @@ -388,7 +389,9 @@ static int __init dummy_numa_init(void)
>>  void __init arm64_numa_init(void)
>>  {
>>         if (!numa_off) {
>> -               if (!numa_init(of_numa_init))
>> +               if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
>> +                       return;
>> +               if (acpi_disabled && !numa_init(of_numa_init))
>>                         return;
>>         }
>>
> On top of the latest version of the dt-based numa patch, if 'numa=off' is
> specified on the command line,
> this function will fall back to invoking dummy_numa_init(), which gives
> rise to the question: do we need to
> touch any ACPI related stuff in that case? If not, then the output

No, ACPI is not a fallback configuration for DT, and vice versa.

> message "No NUMA configuration found" that follows
> seems unnecessary, since it's a little bit confusing when
> numa has already been turned off explicitly.

Thanks, this print can be moved out of the function dummy_numa_init
and added to the function arm64_numa_init as the last line of the
if (!numa_off) block, to indicate that the ACPI/DT based NUMA
configuration failed.

A more appropriate message would be:
pr_info("%s\n", "NUMA configuration failed or not found");

thanks
Ganapat

>
> Thanks,
> Dennis
>
>> --
>> 1.7.11.7
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
>> the body of a message to majordomo at vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



More information about the linux-arm-kernel mailing list