[PATCH 3/4] ACPI: RISCV: Add NUMA support based on SRAT and SLIT

Sunil V L sunilvl at ventanamicro.com
Tue Mar 5 02:06:32 PST 2024


On Tue, Mar 05, 2024 at 05:54:03PM +0800, Haibo Xu wrote:
> On Tue, Mar 5, 2024 at 1:24 PM Sunil V L <sunilvl at ventanamicro.com> wrote:
> >
> > On Wed, Jan 31, 2024 at 10:32:00AM +0800, Haibo Xu wrote:
> > > > Add acpi_numa.c to enable parsing NUMA information from the
> > > > ACPI SRAT and SLIT tables. The SRAT table provides the CPU (hart)
> > > > and memory node to proximity domain mapping, while the SLIT
> > > > table provides the distance metrics between proximity domains.
> > >
> > > Signed-off-by: Haibo Xu <haibo1.xu at intel.com>
> > > ---
> > >  arch/riscv/include/asm/acpi.h |  15 +++-
> > >  arch/riscv/kernel/Makefile    |   1 +
> > >  arch/riscv/kernel/acpi.c      |   5 --
> > >  arch/riscv/kernel/acpi_numa.c | 133 ++++++++++++++++++++++++++++++++++
> > >  arch/riscv/kernel/setup.c     |   4 +-
> > >  arch/riscv/kernel/smpboot.c   |   2 -
> > >  drivers/acpi/numa/srat.c      |   3 +-
> > >  include/linux/acpi.h          |   4 +
> > >  8 files changed, 156 insertions(+), 11 deletions(-)
> > >  create mode 100644 arch/riscv/kernel/acpi_numa.c
> > >
> > > diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h
> > > index 7dad0cf9d701..e0a1f84404f3 100644
> > > --- a/arch/riscv/include/asm/acpi.h
> > > +++ b/arch/riscv/include/asm/acpi.h
> > > @@ -61,11 +61,14 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { }
> > >
> > >  void acpi_init_rintc_map(void);
> > >  struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu);
> > > -u32 get_acpi_id_for_cpu(int cpu);
> > > +static inline u32 get_acpi_id_for_cpu(int cpu)
> > > +{
> > > +     return acpi_cpu_get_madt_rintc(cpu)->uid;
> > > +}
> > > +
> > >  int acpi_get_riscv_isa(struct acpi_table_header *table,
> > >                      unsigned int cpu, const char **isa);
> > >
> > > -static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; }
> > >  void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size,
> > >                            u32 *cboz_size, u32 *cbop_size);
> > >  #else
> > > @@ -87,4 +90,12 @@ static inline void acpi_get_cbo_block_size(struct acpi_table_header *table,
> > >
> > >  #endif /* CONFIG_ACPI */
> > >
> > > +#ifdef CONFIG_ACPI_NUMA
> > > +int acpi_numa_get_nid(unsigned int cpu);
> > > +void acpi_map_cpus_to_nodes(void);
> > > +#else
> > > +static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; }
> > > +static inline void acpi_map_cpus_to_nodes(void) { }
> > > +#endif /* CONFIG_ACPI_NUMA */
> > > +
> > >  #endif /*_ASM_ACPI_H*/
> > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > > index f71910718053..5d3e9cf89b76 100644
> > > --- a/arch/riscv/kernel/Makefile
> > > +++ b/arch/riscv/kernel/Makefile
> > > @@ -105,3 +105,4 @@ obj-$(CONFIG_COMPAT)              += compat_vdso/
> > >
> > >  obj-$(CONFIG_64BIT)          += pi/
> > >  obj-$(CONFIG_ACPI)           += acpi.o
> > > +obj-$(CONFIG_ACPI_NUMA)      += acpi_numa.o
> > > diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
> > > index e619edc8b0cc..040bdbfea2b4 100644
> > > --- a/arch/riscv/kernel/acpi.c
> > > +++ b/arch/riscv/kernel/acpi.c
> > > @@ -191,11 +191,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu)
> > >       return &cpu_madt_rintc[cpu];
> > >  }
> > >
> > > -u32 get_acpi_id_for_cpu(int cpu)
> > > -{
> > > -     return acpi_cpu_get_madt_rintc(cpu)->uid;
> > > -}
> > > -
> > >  /*
> > >   * __acpi_map_table() will be called before paging_init(), so early_ioremap()
> > >   * or early_memremap() should be called here to for ACPI table mapping.
> > > diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c
> > > new file mode 100644
> > > index 000000000000..493642a61457
> > > --- /dev/null
> > > +++ b/arch/riscv/kernel/acpi_numa.c
> > > @@ -0,0 +1,133 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * ACPI 6.6 based NUMA setup for RISCV
> > > + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c
> > > + *
> > > + * Copyright 2004 Andi Kleen, SuSE Labs.
> > > + * Copyright (C) 2013-2016, Linaro Ltd.
> > > + *           Author: Hanjun Guo <hanjun.guo at linaro.org>
> > > + * Copyright (C) 2024 Intel Corporation.
> > > + *
> > > + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
> > > + *
> > > + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
> > > + * Assumes all memory regions belonging to a single proximity domain
> > > + * are in one chunk. Holes between them will be included in the node.
> > > + */
> > > +
> > > +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
> > > +
> > > +#include <linux/acpi.h>
> > > +#include <linux/bitmap.h>
> > > +#include <linux/kernel.h>
> > > +#include <linux/mm.h>
> > > +#include <linux/memblock.h>
> > > +#include <linux/mmzone.h>
> > > +#include <linux/module.h>
> > > +#include <linux/topology.h>
> > > +
> > > +#include <asm/numa.h>
> > > +
> > > +static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE };
> > > +
> > > +int __init acpi_numa_get_nid(unsigned int cpu)
> > > +{
> > > +     return acpi_early_node_map[cpu];
> > > +}
> > > +
> > > +static inline int get_cpu_for_acpi_id(u32 uid)
> > > +{
> > > +     int cpu;
> > > +
> > > +     for (cpu = 0; cpu < nr_cpu_ids; cpu++)
> > > +             if (uid == get_acpi_id_for_cpu(cpu))
> > > +                     return cpu;
> > > +
> > > +     return -EINVAL;
> > > +}
> > > +
> > > +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header,
> > > +                                   const unsigned long end)
> >
> > Please check alignment.
> >
> 
> Sure.
> 
> > > +{
> > > +     struct acpi_srat_rintc_affinity *pa;
> > > +     int cpu, pxm, node;
> > > +
> > > +     if (srat_disabled())
> > > +             return -EINVAL;
> > > +
> > > +     pa = (struct acpi_srat_rintc_affinity *)header;
> > > +     if (!pa)
> > > +             return -EINVAL;
> > > +
> > > +     if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED))
> > > +             return 0;
> > > +
> > > +     pxm = pa->proximity_domain;
> > > +     node = pxm_to_node(pxm);
> > > +
> > > +     /*
> > > +      * If we can't map the UID to a logical cpu this
> > > +      * means that the UID is not part of possible cpus
> > > +      * so we do not need a NUMA mapping for it, skip
> > > +      * the SRAT entry and keep parsing.
> > > +      */
> > > +     cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid);
> > > +     if (cpu < 0)
> > > +             return 0;
> > > +
> > > +     acpi_early_node_map[cpu] = node;
> > > +     pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm,
> > > +             cpuid_to_hartid_map(cpu), node);
> > > +
> > > +     return 0;
> > > +}
> > > +
> > > +void __init acpi_map_cpus_to_nodes(void)
> > > +{
> > > +     int i;
> > > +
> > > +     /*
> > > +      * In ACPI, SMP and CPU NUMA information is provided in separate
> > > +      * static tables, namely the MADT and the SRAT.
> > > +      *
> > > +      * Thus, it is simpler to first create the cpu logical map through
> > > +      * an MADT walk and then map the logical cpus to their node ids
> > > +      * as separate steps.
> > > +      */
> > > +     acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat),
> > > +                                         ACPI_SRAT_TYPE_RINTC_AFFINITY,
> > > +                                         acpi_parse_rintc_pxm, 0);
> > > +
> > Alignment here as well.
> >
> 
> Sure.
> 
> > > +     for (i = 0; i < nr_cpu_ids; i++)
> > > +             early_map_cpu_to_node(i, acpi_numa_get_nid(i));
> > > +}
> > > +
> > > +/* Callback for Proximity Domain -> logical node ID mapping */
> > > +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa)
> > > +{
> > > +     int pxm, node;
> > > +
> > > +     if (srat_disabled())
> > > +             return;
> > > +
> > > +     if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) {
> > > +             pr_err("SRAT: Invalid SRAT header length: %d\n",
> > > +                     pa->header.length);
> > > Can we merge these into a single line?
> >
> > > +             bad_srat();
> > > +             return;
> > > +     }
> > > +
> > > +     if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED))
> > > +             return;
> > > +
> > > +     pxm = pa->proximity_domain;
> > > +     node = acpi_map_pxm_to_node(pxm);
> > > +
> > > +     if (node == NUMA_NO_NODE) {
> > > +             pr_err("SRAT: Too many proximity domains %d\n", pxm);
> > > +             bad_srat();
> > > +             return;
> > > +     }
> > > +
> > > +     node_set(node, numa_nodes_parsed);
> > > +}
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 4f73c0ae44b2..a2cde65b69e9 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -281,8 +281,10 @@ void __init setup_arch(char **cmdline_p)
> > >       setup_smp();
> > >  #endif
> > >
> > > -     if (!acpi_disabled)
> > > +     if (!acpi_disabled) {
> > >               acpi_init_rintc_map();
> > > +             acpi_map_cpus_to_nodes();
> > > Is it not possible to fill up both in a single parse of the MADT?
> >
> 
> I think it's not possible to fill both in a single MADT parse since
> the NUMA info is provided in a separate SRAT table.
> 
Ahh, yes. My mistake. This looks good to me.

Thanks,
Sunil



More information about the linux-riscv mailing list