[RFC PATCH v2] hw/arm/boot: Add support for NUMA on ARM64
Shannon Zhao
zhaoshenglong at huawei.com
Wed Dec 31 01:59:58 PST 2014
Add support for NUMA on ARM64. Tested successfully running a guest
Linux kernel with the following patch applied:
- arm64:numa: adding numa support for arm64 platforms.
http://www.spinics.net/lists/arm-kernel/msg365316.html
Changes v1 ... v2:
Take into account Peter's comments:
* rename virt_memory_init to arm_generate_memory_dtb
* move arm_generate_memory_dtb to boot.c and make it a common func
* use a struct numa_map to generate numa dtb
Example qemu command line:
qemu-system-aarch64 \
-enable-kvm -smp 4 \
-kernel Image \
-m 512 -machine virt,kernel_irqchip=on \
-initrd guestfs.cpio.gz \
-cpu host -nographic \
-numa node,mem=256M,cpus=0-1,nodeid=0 \
-numa node,mem=256M,cpus=2-3,nodeid=1 \
-append "console=ttyAMA0 root=/dev/ram"
Todo:
1)The NUMA nodes information in DT is not finalized yet, so this
patch might need to be further modified to follow any changes in it.
2)Consider IO-NUMA as well
Please refer to the following url for NUMA DT node details:
- Documentation: arm64/arm: dt bindings for numa.
http://www.spinics.net/lists/arm-kernel/msg380200.html
Example: 2 Node system each having 2 CPUs and a Memory
numa-map {
#address-cells = <2>;
#size-cells = <1>;
#node-count = <2>;
mem-map = <0x0 0x40000000 0>,
<0x0 0x50000000 1>;
cpu-map = <0 1 0>,
<2 3 1>;
node-matrix = <0 0 10>,
<0 1 20>,
<1 0 20>,
<1 1 10>;
};
- mem-map: This property defines the association between a range of
memory and the proximity domain/numa node to which it belongs.
- cpu-map: This property defines the association of a range of processors
(a range of cpu ids) with the proximity domain to which
the processors belong.
- node-matrix: This table provides a matrix that describes the relative
distance (memory latency) between all System Localities.
The value of each Entry[i j distance] in node-matrix table,
where i represents a row of a matrix and j represents a
column of a matrix, indicates the relative distances
from Proximity Domain/Numa node i to every other
node j in the system (including itself).
Signed-off-by: Shannon Zhao <zhaoshenglong at huawei.com>
---
hw/arm/boot.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
hw/arm/virt.c | 7 +---
2 files changed, 97 insertions(+), 8 deletions(-)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 0014c34..df33f4f 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -312,6 +312,100 @@ static void set_kernel_args_old(const struct arm_boot_info *info)
}
}
+/*
+ * Generate the /memory node(s) -- and, when NUMA is configured, the
+ * /numa-map node -- in the guest device tree.
+ *
+ * Without NUMA a single /memory node covering all of RAM is created.
+ * With NUMA, one /memory@<base> node is created per NUMA node, plus a
+ * /numa-map node holding the mem-map, cpu-map and node-matrix tables
+ * from the proposed arm64 NUMA device tree binding.
+ *
+ * Returns 0 on success, or a negative libfdt error code on failure.
+ */
+static int arm_generate_memory_dtb(void *fdt, const struct arm_boot_info *binfo,
+                                   uint32_t acells, uint32_t scells)
+{
+    CPUState *cpu;
+    int i, j, rc = 0;
+    /* Each table entry is three (cell-count, value) pairs. */
+    const int entry = 6;
+    int size_mem = nb_numa_nodes * entry;
+    int size_matrix = nb_numa_nodes * size_mem;
+    uint64_t *mem_map, *cpu_map, *node_matrix;
+    hwaddr mem_base = binfo->loader_start;
+
+    if (!nb_numa_nodes) {
+        /* No NUMA: a single memory node covering all of RAM. */
+        qemu_fdt_add_subnode(fdt, "/memory");
+        qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory");
+        return qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg",
+                                            acells, binfo->loader_start,
+                                            scells, binfo->ram_size);
+    }
+
+    /*
+     * Heap-allocate the tables: a struct with VLA members is a GCC
+     * extension and can grow large on the stack for big node counts.
+     */
+    mem_map = g_new0(uint64_t, size_mem);
+    cpu_map = g_new0(uint64_t, size_mem);
+    node_matrix = g_new0(uint64_t, size_matrix);
+
+    qemu_fdt_add_subnode(fdt, "/numa-map");
+    qemu_fdt_setprop_cell(fdt, "/numa-map", "#address-cells", 0x2);
+    qemu_fdt_setprop_cell(fdt, "/numa-map", "#size-cells", 0x1);
+    /* Must reflect the configured node count, not a hardcoded 2. */
+    qemu_fdt_setprop_cell(fdt, "/numa-map", "#node-count", nb_numa_nodes);
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        /* One /memory@<base> node per NUMA node. */
+        char *nodename = g_strdup_printf("/memory@%" PRIx64, mem_base);
+        int min_cpu = INT_MAX, max_cpu = -1;
+
+        qemu_fdt_add_subnode(fdt, nodename);
+        qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
+        /*
+         * "reg" is <base size>; the size cell is the full node size.
+         * (Using node_mem - 1 would drop the last byte of each node.)
+         */
+        qemu_fdt_setprop_sized_cells(fdt, nodename, "reg",
+                                     acells, mem_base,
+                                     scells, numa_info[i].node_mem);
+        g_free(nodename);
+
+        /* mem-map entry: <base-hi base-lo nodeid>, one cell each. */
+        mem_map[entry * i + 0] = 1;
+        mem_map[entry * i + 1] = 0x0;
+        mem_map[entry * i + 2] = 1;
+        mem_map[entry * i + 3] = mem_base;
+        mem_map[entry * i + 4] = 1;
+        mem_map[entry * i + 5] = i;
+
+        mem_base += numa_info[i].node_mem;
+
+        /*
+         * cpu-map entry: <first-cpu last-cpu nodeid>. The range is
+         * recomputed from scratch for every node rather than carried
+         * over from the previous node, so it does not rely on nodes
+         * being assigned contiguous, ascending CPU ranges from 0.
+         * NOTE(review): a node with no CPUs still assumes the binding
+         * expects one contiguous range per node -- confirm against the
+         * proposed DT binding.
+         */
+        CPU_FOREACH(cpu) {
+            if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
+                min_cpu = MIN(min_cpu, cpu->cpu_index);
+                max_cpu = MAX(max_cpu, cpu->cpu_index);
+            }
+        }
+
+        cpu_map[entry * i + 0] = 1;
+        cpu_map[entry * i + 1] = min_cpu;
+        cpu_map[entry * i + 2] = 1;
+        cpu_map[entry * i + 3] = max_cpu;
+        cpu_map[entry * i + 4] = 1;
+        cpu_map[entry * i + 5] = i;
+
+        /* node-matrix entry: <from to distance>; 10 = local, 20 = remote. */
+        for (j = 0; j < nb_numa_nodes; j++) {
+            int idx = entry * (i * nb_numa_nodes + j);
+
+            node_matrix[idx + 0] = 1;
+            node_matrix[idx + 1] = i;
+            node_matrix[idx + 2] = 1;
+            node_matrix[idx + 3] = j;
+            node_matrix[idx + 4] = 1;
+            node_matrix[idx + 5] = (i == j) ? 10 : 20;
+        }
+    }
+
+    /* Propagate fdt errors instead of unconditionally returning 0. */
+    rc = qemu_fdt_setprop_sized_cells_from_array(fdt, "/numa-map", "mem-map",
+                                                 size_mem / 2, mem_map);
+    if (!rc) {
+        rc = qemu_fdt_setprop_sized_cells_from_array(fdt, "/numa-map",
+                                                     "cpu-map",
+                                                     size_mem / 2, cpu_map);
+    }
+    if (!rc) {
+        rc = qemu_fdt_setprop_sized_cells_from_array(fdt, "/numa-map",
+                                                     "node-matrix",
+                                                     size_matrix / 2,
+                                                     node_matrix);
+    }
+
+    g_free(mem_map);
+    g_free(cpu_map);
+    g_free(node_matrix);
+
+    return rc;
+}
+
/**
* load_dtb() - load a device tree binary image into memory
* @addr: the address to load the image at
@@ -385,9 +479,7 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
goto fail;
}
- rc = qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg",
- acells, binfo->loader_start,
- scells, binfo->ram_size);
+ rc = arm_generate_memory_dtb(fdt, binfo, acells, scells);
if (rc < 0) {
fprintf(stderr, "couldn't set /memory/reg\n");
goto fail;
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 314e55b..7feddaf 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -170,8 +170,6 @@ static void create_fdt(VirtBoardInfo *vbi)
* to fill in necessary properties later
*/
qemu_fdt_add_subnode(fdt, "/chosen");
- qemu_fdt_add_subnode(fdt, "/memory");
- qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory");
/* Clock node, for the benefit of the UART. The kernel device tree
* binding documentation claims the PL011 node clock properties are
@@ -585,9 +583,8 @@ static void machvirt_init(MachineState *machine)
fdt_add_cpu_nodes(vbi);
fdt_add_psci_node(vbi);
- memory_region_init_ram(ram, NULL, "mach-virt.ram", machine->ram_size,
- &error_abort);
- vmstate_register_ram_global(ram);
+ memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
+ machine->ram_size);
memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
create_flash(vbi);
--
1.7.1
More information about the linux-arm-kernel
mailing list