[RFC PATCH] hw/arm/virt: Add support for NUMA on ARM64
Shannon Zhao
zhaoshenglong at huawei.com
Tue Dec 2 04:56:26 PST 2014
Add support for NUMA on ARM64. Tested successfully by running a guest
Linux kernel with the following patch applied:
- arm64:numa: adding numa support for arm64 platforms.
http://www.spinics.net/lists/arm-kernel/msg365316.html
Example qemu command line:
qemu-system-aarch64 \
-enable-kvm -smp 4 \
-kernel Image \
-m 512 -machine virt,kernel_irqchip=on \
-initrd guestfs.cpio.gz \
-cpu host -nographic \
-numa node,mem=256M,cpus=0-1,nodeid=0 \
-numa node,mem=256M,cpus=2-3,nodeid=1 \
-append "console=ttyAMA0 root=/dev/ram"
Todo:
1) The NUMA node information in the DT is not finalized yet, so this
patch might need further modification to follow any changes to it.
2) Consider IO-NUMA as well.
Please refer to the following url for NUMA DT node details:
- Documentation: arm64/arm: dt bindings for numa.
http://www.spinics.net/lists/arm-kernel/msg380200.html
Example: a 2-node system, each node having 2 CPUs and one memory range
numa-map {
#address-cells = <2>;
#size-cells = <1>;
#node-count = <2>;
mem-map = <0x0 0x40000000 0>,
<0x0 0x50000000 1>;
cpu-map = <0 1 0>,
<2 3 1>;
node-matrix = <0 0 10>,
<0 1 20>,
<1 0 20>,
<1 1 10>;
};
- mem-map: This property defines the association between a range of
memory and the proximity domain/numa node to which it belongs.
- cpu-map: This property defines the association between a range of
processors (a range of CPU IDs) and the proximity domain to which
those processors belong.
- node-matrix: This table provides a matrix that describes the relative
distance (memory latency) between all System Localities.
Each entry [i j distance] in the node-matrix table,
where i is the row of the matrix and j is the
column of the matrix, gives the relative distance
from Proximity Domain/NUMA node i to node j
in the system (including the case i == j, the node itself).
Signed-off-by: Shannon Zhao <zhaoshenglong at huawei.com>
---
hw/arm/boot.c | 25 ------------
hw/arm/virt.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 113 insertions(+), 32 deletions(-)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 0014c34..c20fee4 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -335,7 +335,6 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
{
void *fdt = NULL;
int size, rc;
- uint32_t acells, scells;
if (binfo->dtb_filename) {
char *filename;
@@ -369,30 +368,6 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
return 0;
}
- acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells");
- scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells");
- if (acells == 0 || scells == 0) {
- fprintf(stderr, "dtb file invalid (#address-cells or #size-cells 0)\n");
- goto fail;
- }
-
- if (scells < 2 && binfo->ram_size >= (1ULL << 32)) {
- /* This is user error so deserves a friendlier error message
- * than the failure of setprop_sized_cells would provide
- */
- fprintf(stderr, "qemu: dtb file not compatible with "
- "RAM size > 4GB\n");
- goto fail;
- }
-
- rc = qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg",
- acells, binfo->loader_start,
- scells, binfo->ram_size);
- if (rc < 0) {
- fprintf(stderr, "couldn't set /memory/reg\n");
- goto fail;
- }
-
if (binfo->kernel_cmdline && *binfo->kernel_cmdline) {
rc = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
binfo->kernel_cmdline);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 78f618d..9d18a91 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -170,8 +170,6 @@ static void create_fdt(VirtBoardInfo *vbi)
* to fill in necessary properties later
*/
qemu_fdt_add_subnode(fdt, "/chosen");
- qemu_fdt_add_subnode(fdt, "/memory");
- qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory");
/* Clock node, for the benefit of the UART. The kernel device tree
* binding documentation claims the PL011 node clock properties are
@@ -235,6 +233,116 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)
qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
}
+/*
+ * Allocate guest RAM, map it at the VIRT_MEM base and describe it in the DT:
+ * one /memory node without NUMA, else one /memory@<base> node per NUMA node
+ * plus a /numa-map node holding the mem-map, cpu-map and node-matrix tables.
+ *
+ * @machine:       supplies the total guest RAM size (machine->ram_size)
+ * @system_memory: region the RAM is mapped into
+ * @vbi:           board info providing the fdt and the memory map
+ *
+ * Returns 0 on success, -1 if the layout cannot be encoded in the DT.
+ */
+static int virt_memory_init(MachineState *machine,
+                            MemoryRegion *system_memory,
+                            const VirtBoardInfo *vbi)
+{
+    hwaddr mem_base = vbi->memmap[VIRT_MEM].base;
+    uint64_t *mem_map = NULL, *cpu_map = NULL, *node_matrix = NULL, *e;
+    MemoryRegion *ram;
+    CPUState *cpu;
+    uint32_t acells, scells;
+    int i, j, rc, cpu_nodes = 0, ret = -1;
+
+    acells = qemu_fdt_getprop_cell(vbi->fdt, "/", "#address-cells");
+    scells = qemu_fdt_getprop_cell(vbi->fdt, "/", "#size-cells");
+    if (acells == 0 || scells == 0) {
+        fprintf(stderr, "dtb file invalid (#address-cells or #size-cells 0)\n");
+        return -1;
+    }
+    if (scells < 2 && machine->ram_size >= (1ULL << 32)) {
+        /* This is user error so deserves a friendlier error message
+         * than the failure of setprop_sized_cells would provide
+         */
+        fprintf(stderr, "qemu: dtb file not compatible with "
+                "RAM size > 4GB\n");
+        return -1;
+    }
+
+    /* Allocate only after validation so the error paths above leak nothing */
+    ram = g_new(MemoryRegion, 1);
+    memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
+                                         machine->ram_size);
+    memory_region_add_subregion(system_memory, mem_base, ram);
+
+    if (!nb_numa_nodes) {
+        qemu_fdt_add_subnode(vbi->fdt, "/memory");
+        qemu_fdt_setprop_string(vbi->fdt, "/memory", "device_type", "memory");
+        rc = qemu_fdt_setprop_sized_cells(vbi->fdt, "/memory", "reg",
+                                          acells, mem_base,
+                                          scells, machine->ram_size);
+        if (rc < 0) {
+            fprintf(stderr, "couldn't set /memory/reg\n");
+        }
+        return rc < 0 ? -1 : 0;
+    }
+
+    qemu_fdt_add_subnode(vbi->fdt, "/numa-map");
+    qemu_fdt_setprop_cell(vbi->fdt, "/numa-map", "#address-cells", 0x2);
+    qemu_fdt_setprop_cell(vbi->fdt, "/numa-map", "#size-cells", 0x1);
+    qemu_fdt_setprop_cell(vbi->fdt, "/numa-map", "#node-count", nb_numa_nodes);
+
+    /* Tables of (cell count, value) pairs for *_sized_cells_from_array() */
+    mem_map = g_new0(uint64_t, nb_numa_nodes * 4);
+    cpu_map = g_new0(uint64_t, nb_numa_nodes * 6);
+    node_matrix = g_new0(uint64_t, nb_numa_nodes * nb_numa_nodes * 6);
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        char *nodename = g_strdup_printf("/memory@%" PRIx64, mem_base);
+
+        qemu_fdt_add_subnode(vbi->fdt, nodename);
+        qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "memory");
+        /* "reg" holds the region length, not the offset of its last byte */
+        rc = qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+                                          acells, mem_base,
+                                          scells, numa_info[i].node_mem);
+        g_free(nodename);
+        if (rc < 0) {
+            goto out;
+        }
+
+        /* mem-map entry <base-hi base-lo node>: the base spans two cells,
+         * matching the #address-cells = <2> set on /numa-map above
+         */
+        e = mem_map + 4 * i;
+        e[0] = 2;
+        e[1] = mem_base;
+        e[2] = 1;
+        e[3] = i;
+
+        mem_base += numa_info[i].node_mem;
+    }
+    rc = qemu_fdt_setprop_sized_cells_from_array(vbi->fdt, "/numa-map",
+                                                 "mem-map", nb_numa_nodes * 2,
+                                                 mem_map);
+    if (rc < 0) {
+        goto out;
+    }
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        int first_cpu = -1, last_cpu = -1;
+
+        /* Rescan for every node so the range does not depend on what the
+         * previous nodes contained.  A node is still reported as a single
+         * <first last node> range, i.e. its CPUs are assumed contiguous.
+         */
+        CPU_FOREACH(cpu) {
+            if (!test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
+                continue;
+            }
+            if (first_cpu < 0 || cpu->cpu_index < first_cpu) {
+                first_cpu = cpu->cpu_index;
+            }
+            if (cpu->cpu_index > last_cpu) {
+                last_cpu = cpu->cpu_index;
+            }
+        }
+        if (first_cpu < 0) {
+            continue; /* node without CPUs: nothing to put in cpu-map */
+        }
+
+        e = cpu_map + 6 * cpu_nodes++;
+        e[0] = e[2] = e[4] = 1;
+        e[1] = first_cpu;
+        e[3] = last_cpu;
+        e[5] = i;
+    }
+    rc = qemu_fdt_setprop_sized_cells_from_array(vbi->fdt, "/numa-map",
+                                                 "cpu-map", cpu_nodes * 3,
+                                                 cpu_map);
+    if (rc < 0) {
+        goto out;
+    }
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        for (j = 0; j < nb_numa_nodes; j++) {
+            e = node_matrix + 6 * (i * nb_numa_nodes + j);
+            e[0] = e[2] = e[4] = 1;
+            e[1] = i;
+            e[3] = j;
+            e[5] = (i == j) ? 10 : 20; /* distance to self vs. other node */
+        }
+    }
+    rc = qemu_fdt_setprop_sized_cells_from_array(
+            vbi->fdt, "/numa-map", "node-matrix",
+            nb_numa_nodes * nb_numa_nodes * 3, node_matrix);
+    if (rc < 0) {
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    if (ret < 0) {
+        fprintf(stderr, "couldn't set NUMA memory properties in dtb\n");
+    }
+    g_free(mem_map);
+    g_free(cpu_map);
+    g_free(node_matrix);
+    return ret;
+}
+
static void fdt_add_timer_nodes(const VirtBoardInfo *vbi)
{
/* Note that on A15 h/w these interrupts are level-triggered,
@@ -532,7 +640,6 @@ static void machvirt_init(MachineState *machine)
qemu_irq pic[NUM_IRQS];
MemoryRegion *sysmem = get_system_memory();
int n;
- MemoryRegion *ram = g_new(MemoryRegion, 1);
const char *cpu_model = machine->cpu_model;
VirtBoardInfo *vbi;
@@ -585,10 +692,9 @@ static void machvirt_init(MachineState *machine)
fdt_add_cpu_nodes(vbi);
fdt_add_psci_node(vbi);
- memory_region_init_ram(ram, NULL, "mach-virt.ram", machine->ram_size,
- &error_abort);
- vmstate_register_ram_global(ram);
- memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
+ if (virt_memory_init(machine, sysmem, vbi) < 0) {
+ exit(1);
+ }
create_flash(vbi);
--
1.7.1
More information about the linux-arm-kernel
mailing list