在ACPI下,BIOS通过SRAT(Static Resource Affinity Table)表把memory的信息告诉kernel. kernel沿着arm64_numa_init -> arm64_acpi_numa_init -> acpi_numa_init 这条调用路径,在acpi_numa_init中通过acpi_table_parse_srat 来添加BIOS告诉kernel的memory信息
/*
 * acpi_numa_init - gather NUMA information from the ACPI SRAT and SLIT tables.
 *
 * Return: -EINVAL when ACPI is disabled; the negative value returned by the
 * SRAT memory-affinity parse if that parse failed; -ENOENT when
 * parsed_numa_memblks is zero after parsing; 0 otherwise.
 */
int __init acpi_numa_init(void)
{
	int cnt = 0;

	if (acpi_disabled)
		return -EINVAL;

	/* SRAT: Static Resource Affinity Table */
	if (acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat) == 0) {
		/*
		 * The number of CPU entries walked must not be capped by
		 * NR_CPUS or nr_cpus=: SRAT CPU entries may be ordered
		 * differently from those in the MADT, so every CPU entry in
		 * the SRAT is visited to build the apicid-to-node mapping.
		 */
		struct acpi_subtable_proc srat_proc[] = {
			{
				.id = ACPI_SRAT_TYPE_CPU_AFFINITY,
				.handler = acpi_parse_processor_affinity,
			},
			{
				.id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
				.handler = acpi_parse_x2apic_affinity,
			},
			{
				.id = ACPI_SRAT_TYPE_GICC_AFFINITY,
				.handler = acpi_parse_gicc_affinity,
			},
		};

		/* One pass over the SRAT dispatching all CPU-affinity types. */
		acpi_table_parse_entries_array(ACPI_SIG_SRAT,
					       sizeof(struct acpi_table_srat),
					       srat_proc, ARRAY_SIZE(srat_proc),
					       0);

		/* Memory-affinity entries, at most NR_NODE_MEMBLKS of them. */
		cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
					    acpi_parse_memory_affinity,
					    NR_NODE_MEMBLKS);
	}

	/*
	 * SLIT: System Locality Information Table.
	 * Parsed regardless of the SRAT outcome; its result is not checked.
	 */
	acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);

	if (cnt < 0)
		return cnt;

	if (!parsed_numa_memblks)
		return -ENOENT;

	return 0;
}
我们主要关注acpi_table_parse_srat
/*
 * acpi_table_parse_srat - parse SRAT sub-table entries of a single type.
 * @id:          SRAT entry type to look for.
 * @handler:     callback handed to acpi_table_parse_entries() for that type.
 * @max_entries: entry limit forwarded unchanged to acpi_table_parse_entries().
 *
 * Return: whatever acpi_table_parse_entries() returns.
 */
static int __init
acpi_table_parse_srat(enum acpi_srat_type id,
		      acpi_tbl_entry_handler handler, unsigned int max_entries)
{
	/* Size of the fixed SRAT header that precedes the sub-table entries. */
	const unsigned long srat_header_size = sizeof(struct acpi_table_srat);

	return acpi_table_parse_entries(ACPI_SIG_SRAT, srat_header_size, id,
					handler, max_entries);
}
acpi_table_parse_entries 会在签名为ACPI_SIG_SRAT的ACPI表中查找id为ACPI_SRAT_TYPE_MEMORY_AFFINITY的子表项,如果有的话,就对其调用handler,这里的handler就是acpi_parse_memory_affinity
而在acpi_parse_memory_affinity 中主要调用acpi_numa_memory_affinity_init
/*
 * acpi_numa_memory_affinity_init - handle one SRAT memory affinity entry.
 * @ma: the SRAT memory affinity structure to process.
 *
 * Derives the address range and proximity domain from @ma, maps the
 * proximity domain to a node, and hands the range to numa_add_memblk().
 *
 * NOTE(review): this listing appears to be abridged. The out_err_bad_srat
 * label targeted by the goto below, any use of 'hotpluggable', and the
 * function's return statements are not visible here - confirm against the
 * full source before relying on it.
 */
int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
u64 start, end;
u32 hotpluggable;
int node, pxm;
/* 'end' is exclusive: one past the last byte of the range. */
start = ma->base_address;
end = start + ma->length;
/* Translate the proximity domain from the table into a node id. */
pxm = ma->proximity_domain;
node = acpi_map_pxm_to_node(pxm);
/* A negative return from numa_add_memblk() is treated as a bad SRAT. */
if (numa_add_memblk(node, start, end) < 0) {
pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
node, (unsigned long long) start,
(unsigned long long) end - 1);
goto out_err_bad_srat;
}
/* Record that this node was seen while parsing. */
node_set(node, numa_nodes_parsed);
}
在acpi_numa_memory_affinity_init中,得到一段memory的start和end以及所属的node之后,就调用numa_add_memblk 来添加memblock
/*
 * numa_add_memblk - associate the memory range [start, end) with a node.
 * @nid:   node id the range belongs to.
 * @start: first address of the range.
 * @end:   address one past the end of the range (exclusive).
 *
 * Calls memblock_set_node() on memblock.memory for the range. On success the
 * node is also set in numa_nodes_parsed and the range is logged.
 *
 * Return: the value returned by memblock_set_node(); negative on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	int ret = memblock_set_node(start, (end - start), &memblock.memory, nid);

	if (ret >= 0) {
		node_set(nid, numa_nodes_parsed);
		/* Log the inclusive range, hence end - 1. */
		pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n",
			start, (end - 1), nid);
		return ret;
	}

	pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n",
	       start, (end - 1), nid);
	return ret;
}
在numa_add_memblk 中就调用memblock_set_node把这段memory设置到所属的node上,这样从开机log中就可以看到bootloader(BIOS)总共告诉kernel有几段memory.
[ 0.000000] NUMA: Adding memblock [0x0 - 0x3fffffff] on node 0
[ 0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x3fffffff]
[ 0.000000] NUMA: Adding memblock [0x2000000000 - 0x2fffffffff] on node 1
[ 0.000000] ACPI: SRAT: Node 1 PXM 1 [mem 0x2000000000-0x2fffffffff]
[ 0.000000] NUMA: Adding memblock [0x1000000000 - 0x1fffffffff] on node 0
[ 0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x1000000000-0x1fffffffff]
[ 0.000000] NUMA: Adding memblock [0x40000000000 - 0x4003fffffff] on node 2
[ 0.000000] ACPI: SRAT: Node 2 PXM 2 [mem 0x40000000000-0x4003fffffff]
[ 0.000000] NUMA: Adding memblock [0x42000000000 - 0x42fffffffff] on node 3
[ 0.000000] ACPI: SRAT: Node 3 PXM 3 [mem 0x42000000000-0x42fffffffff]
[ 0.000000] NUMA: Adding memblock [0x41000000000 - 0x41fffffffff] on node 2
[ 0.000000] ACPI: SRAT: Node 2 PXM 2 [mem 0x41000000000-0x41fffffffff]