NUMA 初始化的调用路径是 bootmem_init -> arm64_numa_init:
/*
 * arm64_numa_init() - choose a NUMA description source and initialise NUMA.
 *
 * Unless numa_off is set, numa_init() is attempted with arm64_of_numa_init
 * when acpi_disabled is set, and with arm64_acpi_numa_init otherwise.
 * If NUMA is off, or that attempt returns non-zero, numa_init() is run
 * with dummy_numa_init as the fallback.
 */
void __init arm64_numa_init(void)
{
	if (!numa_off) {
		int err;

		if (acpi_disabled)
			err = numa_init(arm64_of_numa_init);
		else
			err = numa_init(arm64_acpi_numa_init);

		/* The selected parser succeeded: no fallback needed. */
		if (!err)
			return;
	}

	numa_init(dummy_numa_init);
}
arm64_numa_init 首先会判断 numa_off 是否为 0:如果为 1,则不再通过 device tree 或 ACPI 解析 NUMA 信息,
而是直接执行 numa_init(dummy_numa_init)。numa_off 是由 bootloader 通过启动参数传递给 kernel 的。
/*
 * numa_parse_early_param() - handler for the "numa" early boot parameter.
 * @opt: value given after "numa=" (may be NULL).
 *
 * Sets numa_off to 1 and logs a message when the first three characters
 * of @opt are "off"; any other value is accepted and ignored.
 *
 * Return: -EINVAL when @opt is NULL, 0 otherwise.
 */
static __init int numa_parse_early_param(char *opt)
{
	if (!opt)
		return -EINVAL;

	/* Only the 3-character prefix is compared. */
	if (strncmp(opt, "off", 3) != 0)
		return 0;

	pr_info("%s\n", "NUMA turned off");
	numa_off = 1;

	return 0;
}
early_param("numa", numa_parse_early_param);
如果传递 numa=off(代码只比较前 3 个字符,所以任何以 "off" 开头的值都会生效),numa_off 会被置为 1,kernel 就不再从固件解析 NUMA 信息。
arm64_numa_init 接着调用 numa_init 来做 NUMA 初始化,这里又要区分 bootloader 是通过 device tree
还是通过 ACPI 向 kernel 传递信息,具体由 acpi_disabled 决定。我们这里以 ACPI 为例:
/*
 * arm64_acpi_numa_init() - ACPI flavour of the arm64 NUMA init callback.
 *
 * Return: the error from acpi_numa_init() if it is non-zero; otherwise
 * -EINVAL when srat_disabled() reports true, and 0 when it does not.
 */
int __init arm64_acpi_numa_init(void)
{
	int err = acpi_numa_init();

	if (err)
		return err;

	if (srat_disabled())
		return -EINVAL;

	return 0;
}
又是调用acpi_numa_init,这个函数比较长,我们分段来看
/*
 * acpi_numa_init() - gather NUMA information from the ACPI SRAT and SLIT.
 *
 * The SRAT sub-table walks only run when the SRAT itself is found; the SLIT
 * lookup and acpi_numa_arch_fixup() run regardless.
 *
 * Return: -EINVAL when ACPI is disabled; the negative value returned by the
 * SRAT memory-affinity walk if there was one; -ENOENT when
 * parsed_numa_memblks is zero; 0 otherwise.
 */
int __init acpi_numa_init(void)
{
	int mem_ret = 0;

	if (acpi_disabled)
		return -EINVAL;

	/*
	 * SRAT: Static Resource Affinity Table.
	 *
	 * The CPU entry walks are not capped by NR_CPUS or nr_cpus=: SRAT may
	 * list CPUs in a different order than MADT, so every CPU entry is
	 * visited to obtain the apicid to node mapping.
	 */
	if (acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat) == 0) {
		acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
				      acpi_parse_x2apic_affinity, 0);
		acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
				      acpi_parse_processor_affinity, 0);
		acpi_table_parse_srat(ACPI_SRAT_TYPE_GICC_AFFINITY,
				      acpi_parse_gicc_affinity, 0);
		/* Only the memory-affinity result feeds the return value. */
		mem_ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
						acpi_parse_memory_affinity,
						NR_NODE_MEMBLKS);
	}

	/* SLIT: System Locality Information Table */
	acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);

	acpi_numa_arch_fixup();

	if (mem_ret < 0)
		return mem_ret;

	if (!parsed_numa_memblks)
		return -ENOENT;

	return 0;
}
首先再次判断acpi_disabled是否为1,为1 的话直接返回。
这个变量在disable_acpi 中赋值为1
/*
 * disable_acpi() - flag ACPI as disabled.
 *
 * Sets acpi_disabled, acpi_pci_disabled and acpi_noirq to 1.
 */
static inline void disable_acpi(void)
{
	acpi_noirq = 1;
	acpi_pci_disabled = 1;
	acpi_disabled = 1;
}
disable_acpi 又是被acpi_boot_table_init 调用
/*
 * acpi_boot_table_init() - initialise the ACPI tables early in boot.
 *
 * If acpi_table_init() or acpi_fadt_sanity_check() returns non-zero, an
 * error is logged and ACPI is disabled via disable_acpi(), unless
 * param_acpi_force is set. The FADT check is only reached when
 * acpi_table_init() returned zero.
 */
void __init acpi_boot_table_init(void)
{
	if (!acpi_table_init() && !acpi_fadt_sanity_check())
		return;

	pr_err("Failed to init ACPI tables\n");

	/* A forced ACPI boot keeps ACPI enabled despite the failure. */
	if (param_acpi_force)
		return;

	disable_acpi();
}
而 acpi_boot_table_init 是在 arch/arm64/kernel/setup.c 中调用的。从 acpi_boot_table_init
可以看到,如果 acpi_table_init 或 acpi_fadt_sanity_check 返回非 0(失败),并且没有设置 param_acpi_force,
就会调用 disable_acpi 把 acpi_disabled 置为 1;
首先看看acpi_numa_init 中if 条件的acpi_table_parse ,
其中#define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */
#define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */
/*
 * acpi_table_parse() - look up an ACPI table by signature and run a handler.
 * @id:      4-character table signature to look for.
 * @handler: callback invoked with the table header when the table is found.
 *
 * For the MADT signature the instance number comes from acpi_apic_instance;
 * every other signature asks for instance 0. The value returned by @handler
 * is discarded. The table is unmapped again after @handler has run.
 *
 * Return: 0 when a table was found, -ENODEV otherwise.
 */
int __init acpi_table_parse(char *id, acpi_tbl_table_handler handler)
{
	struct acpi_table_header *table = NULL;
	acpi_size tbl_size;

	/* The lookup status is not checked; table stays NULL on failure. */
	acpi_get_table_with_size(id,
				 strncmp(id, ACPI_SIG_MADT, 4) == 0 ?
					acpi_apic_instance : 0,
				 &table, &tbl_size);

	if (!table)
		return -ENODEV;

	handler(table);
	early_acpi_os_unmap_memory(table, tbl_size);

	return 0;
}
这里传入的 id 是 "SRAT",而 ACPI_SIG_MADT 是 "APIC",所以 if (strncmp(id, ACPI_SIG_MADT, 4) == 0) 这个条件不成立,
于是调用 acpi_get_table_with_size(id, 0, &table, &tbl_size);
/*
 * acpi_get_table_with_size() - find the Nth table with a given signature.
 * @signature: table signature to match against the root table list.
 * @instance:  which matching table to return; because the match counter is
 *             pre-incremented before the comparison, 0 and 1 both select
 *             the first match.
 * @out_table: receives the table pointer on successful validation.
 * @tbl_size:  receives the table length on successful validation.
 *
 * Return: the status of acpi_tb_validate_table() for the selected table,
 * or AE_NOT_FOUND when no table matches.
 */
acpi_status
acpi_get_table_with_size(char *signature,
			 u32 instance, struct acpi_table_header **out_table,
			 acpi_size *tbl_size)
{
	u32 idx;
	u32 matches = 0;
	acpi_status status;

	/* Walk the root table list */
	for (idx = 0; idx < acpi_gbl_root_table_list.current_table_count; idx++) {
		/*
		 * Skip entries with another signature, and matching entries
		 * that come before the requested instance. The counter only
		 * advances on a signature match.
		 */
		if (!ACPI_COMPARE_NAME
		    (&(acpi_gbl_root_table_list.tables[idx].signature),
		     signature) || ++matches < instance)
			continue;

		status =
		    acpi_tb_validate_table(&acpi_gbl_root_table_list.tables[idx]);
		if (ACPI_SUCCESS(status)) {
			*out_table = acpi_gbl_root_table_list.tables[idx].pointer;
			*tbl_size = acpi_gbl_root_table_list.tables[idx].length;
		}

		/* Without a permanent mapping, drop the cached pointer. */
		if (!acpi_gbl_permanent_mmap)
			acpi_gbl_root_table_list.tables[idx].pointer = NULL;

		return (status);
	}

	return (AE_NOT_FOUND);
}
这个函数会遍历 acpi_gbl_root_table_list,逐个比较 signature 是否相等,也就是查找名字是 "SRAT" 的 ACPI 表。
找到并校验成功后,就把这个表的起始地址和 length 返回:
/* Excerpt from acpi_get_table_with_size(): on successful validation the table's pointer and length are handed back to the caller. */
if (ACPI_SUCCESS(status)) {
*out_table = acpi_gbl_root_table_list.tables[i].pointer;
*tbl_size = acpi_gbl_root_table_list.tables[i].length;
}
回到 acpi_table_parse 函数,如果 table(也就是 acpi_get_table_with_size 中的 out_table)不为 NULL,
就调用 handler 函数,即
/* Excerpt from acpi_table_parse(): run the handler on the table that was found, unmap the table, and report success. */
if (table) {
handler(table);
early_acpi_os_unmap_memory(table, tbl_size);
return 0;
}
这里handler是acpi_parse_srat
/*
 * acpi_parse_srat() - SRAT handler: record the table revision.
 * @table: header of the SRAT.
 *
 * Stores the header revision in acpi_srat_revision. The entries themselves
 * are handled by the acpi_table_parse_srat() calls made by the caller.
 *
 * Return: always 0.
 */
static int __init acpi_parse_srat(struct acpi_table_header *table)
{
	acpi_srat_revision =
		((struct acpi_table_srat *)table)->header.revision;

	return 0;
}
这个函数总是返回 0;而且 acpi_table_parse 并不检查 handler 的返回值,只要能找到 "SRAT" 表它就返回 0,所以 if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) 这个条件一定成立
/* Excerpt from acpi_numa_init(): walk four SRAT entry types in turn; only the memory-affinity result is kept in cnt. */
acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
acpi_parse_x2apic_affinity, 0);
acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
acpi_parse_processor_affinity, 0);
acpi_table_parse_srat(ACPI_SRAT_TYPE_GICC_AFFINITY,
acpi_parse_gicc_affinity, 0);
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
这四个函数就是分别parse srat表中的子选项x2apic_affinity,CPU_AFFINITY,GICC_AFFINITY,MEMORY_AFFINITY,后面再详述.
/* Signature used to look up the SLIT, followed by the lookup call excerpted from acpi_numa_init(). */
#define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */
/* SLIT: System Locality Information Table */
acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
这个调用就是在查找 SLIT 表,其 handler 是 acpi_parse_slit
/*
 * acpi_parse_slit() - SLIT handler: pass the table to acpi_numa_slit_init().
 * @table: header of the SLIT.
 *
 * Return: always 0.
 */
static int __init acpi_parse_slit(struct acpi_table_header *table)
{
	acpi_numa_slit_init((struct acpi_table_slit *)table);

	return 0;
}
调用acpi_numa_slit_init
/*
 * acpi_numa_slit_init() - load the SLIT distance matrix into the NUMA core.
 * @slit: the SLIT to read.
 *
 * The table is read as a locality_count x locality_count matrix stored row
 * by row in slit->entry. Each row/column index is translated with
 * pxm_to_node(); pairs where either side maps to NUMA_NO_NODE are skipped,
 * the rest are passed to numa_set_distance().
 */
void __init __weak acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	int row, col;

	for (row = 0; row < slit->locality_count; row++) {
		const int from_node = pxm_to_node(row);

		/* No node for this row: skip the whole row. */
		if (from_node == NUMA_NO_NODE)
			continue;

		for (col = 0; col < slit->locality_count; col++) {
			const int to_node = pxm_to_node(col);

			if (to_node != NUMA_NO_NODE) {
				numa_set_distance(from_node, to_node,
					slit->entry[slit->locality_count * row + col]);
				pr_debug("SLIT: Distance[%d][%d] = %d\n",
					 from_node, to_node,
					 slit->entry[slit->locality_count * row + col]);
			}
		}
	}
}
主要是调用 numa_set_distance 来设置 NUMA 节点之间的 distance。
最后一个函数是 acpi_numa_arch_fixup,arm64 没有为它提供实现,因此是空函数。