Linux 内存管理的代码有了一些改变,在此记录下来。
首先是内存初始化:从 init/main.c 中的 start_kernel() 函数(带 __init 标记)开始 --> build_all_zonelists(NULL, NULL) --> __build_all_zonelists(NULL) -->
/* return values int ....just for stop_machine() */
static int __build_all_zonelists(void *data)//从_ref build_all_zonelists()函数中调用__build_all_zonelists(NULL)
{
int nid;
int cpu;
pg_data_t *self = data;
#ifdef CONFIG_NUMA
memset(node_load, 0, sizeof(node_load));//以节点为下标的数组
#endif
if (self && !node_online(self->node_id)) {//初始化时,self=data=NULL,不执行
build_zonelists(self);
build_zonelist_cache(self);
}
for_each_online_node(nid) {//循环找到系统的所有节点
pg_data_t *pgdat = NODE_DATA(nid);
build_zonelists(pgdat);
build_zonelist_cache(pgdat);
}
/*
* Initialize the boot_pagesets that are going to be used
* for bootstrapping processors. The real pagesets for
* each zone will be allocated later when the per cpu
* allocator is available.
*
* boot_pagesets are used also for bootstrapping offline
* cpus if the system is already booted because the pagesets
* are needed to initialize allocators on a specific cpu too.
* F.e. the percpu allocator needs the page allocator which
* needs the percpu allocator in order to allocate its pagesets
* (a chicken-egg dilemma).
*/
for_each_possible_cpu(cpu) {
setup_pageset(&per_cpu(boot_pageset, cpu), 0);
#ifdef CONFIG_HAVE_MEMORYLESS_NODES
/*
* We now know the "local memory node" for each node--
* i.e., the node of the first zone in the generic zonelist.
* Set up numa_mem percpu variable for on-line cpus. During
* boot, only the boot cpu should be on-line; we'll init the
* secondary cpus' numa_mem as they come on-line. During
* node/memory hotplug, we'll fixup all on-line cpus.
*/
if (cpu_online(cpu))
set_cpu_numa_mem(cpu, local_memory_node(cpu_to_node(cpu)));
#endif
}
return 0;
}
/*
 * build_zonelists - build the allocation fallback lists for one node.
 * @pgdat: node whose node_zonelists[] entries are (re)built.
 *
 * Resets every zonelist of @pgdat to empty, then repeatedly asks
 * find_next_best_node() for the next node to fall back to, starting from
 * @pgdat's own node id. Depending on current_zonelist_order the zones are
 * appended either node by node (ZONELIST_ORDER_NODE) or, once all nodes
 * have been collected in node_order[], zone type by zone type
 * (ZONELIST_ORDER_ZONE). Finally the this-node-only list is built.
 */
static void build_zonelists(pg_data_t *pgdat)
{
	int j, node, load;
	enum zone_type i;
	nodemask_t used_mask;
	int local_node, prev_node;
	struct zonelist *zonelist;
	int order = current_zonelist_order;

	/*
	 * Initialize zonelists: make each of the MAX_ZONELISTS lists empty
	 * (the original note states MAX_ZONELISTS = 2 in this configuration).
	 */
	for (i = 0; i < MAX_ZONELISTS; i++) {
		zonelist = pgdat->node_zonelists + i;
		zonelist->_zonerefs[0].zone = NULL;
		zonelist->_zonerefs[0].zone_idx = 0;
	}

	/* NUMA-aware ordering of nodes */
	local_node = pgdat->node_id;
	load = nr_online_nodes;
	prev_node = local_node;
	nodes_clear(used_mask);

	/* node_order is declared as: static int node_order[MAX_NUMNODES] */
	memset(node_order, 0, sizeof(node_order));
	j = 0;

	/*
	 * Per the original note, find_next_best_node() picks the remaining
	 * node closest to local_node; the loop ends when it returns < 0.
	 */
	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
		/*
		 * We don't want to pressure a particular node.
		 * So adding penalty to the first node in same
		 * distance group to make it round-robin.
		 */
		if (node_distance(local_node, node) !=
		    node_distance(local_node, prev_node))
			node_load[node] = load;

		prev_node = node;
		load--;

		/* The ordering mode was chosen earlier by set_zonelist_order(). */
		if (order == ZONELIST_ORDER_NODE)
			build_zonelists_in_node_order(pgdat, node);
		else
			node_order[j++] = node;	/* remember order */
	}
	/*
	 * In node order, node_zonelists[0]._zonerefs[] of the local node ends
	 * up looking like:
	 *   n1.m n1.h n1.n n1.d32 n1.d  n2.m n2.h n2.n n2.d32 n2.d ...
	 * where n1, n2 are nodes and m = movable, h = highmem, n = normal,
	 * d32 = DMA32, d = DMA.
	 */

	/* Node ordering or zone ordering: decided by set_zonelist_order(). */
	if (order == ZONELIST_ORDER_ZONE) {
		/* calculate node order -- i.e., DMA last! */
		build_zonelists_in_zone_order(pgdat, j);
		/*
		 * The result differs from node order; _zonerefs[] becomes:
		 *   n1.m n2.m n3.m  n1.h n2.h n3.h ...
		 */
	}

	/* Build the list that contains only this node's own zones. */
	build_thisnode_zonelists(pgdat);
}
ZONELIST_ORDER_NODE :
/*
 * Build zonelists ordered by node and zones within node.
 * This results in maximum locality--normal zone overflows into local
 * DMA zone, if any--but risks exhausting DMA zone.
 *
 * Appends the zones of @node to the end of @pgdat's node_zonelists[0].
 */
static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
{
	struct zonelist *fallback = &pgdat->node_zonelists[0];
	int tail = 0;

	/*
	 * Skip to the current end marker. When the list is still empty
	 * (first entry has a NULL zone) the loop body never runs.
	 */
	while (fallback->_zonerefs[tail].zone != NULL)
		tail++;

	tail = build_zonelists_node(NODE_DATA(node), fallback, tail,
				    MAX_NR_ZONES - 1);

	/*
	 * Write a new end marker; the next call to this function finds it
	 * in the scan above and continues appending from that position.
	 */
	fallback->_zonerefs[tail].zone = NULL;
	fallback->_zonerefs[tail].zone_idx = 0;
}
/*
 * build_zonelists_node - append one node's populated zones to a zonelist.
 * @pgdat:     node whose zones are examined.
 * @zonelist:  list being filled.
 * @nr_zones:  index in @zonelist->_zonerefs[] of the first free slot.
 * @zone_type: highest zone type to consider; must be < MAX_NR_ZONES.
 *
 * Walks the zone types from @zone_type down to 0 and stores every zone for
 * which populated_zone() is true. Returns the index of the next free slot.
 */
static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
				int nr_zones, enum zone_type zone_type)
{
	struct zone *candidate;

	BUG_ON(zone_type >= MAX_NR_ZONES);

	/* Explicit break at 0: zone_type is an enum, so never step below 0. */
	for (;;) {
		candidate = &pgdat->node_zones[zone_type];
		if (populated_zone(candidate)) {
			zoneref_set_zone(candidate,
					 &zonelist->_zonerefs[nr_zones]);
			nr_zones++;
			check_highest_zone(zone_type);
		}

		if (zone_type == 0)
			break;
		zone_type--;
	}

	return nr_zones;
}
ZONELIST_ORDER_ZONE:
/*
 * build_zonelists_in_zone_order - fill node_zonelists[0] zone type first.
 * @pgdat:    node whose node_zonelists[0] is rebuilt from index 0.
 * @nr_nodes: number of valid entries in node_order[].
 *
 * For each zone type, from MAX_NR_ZONES - 1 down to 0, the nodes recorded
 * in node_order[] are visited in order and each zone for which
 * populated_zone() is true is appended. The list is then terminated with
 * a NULL zone entry.
 */
static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
{
	struct zonelist *fallback = &pgdat->node_zonelists[0];
	struct zone *candidate;
	int ztype;	/* needs to be signed: the loop ends below zero */
	int idx;
	int filled = 0;

	for (ztype = MAX_NR_ZONES - 1; ztype >= 0; ztype--) {
		for (idx = 0; idx < nr_nodes; idx++) {
			candidate =
			    &NODE_DATA(node_order[idx])->node_zones[ztype];
			if (!populated_zone(candidate))
				continue;

			zoneref_set_zone(candidate,
					 &fallback->_zonerefs[filled]);
			filled++;
			check_highest_zone(ztype);
		}
	}

	/* End-of-list marker. */
	fallback->_zonerefs[filled].zone = NULL;
	fallback->_zonerefs[filled].zone_idx = 0;
}
配置 node_zonelists[1],也就是只包含本节点各 zone 的排序:
/*
 * Build gfp_thisnode zonelists
 *
 * Fills @pgdat's node_zonelists[1] from index 0 with the zones of @pgdat
 * itself only, then terminates the list with a NULL zone entry.
 */
static void build_thisnode_zonelists(pg_data_t *pgdat)
{
	struct zonelist *own_list = &pgdat->node_zonelists[1];
	int end = build_zonelists_node(pgdat, own_list, 0, MAX_NR_ZONES - 1);

	own_list->_zonerefs[end].zone = NULL;
	own_list->_zonerefs[end].zone_idx = 0;
}
//j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
/*
 * build_zonelists_node - append one node's populated zones to a zonelist.
 * @pgdat:     node whose zones are examined.
 * @zonelist:  list being filled.
 * @nr_zones:  index in @zonelist->_zonerefs[] of the first free slot.
 * @zone_type: highest zone type to consider; must be < MAX_NR_ZONES.
 *
 * Walks the zone types from @zone_type down to 0 and stores every zone for
 * which populated_zone() is true. Returns the index of the next free slot.
 */
static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
				int nr_zones, enum zone_type zone_type)
{
	struct zone *candidate;

	BUG_ON(zone_type >= MAX_NR_ZONES);

	/* Explicit break at 0: zone_type is an enum, so never step below 0. */
	for (;;) {
		candidate = &pgdat->node_zones[zone_type];
		if (populated_zone(candidate)) {
			zoneref_set_zone(candidate,
					 &zonelist->_zonerefs[nr_zones]);
			nr_zones++;
			check_highest_zone(zone_type);
		}

		if (zone_type == 0)
			break;
		zone_type--;
	}

	return nr_zones;
}
/*
 * build_zonelist_cache - set up the zonelist cache of node_zonelists[0].
 * @pgdat: node whose first zonelist gets its cache initialized.
 *
 * Points zlcache_ptr at the zonelist's embedded zlcache, clears the
 * fullzones bitmap, and records for every entry of _zonerefs[] (up to the
 * NULL-zone terminator) the value returned by zonelist_node_idx().
 */
static void build_zonelist_cache(pg_data_t *pgdat)
{
	struct zonelist *fallback = &pgdat->node_zonelists[0];
	struct zonelist_cache *cache = &fallback->zlcache;
	int slot;

	fallback->zlcache_ptr = cache;
	bitmap_zero(cache->fullzones, MAX_ZONES_PER_ZONELIST);

	/* z_to_n[] uses the same index as the entry in _zonerefs[]. */
	for (slot = 0; fallback->_zonerefs[slot].zone; slot++)
		cache->z_to_n[slot] =
			zonelist_node_idx(&fallback->_zonerefs[slot]);
}
主要还是自己看源代码吧。初始化后,每个 node 节点中的 node_zonelists 是这样的:
m1 == node1 movable; h1 == node1 highmem; n1 == node1 normal; d32.1 == node1 DMA32; d1 == node1 DMA
m2 == node2 movable; .........
ZONELIST_ORDER_NODE:pgdat->node_zonelists[0]._zonerefs[0 ~ 节点数 × 每个节点的 zone 数] = m1 h1 n1 d32.1 d1 m2 h2 n2 d32.2 d2 .....(先按节点,节点内再按 zone 类型从高到低排列)
ZONELIST_ORDER_ZONE:pgdat->node_zonelists[0]._zonerefs[0 ~ 节点数 × 每个节点的 zone 数] = m1 m2 m3 h1 h2 h3 n1 n2 n3 d32.1 d32.2 d32.3 d1 d2 d3(先按 zone 类型从高到低,同一类型内再按节点排列)
本节点:pgdat->node_zonelists[1]._zonerefs[0 ~ 本节点的 zone 数] = m h n d32 d(只包含本节点中 populated_zone() 为真的 zone)
有不正确的地方,欢迎指正,谢谢。
转载地址:http://blog.csdn.net/yuzhihui_no1/article/details/50759567