slab系统初始化过程中,待所有cpu都完成初始化后,通过调用kmem_cache_init_late函数来完善cache_chain上每个struct kmem_cache实例的cpu缓存机制(包括cpu本地高速缓存和每个节点上的cpu共享缓存shared cache)
kmem_cache_init_late()
|---list_for_each_entry(cachep, &slab_caches, list)//遍历slab_caches全局链表中每个struct kmem_cache实例
enable_cpucache(cachep, GFP_NOWAIT)//对每个struct kmem_cache实例缓存机制进行完善
|--->cache_random_seq_create()//基于struct kmem_cache实例的num成员创建随机化的freelist序列
|--->计算并设置struct kmem_cache实例的limit,batchcount,shared成员
|--->do_tune_cpucache()//struct kmem_cache实例缓存机制的完善和其node成员数组中数组项的初始化
|--->__do_tune_cpucache()
|--->alloc_kmem_cache_cpus()//分配新的本地高速缓存区域(从Per_cpu area分配)
|--->将kmem_cache实例的cpu_cache指向新分配的本地高速缓存区域(Per_CPU内存空间)
|---for_each_online_cpu(cpu)//遍历每个cpu
free_block()//释放对应cpu旧的本地缓存中缓存的slab obj到对应的slab链表中
|--->free_percpu()//释放旧的Per_CPU变量,struct kmem_cache实例的高速缓存cpu_cache
|--->setup_kmem_cache_nodes()//初始化node数组中每个kmem_cache_node实例(共享缓存区域更新和相关成员初始化)
|---for_each_online_node(node)//遍历每个内存节点
setup_kmem_cache_node()//对kmem_cache_node实例进行初始化
|--->alloc_arraycache()//为当前节点共享缓存区域分配空间
|--->将kmem_cache_node实例的shared成员指向新分配的共享缓存空间
|--->init_cache_node()//初始化kmem_cache_node实例的3个slab链表等
// mm/slab.c
/*
 * Late slab initialization, run once every cpu is up:
 * 1. Finish the caching machinery of every kmem_cache on slab_caches:
 *    resize each cache's per-cpu local caches and per-node shared caches
 *    according to object size, draining the objects held by the old
 *    caches (free_block) and freeing the old caches themselves.
 * 2. Initialize the kmem_cache_node data of every cache's node[] array
 *    (chiefly the three per-node slab lists).
 */
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cache;

	slab_state = UP;

	/* 6) resize the head arrays to their final sizes */
	mutex_lock(&slab_mutex);
	/*
	 * Retune every registered cache now that the number of online cpus
	 * is known.  enable_cpucache() recomputes limit/batchcount/shared,
	 * swaps in newly sized per-cpu array caches, sets up per-node
	 * shared caches (NUMA) and initializes the per-node slab lists.
	 */
	list_for_each_entry(cache, &slab_caches, list) {
		if (enable_cpucache(cache, GFP_NOWAIT))
			BUG();
	}
	mutex_unlock(&slab_mutex);

	/* Done! */
	slab_state = FULL;

#ifdef CONFIG_NUMA
	/*
	 * Register a memory hotplug callback that initializes and frees
	 * node.
	 */
	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}
/* Called with slab_mutex held always */
/*
 * Choose the tuning parameters (limit, batchcount, shared) for @cachep —
 * inherited from its memcg root cache when one exists and is fully set,
 * otherwise estimated from the object size — then apply them through
 * do_tune_cpucache().  Returns 0 on success, negative errno on failure.
 */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int limit = 0;
	int shared = 0;
	int batchcount = 0;
	int err;

	/* Set up the randomized freelist sequence for this cache. */
	err = cache_random_seq_create(cachep, cachep->num, gfp);
	if (!err) {
		/* A memcg child cache inherits the tuning of its root. */
		if (!is_root_cache(cachep)) {
			struct kmem_cache *root = memcg_root_cache(cachep);

			limit = root->limit;
			shared = root->shared;
			batchcount = root->batchcount;
		}
		/* Only estimate values the root did not already provide. */
		if (!limit || !shared || !batchcount) {
			/*
			 * The head array serves three purposes:
			 * - create a LIFO ordering, i.e. return objects that are cache-warm
			 * - reduce the number of spinlock operations.
			 * - reduce the number of linked list operations on the slab and
			 *   bufctl chains: array operations are cheaper.
			 * The numbers are guessed, we should auto-tune as described by
			 * Bonwick.
			 *
			 * Bigger objects get fewer entries in the local cache.
			 */
			if (cachep->size > 131072)
				limit = 1;
			else if (cachep->size > PAGE_SIZE)
				limit = 8;
			else if (cachep->size > 1024)
				limit = 24;
			else if (cachep->size > 256)
				limit = 54;
			else
				limit = 120;

			/*
			 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
			 * allocation behaviour: Most allocs on one cpu, most free operations
			 * on another cpu. For these cases, an efficient object passing between
			 * cpus is necessary. This is provided by a shared array. The array
			 * replaces Bonwick's magazine layer.
			 * On uniprocessor, it's functionally equivalent (but less efficient)
			 * to a larger limit. Thus disabled by default.
			 *
			 * Each node's shared cache can hold at most
			 * cachep->shared * batchcount objects.
			 */
			shared = 0;
			if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
				shared = 8;

			batchcount = (limit + 1) / 2;
		}
		/*
		 * Install the chosen values: allocate the new per-cpu local
		 * caches and set up each node's shared cache and slab lists.
		 */
		err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
	}

	if (err)
		pr_err("enable_cpucache failed for %s, error %d\n",
		       cachep->name, -err);
	return err;
}
/*
 * Apply the chosen tuning to @cachep:
 * (1) replace its per-cpu local caches (cpu_cache) with newly sized
 *     array_cache instances, freeing the old fixed-size ones;
 * (2) record the shared value and (re)build each node's shared cache;
 * (3) initialize the three slab lists of every kmem_cache_node in node[].
 * Once slab is fully up and @cachep is a memcg root, the same tuning is
 * propagated to all of its memcg child caches; the return value reflects
 * the root cache only.
 */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
	int batchcount, int shared, gfp_t gfp)
{
	struct kmem_cache *child;
	int ret;

	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
	/* Before slab_state reaches FULL, or on error, or for a non-root
	 * (memcg child) cache there is nothing to propagate. */
	if (slab_state < FULL || ret < 0 || !is_root_cache(cachep))
		return ret;

	lockdep_assert_held(&slab_mutex);
	for_each_memcg_cache(child, cachep) {
		/* return value determined by the root cache only */
		__do_tune_cpucache(child, limit, batchcount, shared, gfp);
	}
	return ret;
}
/*
 * Swap in newly sized per-cpu array caches for @cachep, drain and free
 * the old ones, then rebuild the per-node data.  Returns 0 on success
 * or a negative errno.
 */
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
	int batchcount, int shared, gfp_t gfp)
{
	struct array_cache __percpu *cpu_cache, *prev;
	int cpu;

	/* Allocate a fresh percpu array_cache set sized by limit/batchcount. */
	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
	if (!cpu_cache)
		return -ENOMEM;
	/* Keep the old percpu caches around so they can be drained below. */
	prev = cachep->cpu_cache;
	/* Publish the newly sized local caches. */
	cachep->cpu_cache = cpu_cache;
	/*
	 * Make sure every cpu observes the new cpu_cache pointer before the
	 * old caches are drained and freed.
	 */
	kick_all_cpus_sync();
	check_irq_on();
	/* Record the tuning parameters matching the new caches. */
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;
	if (!prev)
		goto setup_node;
	/*
	 * Drain each cpu's old local cache: hand its cached objects back to
	 * the per-node slab lists.  During early slab init every local cache
	 * has avail == 0, so this loop is effectively a no-op then.
	 */
	for_each_online_cpu(cpu) {
		LIST_HEAD(list);
		int node;
		struct kmem_cache_node *n;
		struct array_cache *ac = per_cpu_ptr(prev, cpu);

		node = cpu_to_mem(cpu);
		n = get_node(cachep, node);
		spin_lock_irq(&n->list_lock);
		free_block(cachep, ac->entry, ac->avail, node, &list);
		spin_unlock_irq(&n->list_lock);
		slabs_destroy(cachep, &list);
	}
	/* Free the old percpu array caches themselves. */
	free_percpu(prev);
setup_node:
	/*
	 * (Re)build each node's shared cache and initialize the members of
	 * its kmem_cache_node instance (including the slab lists).
	 */
	return setup_kmem_cache_nodes(cachep, gfp);
}
// mm/slab.c
/* This initializes kmem_cache_node or resizes various caches for all nodes. */
/* NOTE(review): body partially elided in this excerpt ("......"). */
static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
{
	......
	/* Rebuild every online node's kmem_cache_node (shared cache + lists). */
	for_each_online_node(node) {
		ret = setup_kmem_cache_node(cachep, node, gfp, true);
	}
	......
}
/*
 * Initialize/refresh the kmem_cache_node of @cachep for @node:
 *  1. allocate the node's shared cache (room for
 *     cachep->shared * cachep->batchcount objects) and, when alien
 *     caches are in use, its alien caches;
 *  2. initialize the node's free_limit and its three slab lists via
 *     init_cache_node().
 * When @force_change is true the old shared cache is drained and
 * replaced even if one already exists.
 */
static int setup_kmem_cache_node(struct kmem_cache *cachep,
	int node, gfp_t gfp, bool force_change)
{
	int ret = -ENOMEM;
	struct kmem_cache_node *n;
	struct array_cache *old_shared = NULL;
	struct array_cache *new_shared = NULL;
	struct alien_cache **new_alien = NULL;
	LIST_HEAD(list);

	if (use_alien_caches) {
		new_alien = alloc_alien_cache(node, cachep->limit, gfp);
		if (!new_alien)
			goto fail;
	}
	/* A per-node shared cache is only needed when cachep->shared != 0. */
	if (cachep->shared) {
		/*
		 * The node's shared cache holds at most
		 * cachep->shared * cachep->batchcount objects.
		 * (0xbaadf00d is presumably a poison value for the unused
		 *  batchcount field — TODO confirm against alloc_arraycache.)
		 */
		new_shared = alloc_arraycache(node,
			cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
		if (!new_shared)
			goto fail;
	}
	/*
	 * Make sure the node's kmem_cache_node exists and is initialized:
	 * free_limit (the cap on free objects kept on this node before
	 * slabs are given back to the page allocator) and the three slab
	 * lists.
	 */
	ret = init_cache_node(cachep, node, gfp);
	if (ret)
		goto fail;

	/* Fetch this cache's kmem_cache_node for @node. */
	n = get_node(cachep, node);
	spin_lock_irq(&n->list_lock);
	/*
	 * Drain the old shared cache: return the objects referenced by
	 * n->shared->entry[] to the node's slab lists.
	 */
	if (n->shared && force_change) {
		free_block(cachep, n->shared->entry,
			n->shared->avail, node, &list);
		n->shared->avail = 0;
	}
	/* Install the newly allocated shared cache. */
	if (!n->shared || force_change) {
		old_shared = n->shared;
		n->shared = new_shared;
		new_shared = NULL;
	}
	if (!n->alien) {
		n->alien = new_alien;
		new_alien = NULL;
	}
	spin_unlock_irq(&n->list_lock);
	slabs_destroy(cachep, &list);
	/*
	 * To protect lockless access to n->shared during irq disabled context.
	 * If n->shared isn't NULL in irq disabled context, accessing to it is
	 * guaranteed to be valid until irq is re-enabled, because it will be
	 * freed after synchronize_sched().
	 */
	if (old_shared && force_change)
		synchronize_sched();
fail:
	/* Free whatever was replaced, or allocated but never installed. */
	kfree(old_shared);
	kfree(new_shared);
	free_alien_cache(new_alien);
	return ret;
}
ps: