kmalloc
|----->return __kmalloc(size, flags);
| |----->return __do_kmalloc(size, flags, _RET_IP_);
| | |----->cachep = kmalloc_slab(size, flags);
| | | Find the kmem_cache instance that best fits the requested size.
| | |----->ret = slab_alloc(cachep, flags, caller);
| | | Allocate an object from a slab.
| | | |----->objp = __do_cache_alloc(cachep, flags);
| | | | |----->return ____cache_alloc(cachep, flags);
| | | | | |----->ac = cpu_cache_get(cachep);
| | | | | | Get the per-CPU array_cache; ac caches a set of ready-to-use objects.
| | | | | |----->objp = ac->entry[--ac->avail];
| | | | | | If the array_cache has a free object, return it directly; otherwise call
| | | | | | cache_alloc_refill() to refill the array_cache and allocate an object.
| | | | | |----->objp = cache_alloc_refill(cachep, flags);
| | | | | | This function first tries to get an object from an existing slab; if none is
| | | | | | available, it allocates a new slab from the buddy system and takes an object
| | | | | | from it. See the analysis of the important functions below.
| | | | | |----->return objp;
We can see that kmalloc allocates a single object whose size is greater than or equal to the requested size, so a fair amount of space can be wasted.
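To make the waste concrete, here is a hypothetical kernel-module style snippet (kmalloc_waste_demo is a made-up name, not a kernel function): ksize() reports the usable size of the object that was actually handed out, i.e. the size class the request was rounded up to.

#include <linux/slab.h>
#include <linux/printk.h>

/* Hypothetical demo function, not from the kernel sources. */
static void kmalloc_waste_demo(void)
{
        void *p = kmalloc(100, GFP_KERNEL);     /* ask for 100 bytes */

        if (!p)
                return;

        /*
         * ksize() returns the usable size of the object actually allocated.
         * With the default kmalloc caches this prints 128, i.e. the request
         * was served from the kmalloc-128 cache and 28 bytes are wasted.
         */
        pr_info("requested 100 bytes, object size is %zu bytes\n", ksize(p));
        kfree(p);
}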
kfree
|----->c = virt_to_cache(objp);
| Find the kmem_cache instance from the object's address; see the analysis of the
| important functions below.
|----->__cache_free(c, (void *)objp, _RET_IP_);
| |----->___cache_free(cachep, objp, caller);
| | |----->cache_flusharray(cachep, ac);
| | | If the array_cache is already full, this function moves objects from the
| | | array_cache into the shared array_cache or back into the slabs; see the
| | | analysis of the important functions below.
| | |----->ac->entry[ac->avail++] = objp;
| | | If the array_cache is not full, the object is simply released into the array_cache.
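The per-CPU cache that both the kmalloc and kfree fast paths operate on is struct array_cache. Its layout is roughly the following (sketched from mm/slab.c of this kernel generation; the field comments are mine): entry[] is used as a LIFO stack of free object pointers, avail is the current stack depth, limit its capacity, and batchcount how many objects are moved in or out in one go.

/* Rough sketch of struct array_cache as defined in mm/slab.c (approximate). */
struct array_cache {
        unsigned int avail;             /* number of cached object pointers */
        unsigned int limit;             /* capacity of entry[] */
        unsigned int batchcount;        /* objects transferred per refill/flush */
        unsigned int touched;           /* set when the cache was used recently */
        void *entry[];                  /* LIFO stack of free objects */
};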
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
        int batchcount;
        struct kmem_cache_node *n;
        struct array_cache *ac, *shared;
        int node;
        void *list = NULL;
        struct page *page;

        check_irq_off();
        node = numa_mem_id();

        ac = cpu_cache_get(cachep);
        batchcount = ac->batchcount;
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
                /*
                 * If there was little recent activity on this cache, then
                 * perform only a partial refill.  Otherwise we could generate
                 * refill bouncing.
                 */
                batchcount = BATCHREFILL_LIMIT;
        }
        n = get_node(cachep, node);

        BUG_ON(ac->avail > 0 || !n);
        shared = READ_ONCE(n->shared);
        if (!n->free_objects && (!shared || !shared->avail))
                goto direct_grow;

        spin_lock(&n->list_lock);
        shared = READ_ONCE(n->shared);

        /* See if we can refill from the shared array */
        if (shared && transfer_objects(ac, shared, batchcount)) {
                shared->touched = 1;
                goto alloc_done;
        }

        while (batchcount > 0) {
                /* Get slab alloc is to come from. */
                page = get_first_slab(n, false);
                if (!page)
                        goto must_grow;

                check_spinlock_acquired(cachep);

                batchcount = alloc_block(cachep, ac, page, batchcount);
                fixup_slab_list(cachep, n, page, &list);
        }

must_grow:
        n->free_objects -= ac->avail;
alloc_done:
        spin_unlock(&n->list_lock);
        fixup_objfreelist_debug(cachep, &list);

direct_grow:
        if (unlikely(!ac->avail)) {
                /* Check if we can use obj in pfmemalloc slab */
                if (sk_memalloc_socks()) {
                        void *obj = cache_alloc_pfmemalloc(cachep, n, flags);

                        if (obj)
                                return obj;
                }

                page = cache_grow_begin(cachep, gfp_exact_node(flags), node);

                /*
                 * cache_grow_begin() can reenable interrupts,
                 * then ac could change.
                 */
                ac = cpu_cache_get(cachep);
                if (!ac->avail && page)
                        alloc_block(cachep, ac, page, batchcount);
                cache_grow_end(cachep, page);

                if (!ac->avail)
                        return NULL;
        }
        ac->touched = 1;

        return ac->entry[--ac->avail];
}
This function refills the array_cache of a kmem_cache instance. Its processing goes roughly as follows:
1. If the per-node shared array_cache has objects available, move up to batchcount of them into the per-CPU array_cache with transfer_objects().
2. Otherwise walk the node's partial/free slab lists via get_first_slab() and pull objects out of those slabs with alloc_block().
3. If no slab has free objects left, grow the cache: allocate a new slab from the buddy system with cache_grow_begin()/cache_grow_end() and fill the array_cache from it.
4. Finally hand out one object with ac->entry[--ac->avail].
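For reference, the shared-array refill in step 1 is done by transfer_objects(); the helper looks roughly like this (sketched from mm/slab.c of the same kernel generation, so treat the details as approximate). It copies as many object pointers as will fit from one array_cache to the other and adjusts both avail counters.

/* Approximate sketch of transfer_objects() from mm/slab.c. */
static int transfer_objects(struct array_cache *to,
                struct array_cache *from, unsigned int max)
{
        /* Figure out how many entries to transfer */
        int nr = min3(from->avail, max, to->limit - to->avail);

        if (!nr)
                return 0;

        /* Copy the topmost nr pointers of 'from' onto the top of 'to'. */
        memcpy(to->entry + to->avail, from->entry + from->avail - nr,
                        sizeof(void *) * nr);

        from->avail -= nr;
        to->avail += nr;

        return nr;
}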
For kfree(), virt_to_cache() has to find the kmem_cache instance from nothing but the object's address. Two fields are particularly important here and are explained below: page->slab_cache and page->compound_head. virt_to_cache() calls virt_to_head_page(obj) to obtain the page the object lives in, and that page's slab_cache field points to the kmem_cache instance. So the most important thing to understand is how to get from an object's address to the page that contains it.
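virt_to_cache() itself is only a thin wrapper, roughly as follows (sketched from mm/slab.h of the same kernel generation):

/* Approximate sketch of virt_to_cache() from mm/slab.h. */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
        struct page *page = virt_to_head_page(obj);

        return page->slab_cache;
}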
static inline struct page *compound_head(struct page *page)
{
        unsigned long head = READ_ONCE(page->compound_head);

        if (unlikely(head & 1))
                return (struct page *) (head - 1);
        return page;
}

static inline struct page *virt_to_head_page(const void *x)
{
        struct page *page = virt_to_page(x);

        return compound_head(page);
}
The most important thing here is to understand page->compound_head. From the buddy system we know that pages are allocated and managed by order, so a group of physically contiguous pages may form a compound page; within such a group, every tail page's compound_head field stores the address of the first (head) page, with bit 0 set to mark it as a tail page. The processing above then takes three steps:
1. virt_to_page(x) converts the kernel virtual address into its struct page.
2. compound_head() reads page->compound_head; if bit 0 is set, this page is a tail page of a compound page, and (head - 1) is the address of the head page, which is returned.
3. Otherwise the page is not a tail page, and the page itself is returned.
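Where does that bit come from? When the page allocator prepares a compound page it marks every tail page in this way. The following is my own simplified illustration of what prep_compound_page()/set_compound_head() do (mark_tail_pages is a made-up name, not kernel code):

/*
 * Simplified illustration (not the exact kernel code): after an order-n
 * compound page is prepared, each tail page records the head page's address
 * with bit 0 set, which is exactly what compound_head() above tests for.
 */
static void mark_tail_pages(struct page *head, unsigned int order)
{
        int i;

        for (i = 1; i < (1 << order); i++) {
                struct page *tail = head + i;

                tail->compound_head = (unsigned long)head + 1;  /* address | 1 */
        }
}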
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
        int batchcount;
        struct kmem_cache_node *n;
        int node = numa_mem_id();
        LIST_HEAD(list);

        batchcount = ac->batchcount;

        check_irq_off();
        n = get_node(cachep, node);
        spin_lock(&n->list_lock);
        if (n->shared) {
                struct array_cache *shared_array = n->shared;
                int max = shared_array->limit - shared_array->avail;
                if (max) {
                        if (batchcount > max)
                                batchcount = max;
                        memcpy(&(shared_array->entry[shared_array->avail]),
                               ac->entry, sizeof(void *) * batchcount);
                        shared_array->avail += batchcount;
                        goto free_done;
                }
        }

        free_block(cachep, ac->entry, batchcount, node, &list);
free_done:
#if STATS
        {
                int i = 0;
                struct page *page;

                list_for_each_entry(page, &n->slabs_free, lru) {
                        BUG_ON(page->active);

                        i++;
                }
                STATS_SET_FREEABLE(cachep, i);
        }
#endif
        spin_unlock(&n->list_lock);
        slabs_destroy(cachep, &list);
        ac->avail -= batchcount;
        memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
This function moves objects out of the per-CPU array_cache into the shared array_cache or back into the slabs. Its processing goes roughly as follows:
1. If the node has a shared array_cache with free slots, copy up to batchcount object pointers from the bottom of ac->entry into it with memcpy().
2. Otherwise return those objects to their slabs with free_block(); completely free slabs that exceed the node's free_limit are collected on a local list and destroyed afterwards by slabs_destroy().
3. Finally decrease ac->avail by batchcount and compact the remaining pointers down to the start of ac->entry with memmove().
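One detail worth noting is that the flush removes the oldest pointers, i.e. the bottom of the per-CPU stack, and then slides the survivors down to index 0. A tiny stand-alone user-space model (illustration only; the pointer values are made up) shows the effect on ac->entry:

#include <stdio.h>
#include <string.h>

int main(void)
{
        /* a toy ac->entry with 6 cached "objects" and batchcount = 4 */
        void *entry[6] = { (void *)0x10, (void *)0x20, (void *)0x30,
                           (void *)0x40, (void *)0x50, (void *)0x60 };
        int avail = 6, batchcount = 4;

        /* entry[0..3] would go to the shared array or back to the slabs ... */
        avail -= batchcount;
        memmove(entry, &entry[batchcount], sizeof(void *) * avail);

        /* ... and the two newest pointers (0x50, 0x60) slide down to the front */
        for (int i = 0; i < avail; i++)
                printf("entry[%d] = %p\n", i, entry[i]);

        return 0;
}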
static void free_block(struct kmem_cache *cachep, void **objpp,
                        int nr_objects, int node, struct list_head *list)
{
        int i;
        struct kmem_cache_node *n = get_node(cachep, node);
        struct page *page;

        n->free_objects += nr_objects;

        for (i = 0; i < nr_objects; i++) {
                void *objp;
                struct page *page;

                objp = objpp[i];

                page = virt_to_head_page(objp);
                list_del(&page->lru);
                check_spinlock_acquired_node(cachep, node);
                slab_put_obj(cachep, page, objp);
                STATS_DEC_ACTIVE(cachep);

                /* fixup slab chains */
                if (page->active == 0)
                        list_add(&page->lru, &n->slabs_free);
                else {
                        /* Unconditionally move a slab to the end of the
                         * partial list on free - maximum time for the
                         * other objects to be freed, too.
                         */
                        list_add_tail(&page->lru, &n->slabs_partial);
                }
        }

        while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
                n->free_objects -= cachep->num;

                page = list_last_entry(&n->slabs_free, struct page, lru);
                list_move(&page->lru, list);
                n->num_slabs--;
        }
}
This function returns objects to their slabs. Its processing goes roughly as follows:
1. For each object, find its slab page with virt_to_head_page() and put the object back onto that slab's freelist with slab_put_obj().
2. If the slab now has no objects in use (page->active == 0) it is moved to the node's slabs_free list; otherwise it is moved to the tail of slabs_partial.
3. If the node now holds more free objects than free_limit allows, completely free slabs are unlinked from slabs_free onto the caller-supplied list; the caller (for example cache_flusharray()) later destroys them with slabs_destroy(), returning their pages to the buddy system.
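The destruction mentioned in step 3 is done by slabs_destroy(), which is a short loop along these lines (sketched from mm/slab.c of the same kernel generation; approximate): each page collected by free_block() is unlinked and handed to slab_destroy(), which ultimately returns the underlying pages to the buddy system.

/* Approximate sketch of slabs_destroy() from mm/slab.c. */
static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
{
        struct page *page, *n;

        list_for_each_entry_safe(page, n, list, lru) {
                list_del(&page->lru);
                slab_destroy(cachep, page);
        }
}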