对malloc的源码剖析源于我自己实现的共享内存分配器:它使用buddy算法和红黑树结构实现对共享内存的分配回收管理。对其性能进行测试时,发现性能只有malloc的1/3~1/2。原本以为自己的实现性能应该比malloc高,结果出乎意料,这让我产生了窥探dlmalloc源码的好奇心。为此,找到http://www.malloc.de/malloc/ptmalloc2.tar.gz的源代码进行分析,该版本在多线程环境中性能较高,具体见其中的README说明。
先从malloc入手,去除不必要的宏之后,其源码如下:
/*
 * mALLOc: allocate at least `bytes` bytes and return a pointer to the
 * usable memory, or 0 on failure.
 *
 * Steps:
 *   1. request2size() pads the request; a non-zero result aborts the call.
 *   2. arena_get() yields an arena whose mutex is held on return.
 *   3. chunk_alloc() carves the chunk out of that arena.
 *   4. If that fails, one retry is made on a different arena.
 *
 * Every path releases whichever arena mutex it ended up holding before
 * returning.
 */
void* mALLOc(size_t bytes)
{
    arena *av;
    INTERNAL_SIZE_T padded;   /* request size after padding/alignment */
    mchunkptr chunk;

    /* Non-zero means the padded size is not usable. */
    if (request2size(bytes, padded))
        return 0;

    /* On success the arena comes back with its mutex held. */
    arena_get(av, padded);
    if (av == 0)
        return 0;

    chunk = chunk_alloc(av, padded);
    if (chunk != 0) {
        /* Fast path: first attempt succeeded. */
        (void)mutex_unlock(&av->mutex);
        return chunk2mem(chunk);
    }

    /* First attempt failed: retry once on another arena. */
    if (av != &main_arena) {
        /* A secondary arena failed; fall back to main_arena. */
        (void)mutex_unlock(&av->mutex);
        (void)mutex_lock(&main_arena.mutex);
        chunk = chunk_alloc(&main_arena, padded);
        (void)mutex_unlock(&main_arena.mutex);
    } else {
#if USE_ARENAS
        /* ... or sbrk() has failed and there is still a chance to mmap() */
        /* main_arena.mutex is still held while another arena is looked up. */
        av = arena_get2(av->next ? av : 0, padded);
        (void)mutex_unlock(&main_arena.mutex);
        if (av != 0) {
            chunk = chunk_alloc(av, padded);
            (void)mutex_unlock(&av->mutex);
        }
#endif
    }

    if (chunk == 0)
        return 0;

    /* Convert the chunk header address into the user pointer. */
    return chunk2mem(chunk);
}
从上面的源码知,malloc的实现很简单:
1、根据请求的bytes大小进行调整
2、获取适当的arena
3、从arena中获取chunk
下面看一下几个重要的宏及函数
/*
 * request2size(req, nb): pad a request of `req` bytes into a usable chunk
 * size and store it in `nb`.
 *
 * nb is first set to req + SIZE_SZ + MALLOC_ALIGN_MASK. Then:
 *   - if that value is not positive when viewed as a long, or is smaller
 *     than req (the addition wrapped), errno is set to ENOMEM through
 *     __set_errno and the macro evaluates to 1;
 *   - otherwise nb becomes MINSIZE when it is below
 *     MINSIZE + MALLOC_ALIGN_MASK, else it has the MALLOC_ALIGN_MASK bits
 *     cleared, and the macro evaluates to 0.
 *
 * `nb` must be a modifiable lvalue. Both arguments are evaluated more than
 * once, so pass expressions without side effects.
 *
 * Every line of the definition except the last must end in a backslash
 * continuation; any other character there ends the macro after its first
 * line.
 */
#define request2size(req, nb) \
  (((nb) = (req) + (SIZE_SZ + MALLOC_ALIGN_MASK)), \
   ((long)(nb) <= 0 || (nb) < (INTERNAL_SIZE_T)(req) \
    ? (__set_errno(ENOMEM), 1) \
    : (((nb) < (MINSIZE + MALLOC_ALIGN_MASK) \
        ? ((nb) = MINSIZE) : ((nb) &= ~MALLOC_ALIGN_MASK)), 0)))
/*
 * arena_get(ptr, size): obtain an arena and lock it, storing it in `ptr`.
 *
 * The value stored under arena_key for the calling thread is tried first:
 * if it is non-NULL and mutex_trylock() on its mutex returns 0, that arena
 * is used as is. Otherwise arena_get2() is called, which searches the arena
 * list and, if nothing is available, creates a new arena. `size` is only
 * passed through to arena_get2() as a hint for how much room a newly
 * created arena should have right away.
 *
 * `ptr` must be a modifiable lvalue of type arena *; it is evaluated
 * several times. It may be left as 0 when arena_get2() fails.
 *
 * Every line of the definition except the last must end in a backslash
 * continuation.
 */
#define arena_get(ptr, size) do { \
  Void_t *vptr = NULL; \
  (ptr) = (arena *)tsd_getspecific(arena_key, vptr); \
  if ((ptr) && !mutex_trylock(&(ptr)->mutex)) { \
    THREAD_STAT(++((ptr)->stat_lock_direct)); \
  } else \
    (ptr) = arena_get2((ptr), (size)); \
} while (0)
arena_get2从循环的arena链表中找合适的arena,如果没有找到,则创建新的arena
/*
 * arena_get2: find an arena that can be locked right now, or create one.
 *
 * a_tsd  arena to start from (the search begins at a_tsd->next), or NULL to
 *        start at main_arena.
 * size   hint: a newly created arena's heap is first requested with room
 *        for `size` bytes plus bookkeeping overhead.
 *
 * Returns an arena whose mutex has been acquired by this call, or 0 when no
 * heap could be created or when locking the freshly created arena failed.
 */
static arena * internal_function arena_get2(arena *a_tsd, size_t size)
{
arena *a;
heap_info *h;
char *ptr;
int i;
unsigned long misalign;
if(!a_tsd) // no arena was passed in: start the search at main_arena
a = a_tsd = &main_arena;
else {
a = a_tsd->next; // NOTE(review): the original note says a_tsd is locked here - confirm against callers
if(!a) { // a_tsd is not linked into the list yet (next stays NULL until the insertion near the end of this function)
// Wait (blocking) for main_arena instead of scanning the list.
(void)mutex_lock(&main_arena.mutex);
THREAD_STAT(++(main_arena.stat_lock_wait));
return &main_arena;
}
}
// Walk the circular arena list once, looking for an arena we can lock.
repeat:
do {
// A zero result from mutex_trylock is treated as "lock acquired".
if(!mutex_trylock(&a->mutex)) {
THREAD_STAT(++(a->stat_lock_loop));
// Remember this arena for the calling thread.
tsd_setspecific(arena_key, (Void_t *)a);
return a;
}
a = a->next;
} while(a != a_tsd);
// No arena in the list could be locked. If list_lock cannot be taken
// either, scan the list again instead of creating a new arena.
if(mutex_trylock(&list_lock)) {
a = a_tsd;
goto repeat;
}
// list_lock was only probed; release it right away.
(void)mutex_unlock(&list_lock);
// Nothing is available, so create a new arena.
// Start by creating a heap large enough for the request plus overhead.
h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT));
if(!h) { // perhaps the requested heap was too large
// Retry with a minimally sized arena; the original note says chunk_alloc
// then handles the large request through mmap_chunk().
h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT);
if(!h) // still failing: give up
return 0;
}
// The arena structure is placed directly after the heap header.
a = h->ar_ptr = (arena *)(h+1);
for(i=0; i<NAV; i++)
init_bin(a, i);
a->next = NULL;
a->size = h->size;
arena_mem += h->size;
// Record the new arena for this thread, then initialise and take its mutex.
tsd_setspecific(arena_key, (Void_t *)a);
mutex_init(&a->mutex);
i = mutex_lock(&a->mutex); /* remember result */
// Set up the top chunk right after the arena structure, aligned so that
// chunk2mem() of it satisfies MALLOC_ALIGN_MASK.
ptr = (char *)(a + 1);
misalign = (unsigned long)chunk2mem(ptr) & MALLOC_ALIGN_MASK;
if (misalign > 0)
ptr += MALLOC_ALIGNMENT - misalign;
top(a) = (mchunkptr)ptr;
// The top chunk spans from ptr to the end of the heap.
set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE);
// Insert the new arena into the list, directly after main_arena.
(void)mutex_lock(&list_lock);
a->next = main_arena.next;
main_arena.next = a;
(void)mutex_unlock(&list_lock);
if(i) /* locking failed; keep arena for further attempts later */
return 0;
THREAD_STAT(++(a->stat_lock_loop));
return a;
}
/*
 * chunk_alloc: obtain a chunk of (already padded) size nb from arena ar_ptr.
 *
 * Search order, as written below:
 *   1. the bin indexed for nb (small requests also try the next bin);
 *   2. the arena's last-remainder chunk;
 *   3. bins with higher indices, using the binblocks bitmap to skip whole
 *      blocks of BINBLOCKWIDTH bins whose bit is clear;
 *   4. the top chunk, after trying mmap_chunk() for requests of at least
 *      mmap_threshold and then malloc_extend_top().
 *
 * Returns the selected chunk, or 0 when the top chunk cannot be made large
 * enough.
 *
 * NOTE(review): this function takes no lock itself; the callers visible in
 * this file hold the arena mutex around the call - confirm for other callers.
 */
static mchunkptr internal_function chunk_alloc(arena *ar_ptr, INTERNAL_SIZE_T nb)
{
mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T victim_size; /* its size */
int idx; /* index for bin traversal */
mbinptr bin; /* associated bin */
mchunkptr remainder; /* remainder from a split */
long remainder_size; /* its size */
int remainder_index; /* its bin index */
unsigned long block; /* block traverser bit */
int startidx; /* first bin of a traversed block */
mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */
mbinptr q; /* misc temp */
// Small request (per is_small_request): take any chunk from its bin
// without comparing sizes.
if (is_small_request(nb))
{
idx = smallbin_index(nb);
q = bin_at(ar_ptr, idx);
victim = last(q);
// Bin is empty: also try the next bin. The original note says a chunk from
// it would leave a remainder smaller than MINSIZE, so it is used whole.
if (victim == q)
{
q = next_bin(q);
victim = last(q);
}
if (victim != q) // a chunk was found in one of the two bins: return it
{
victim_size = chunksize(victim);
unlink(victim, bck, fwd);
set_inuse_bit_at_offset(victim, victim_size);
check_malloced_chunk(ar_ptr, victim, nb);
return victim;
}
// Neither bin had a chunk; the later scan starts two bins further on.
idx += 2;
}
else
{
// Pick the bin index that corresponds to nb.
idx = bin_index(nb);
bin = bin_at(ar_ptr, idx);
// Walk the bin from its last chunk backwards via the bk links.
for (victim = last(bin); victim != bin; victim = victim->bk)
{
victim_size = chunksize(victim);
remainder_size = victim_size - nb;
// This chunk is big enough to be split; do not take it yet.
if (remainder_size >= (long)MINSIZE)
{
--idx; // cancels the ++idx below, so this bin is rescanned after the last-remainder check
break;
}
else if (remainder_size >= 0) // fits with less than MINSIZE to spare: use it whole
{
unlink(victim, bck, fwd);
set_inuse_bit_at_offset(victim, victim_size);
check_malloced_chunk(ar_ptr, victim, nb);
return victim;
}
}
++idx;
}
// Try the chunk left over from the most recent split.
if ( (victim = last_remainder(ar_ptr)->fd) != last_remainder(ar_ptr))
{
victim_size = chunksize(victim);
remainder_size = victim_size - nb;
if (remainder_size >= (long)MINSIZE) // large enough to split again
{
// Hand out the first nb bytes; the rest becomes the new last remainder.
remainder = chunk_at_offset(victim, nb);
set_head(victim, nb | PREV_INUSE);
link_last_remainder(ar_ptr, remainder);
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
check_malloced_chunk(ar_ptr, victim, nb);
return victim;
}
// Not splittable: in either remaining case it stops being the last remainder.
clear_last_remainder(ar_ptr);
if (remainder_size >= 0) // fits with less than MINSIZE to spare: use it whole
{
set_inuse_bit_at_offset(victim, victim_size);
check_malloced_chunk(ar_ptr, victim, nb);
return victim;
}
// Too small for this request: pass the chunk to frontlink().
// NOTE(review): presumably this files it into its regular bin - confirm.
frontlink(ar_ptr, victim, victim_size, remainder_index, bck, fwd);
}
// Scan the bins from idx upwards, one block of BINBLOCKWIDTH bins at a
// time, but only if the bitmap value is at least this block's bit.
if ( (block = idx2binblock(idx)) <= binblocks(ar_ptr))
{
// Move to the first block whose bit is set in the bitmap.
if ( (block & binblocks(ar_ptr)) == 0)
{
// Round idx up to the start of the next block.
idx = (idx & ~(BINBLOCKWIDTH - 1)) + BINBLOCKWIDTH;
block <<= 1;
while ((block & binblocks(ar_ptr)) == 0)
{
idx += BINBLOCKWIDTH;
block <<= 1;
}
}
// For each block that may contain a non-empty bin.
for (;;)
{
startidx = idx; // remember where this (possibly partial) block scan started
q = bin = bin_at(ar_ptr, idx);
// For each bin in this block.
do
{
// Find and use the first chunk that is large enough.
for (victim = last(bin); victim != bin; victim = victim->bk)
{
victim_size = chunksize(victim);
remainder_size = victim_size - nb;
if (remainder_size >= (long)MINSIZE) // split the chunk
{
remainder = chunk_at_offset(victim, nb);
set_head(victim, nb | PREV_INUSE);
unlink(victim, bck, fwd);
link_last_remainder(ar_ptr, remainder);
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
check_malloced_chunk(ar_ptr, victim, nb);
return victim;
}
else if (remainder_size >= 0) // use the chunk whole
{
set_inuse_bit_at_offset(victim, victim_size);
unlink(victim, bck, fwd);
check_malloced_chunk(ar_ptr, victim, nb);
return victim;
}
}
bin = next_bin(bin);
} while ((++idx & (BINBLOCKWIDTH - 1)) != 0);
// Nothing usable in this block. Walk backwards from startidx; the block's
// bit is cleared only if the start of the block is reached, and the walk
// stops early at the first non-empty bin.
do
{
if ((startidx & (BINBLOCKWIDTH - 1)) == 0)
{
binblocks(ar_ptr) &= ~block;
break;
}
--startidx;
q = prev_bin(q);
} while (first(q) == q);
// Advance to the next block with its bit set; block != 0 catches the bit
// being shifted out.
if ( (block <<= 1) <= binblocks(ar_ptr) && (block != 0) )
{
while ((block & binblocks(ar_ptr)) == 0)
{
idx += BINBLOCKWIDTH;
block <<= 1;
}
}
else
break;
}
}
// Try the top chunk. A remainder of at least MINSIZE is required, so that
// a top chunk always exists afterwards.
if ( (remainder_size = chunksize(top(ar_ptr)) - nb) < (long)MINSIZE)
{
/* If big and would otherwise need to extend, try to use mmap instead */
if ((unsigned long)nb >= (unsigned long)mmap_threshold &&
(victim = mmap_chunk(nb)) != 0)
return victim;
/* Try to extend */
// Grow the top chunk with malloc_extend_top, then check the size again.
malloc_extend_top(ar_ptr, nb);
if ((remainder_size = chunksize(top(ar_ptr)) - nb) < (long)MINSIZE)
return 0; /* propagate failure */
}
// Carve nb bytes off the front of top; what is left becomes the new top.
victim = top(ar_ptr);
set_head(victim, nb | PREV_INUSE);
top(ar_ptr) = chunk_at_offset(victim, nb);
set_head(top(ar_ptr), remainder_size | PREV_INUSE);
check_malloced_chunk(ar_ptr, victim, nb);
return victim;
}
总结:dlmalloc采用边界标识算法(boundary tag)和bin对内存进行管理,从bin中进行分配时采用smallest-first、best-fit算法,通过arena将heap与bin对应起来。