对malloc源码的剖析,起因于我自己实现的共享内存分配器:它使用buddy算法和红黑树结构对共享内存进行分配与回收管理。对其进行性能测试时,发现性能只有malloc的1/3~1/2;原本以为自己的实现应该比malloc更快,结果出乎意料,这让我产生了一探dlmalloc源码的好奇心。为此,我找到了 http://www.malloc.de/malloc/ptmalloc2.tar.gz 的源代码进行分析,该版本在多线程环境中性能较高,具体见其中的README说明。
先拿到malloc入手,去除不必要的宏,其源码如下 void* mALLOc(size_t bytes) { arena *ar_ptr; INTERNAL_SIZE_T nb; // 补齐后的字节数 mchunkptr victim; // request2size是宏,根据bytes,对其进行调整,包括增加相应的cookie数据长度和边界对齐,并且判断是否请求的大小合法,见后面分析 if(request2size(bytes, nb)) return 0; arena_get(ar_ptr, nb); // 根据请求的长度获得对应的arena if(!ar_ptr) // 如果没有对应的arena,则返回0 return 0; victim = chunk_alloc(ar_ptr, nb); // 从arena中获得大小为nb的chunk块 if(!victim) { // 如果没有mmap区域则会导致从arena中分配chunk失败 if(ar_ptr != &main_arena) { // 解决方法1:如果当前的arena不是main_arena,则从main_arena中分配chunk (void)mutex_unlock(&ar_ptr->mutex); (void)mutex_lock(&main_arena.mutex); victim = chunk_alloc(&main_arena, nb); (void)mutex_unlock(&main_arena.mutex); } else { // 否则从其它arena中分配 #if USE_ARENAS /* ... or sbrk() has failed and there is still a chance to mmap() */ ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, nb); // 获取新的arena (void)mutex_unlock(&main_arena.mutex); if(ar_ptr) { victim = chunk_alloc(ar_ptr, nb); (void)mutex_unlock(&ar_ptr->mutex); } #endif } if(!victim) return 0; } else (void)mutex_unlock(&ar_ptr->mutex); return chunk2mem(victim); // 将chunk转换成指针 } 从上面的源码知,malloc的实现很简单: 1、根据请求的bytes大小进行调整 2、获取适当的arena 3、从arena中获取chunk 下面看一下几个重要的宏及函数 // pad request bytes into a usable size, return non-zero on overflow // 对req增加必要的cookie信息所需要的长度然后进行对齐,当溢出时返回1 #define request2size(req, nb) / ((nb = (req) + (SIZE_SZ + MALLOC_ALIGN_MASK)),/ ((long)nb <= 0 || nb < (INTERNAL_SIZE_T) (req) / ? (__set_errno (ENOMEM), 1) / : ((nb < (MINSIZE + MALLOC_ALIGN_MASK) / ? 
(nb = MINSIZE) : (nb &= ~MALLOC_ALIGN_MASK)), 0))) // 获取arena并对其上锁,首先查找被该线程所占用的arena,如果获取不到,则对arena链表进行搜索,如果仍没有可用的arean,则创建一个新的,创建新的arena时,第二个参数只是用于提示新的arena需要立即分配多大的空间 #define arena_get(ptr, size) do { / Void_t *vptr = NULL; / ptr = (arena *)tsd_getspecific(arena_key, vptr); / if(ptr && !mutex_trylock(&ptr->mutex)) { / THREAD_STAT(++(ptr->stat_lock_direct)); / } else / ptr = arena_get2(ptr, (size)); / } while(0) arena_get2从循环的arena链表中找合适的arena,如果没有找到,则创建新的arena static arena * internal_function arena_get2(arena *a_tsd, size_t size) { arena *a; heap_info *h; char *ptr; int i; unsigned long misalign; if(!a_tsd) // 如果没有传进来arena,则从main_arena开始查找 a = a_tsd = &main_arena; else { a = a_tsd->next; // a_tsd被锁住 if(!a) { // 初始化新的arena时 (void)mutex_lock(&main_arena.mutex); THREAD_STAT(++(main_arena.stat_lock_wait)); return &main_arena; } } // 从arena循环链表中搜索可用的arena repeat: do { if(!mutex_trylock(&a->mutex)) { THREAD_STAT(++(a->stat_lock_loop)); tsd_setspecific(arena_key, (Void_t *)a); return a; } a = a->next; } while(a != a_tsd); // 从arena循环链表中仍没有找到可用的arena,如果不能获得list_lock,则重新尝试 if(mutex_trylock(&list_lock)) { a = a_tsd; goto repeat; } (void)mutex_unlock(&list_lock); // 没有现成可用的arena,所以创建一个新的arena // 首先是创建一个heap h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT)); if(!h) { // 可能分配的heap太大导致不成功 // 调整大小,创建最小的arena,然后chunk_alloc通过调用mmap_chunk来处理大的chunk块分配请求 h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT); if(!h) // 仍然失败,返回0 return 0; } // 初始化heap的arena信息 a = h->ar_ptr = (arena *)(h+1); for(i=0; i<NAV; i++) init_bin(a, i); a->next = NULL; a->size = h->size; arena_mem += h->size; // 初始化该heap的mutex, lock mutex tsd_setspecific(arena_key, (Void_t *)a); mutex_init(&a->mutex); i = mutex_lock(&a->mutex); /* remember result */ // 设置heap的第一个chunk,并且要正确地对齐 ptr = (char *)(a + 1); misalign = (unsigned long)chunk2mem(ptr) & MALLOC_ALIGN_MASK; if (misalign > 0) ptr += MALLOC_ALIGNMENT - misalign; top(a) = (mchunkptr)ptr; set_head(top(a), (((char*)h + h->size) - ptr) | 
PREV_INUSE); // 把新的arena添加到arena链表中 (void)mutex_lock(&list_lock); a->next = main_arena.next; main_arena.next = a; (void)mutex_unlock(&list_lock); if(i) /* locking failed; keep arena for further attempts later */ return 0; THREAD_STAT(++(a->stat_lock_loop)); return a; } static mchunkptr internal_function chunk_alloc(arena *ar_ptr, INTERNAL_SIZE_T nb) { mchunkptr victim; /* inspected/selected chunk */ INTERNAL_SIZE_T victim_size; /* its size */ int idx; /* index for bin traversal */ mbinptr bin; /* associated bin */ mchunkptr remainder; /* remainder from a split */ long remainder_size; /* its size */ int remainder_index; /* its bin index */ unsigned long block; /* block traverser bit */ int startidx; /* first bin of a traversed block */ mchunkptr fwd; /* misc temp for linking */ mchunkptr bck; /* misc temp for linking */ mbinptr q; /* misc temp */ // 先检查是否小于512字节,如果是,则是全匹配 if (is_small_request(nb)) { idx = smallbin_index(nb); q = bin_at(ar_ptr, idx); victim = last(q); // 检查下一个bin,否则会出现对chunk进行拆分导致拆分的chunk size < MINSIZE的情况发生 if (victim == q) { q = next_bin(q); victim = last(q); } if (victim != q) // 从bin中获取到chunk,返回 { victim_size = chunksize(victim); unlink(victim, bck, fwd); set_inuse_bit_at_offset(victim, victim_size); check_malloced_chunk(ar_ptr, victim, nb); return victim; } // 从上面的两个bin中没有找到合适的chunk,则需要从其余的bin中进行查找 idx += 2; } else { // 根据nb的大小返回对应的bin的index值 idx = bin_index(nb); bin = bin_at(ar_ptr, idx); for (victim = last(bin); victim != bin; victim = victim->bk) { victim_size = chunksize(victim); remainder_size = victim_size - nb; // 拆分后的chunk的size太大,不适用, if (remainder_size >= (long)MINSIZE) { --idx; // 调整bin的index值为remainder break; } else if (remainder_size >= 0) // 完全匹配 { unlink(victim, bck, fwd); set_inuse_bit_at_offset(victim, victim_size); check_malloced_chunk(ar_ptr, victim, nb); return victim; } } ++idx; } // 利用上次拆分剩余的chunk if ( (victim = last_remainder(ar_ptr)->fd) != last_remainder(ar_ptr)) { victim_size = chunksize(victim); remainder_size = 
victim_size - nb; if (remainder_size >= (long)MINSIZE) // 仍然太大,还得继续拆分 { remainder = chunk_at_offset(victim, nb); set_head(victim, nb | PREV_INUSE); link_last_remainder(ar_ptr, remainder); set_head(remainder, remainder_size | PREV_INUSE); set_foot(remainder, remainder_size); check_malloced_chunk(ar_ptr, victim, nb); return victim; } // 上次拆分剩余的chunk的大小正好合适,把该chunk分配出去,把它从remainder链表中去掉 clear_last_remainder(ar_ptr); if (remainder_size >= 0) // remainder链表不为空,则简单把拆分后剩下的chunk进行设置 { set_inuse_bit_at_offset(victim, victim_size); check_malloced_chunk(ar_ptr, victim, nb); return victim; } // 否则需要把拆分后剩下的chunk放到remainder链表中 frontlink(ar_ptr, victim, victim_size, remainder_index, bck, fwd); } // 如果还有可能的非空并且足够大的块,从其它bin中进行查找最匹配的chunk if ( (block = idx2binblock(idx)) <= binblocks(ar_ptr)) { // 获得第一个标识的非空块 if ( (block & binblocks(ar_ptr)) == 0) { // 到偶数的块 idx = (idx & ~(BINBLOCKWIDTH - 1)) + BINBLOCKWIDTH; block <<= 1; while ((block & binblocks(ar_ptr)) == 0) { idx += BINBLOCKWIDTH; block <<= 1; } } // 对每一个可能的非空块 for (;;) { startidx = idx; // 跟踪不完整的块 q = bin = bin_at(ar_ptr, idx); // 对于每一个在该block中的bin do { // 寻找并使用第一个足够大的chunk for (victim = last(bin); victim != bin; victim = victim->bk) { victim_size = chunksize(victim); remainder_size = victim_size - nb; if (remainder_size >= (long)MINSIZE) // 拆分 { remainder = chunk_at_offset(victim, nb); set_head(victim, nb | PREV_INUSE); unlink(victim, bck, fwd); link_last_remainder(ar_ptr, remainder); set_head(remainder, remainder_size | PREV_INUSE); set_foot(remainder, remainder_size); check_malloced_chunk(ar_ptr, victim, nb); return victim; } else if (remainder_size >= 0) // 使用该chunk { set_inuse_bit_at_offset(victim, victim_size); unlink(victim, bck, fwd); check_malloced_chunk(ar_ptr, victim, nb); return victim; } } bin = next_bin(bin); } while ((++idx & (BINBLOCKWIDTH - 1)) != 0); // 清除block标识位 do { if ((startidx & (BINBLOCKWIDTH - 1)) == 0) { binblocks(ar_ptr) &= ~block; break; } --startidx; q = prev_bin(q); } while (first(q) == q); // 
获取下一个可能的非空块 if ( (block <<= 1) <= binblocks(ar_ptr) && (block != 0) ) { while ((block & binblocks(ar_ptr)) == 0) { idx += BINBLOCKWIDTH; block <<= 1; } } else break; } } // 尝试使用top chunk,要求要有一个remainder,从而确保top总是存在的 if ( (remainder_size = chunksize(top(ar_ptr)) - nb) < (long)MINSIZE) { /* If big and would otherwise need to extend, try to use mmap instead */ // 如果太大,那么需要扩展,使用mmap if ((unsigned long)nb >= (unsigned long)mmap_threshold && (victim = mmap_chunk(nb)) != 0) return victim; /* Try to extend */ // 否则使用malloc_extend_top对top-most chunk进行扩展 malloc_extend_top(ar_ptr, nb); if ((remainder_size = chunksize(top(ar_ptr)) - nb) < (long)MINSIZE) return 0; /* propagate failure */ } victim = top(ar_ptr); set_head(victim, nb | PREV_INUSE); top(ar_ptr) = chunk_at_offset(victim, nb); set_head(top(ar_ptr), remainder_size | PREV_INUSE); check_malloced_chunk(ar_ptr, victim, nb); return victim; } |