Heap space that the user requests through malloc() or related functions such as realloc() is not reclaimed automatically by the system; the user has to release it explicitly with free(). free() is defined in glibc's malloc.c. Inside glibc, free() is actually just an alias of __libc_free(), and the bulk of __libc_free()'s work is in turn done by _int_free(). Analyzing free() therefore comes down to analyzing these two functions, __libc_free() and _int_free().
This article takes the free() implementation in glibc 2.24 as its subject and briefly walks through how free() works from the source code's point of view. Note that following along requires a basic understanding of heap allocation on Linux, in particular what bins and chunks are.
First, free() is an alias of __libc_free():
strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
__libc_free()
void
__libc_free (void *mem)
{
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */
void (*hook) (void *, const void *)
= atomic_forced_read (__free_hook);
if (__builtin_expect (hook != NULL, 0))
{
(*hook)(mem, RETURN_ADDRESS (0));
return;
}
if (mem == 0) /* free(0) has no effect */
return;
p = mem2chunk (mem);
if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
/* See if the dynamic brk/mmap threshold needs adjusting.
Dumped fake mmapped chunks do not affect the threshold. */
if (!mp_.no_dyn_threshold
&& p->size > mp_.mmap_threshold
&& p->size <= DEFAULT_MMAP_THRESHOLD_MAX
&& !DUMPED_MAIN_ARENA_CHUNK (p))
{
mp_.mmap_threshold = chunksize (p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk (p);
return;
}
ar_ptr = arena_for_chunk (p);
_int_free (ar_ptr, p, 0);
}
_int_free()
/*
------------------------------ free ------------------------------
*/
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
INTERNAL_SIZE_T size; /* its size */
mfastbinptr *fb; /* associated fastbin */
mchunkptr nextchunk; /* next contiguous chunk */
INTERNAL_SIZE_T nextsize; /* its size */
int nextinuse; /* true if nextchunk is used */
INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
mchunkptr bck; /* misc temp for linking */
mchunkptr fwd; /* misc temp for linking */
const char *errstr = NULL;
int locked = 0;
size = chunksize (p);
/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
{
errstr = "free(): invalid pointer";
errout:
if (!have_lock && locked)
(void) mutex_unlock (&av->mutex);
malloc_printerr (check_action, errstr, chunk2mem (p), av);
return;
}
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
{
errstr = "free(): invalid size";
goto errout;
}
check_inuse_chunk(av, p);
/*
If eligible, place chunk on a fastbin so it can be found
and used quickly in malloc.
*/
if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
&& (chunk_at_offset(p, size) != av->top)
#endif
) {
if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{
/* We might not have a lock at this point and concurrent modifications
of system_mem might have let to a false positive. Redo the test
after getting the lock. */
if (have_lock
|| ({ assert (locked == 0);
mutex_lock(&av->mutex);
locked = 1;
chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
|| chunksize (chunk_at_offset (p, size)) >= av->system_mem;
}))
{
errstr = "free(): invalid next size (fast)";
goto errout;
}
if (! have_lock)
{
(void)mutex_unlock(&av->mutex);
locked = 0;
}
}
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
set_fastchunks(av);
unsigned int idx = fastbin_index(size);
fb = &fastbin (av, idx);
/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;
do
{
/* Check that the top of the bin is not the record we are going to add
(i.e., double free). */
if (__builtin_expect (old == p, 0))
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
deallocated. See use of OLD_IDX below for the actual check. */
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
p->fd = old2 = old;
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
{
errstr = "invalid fastbin entry (free)";
goto errout;
}
}
/*
Consolidate other non-mmapped chunks as they arrive.
*/
else if (!chunk_is_mmapped(p)) {
if (! have_lock) {
(void)mutex_lock(&av->mutex);
locked = 1;
}
nextchunk = chunk_at_offset(p, size);
/* Lightweight tests: check whether the block is already the
top block. */
if (__glibc_unlikely (p == av->top))
{
errstr = "double free or corruption (top)";
goto errout;
}
/* Or whether the next chunk is beyond the boundaries of the arena. */
if (__builtin_expect (contiguous (av)
&& (char *) nextchunk
>= ((char *) av->top + chunksize(av->top)), 0))
{
errstr = "double free or corruption (out)";
goto errout;
}
/* Or whether the block is actually not marked used. */
if (__glibc_unlikely (!prev_inuse(nextchunk)))
{
errstr = "double free or corruption (!prev)";
goto errout;
}
nextsize = chunksize(nextchunk);
if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (nextsize >= av->system_mem, 0))
{
errstr = "free(): invalid next size (normal)";
goto errout;
}
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
/* consolidate backward */
if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}
if (nextchunk != av->top) {
/* get and clear inuse bit */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
/* consolidate forward */
if (!nextinuse) {
unlink(av, nextchunk, bck, fwd);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);
/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "free(): corrupted unsorted chunks";
goto errout;
}
p->fd = fwd;
p->bk = bck;
if (!in_smallbin_range(size))
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
bck->fd = p;
fwd->bk = p;
set_head(p, size | PREV_INUSE);
set_foot(p, size);
check_free_chunk(av, p);
}
/*
If the chunk borders the current high end of memory,
consolidate into top
*/
else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}
/*
If freeing a large space, consolidate possibly-surrounding
chunks. Then, if the total unused topmost memory exceeds trim
threshold, ask malloc_trim to reduce top.
Unless max_fast is 0, we don't know if there are fastbins
bordering top, so we cannot tell for sure whether threshold
has been reached unless fastbins are consolidated. But we
don't want to consolidate on each free. As a compromise,
consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
is reached.
*/
if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (have_fastchunks(av))
malloc_consolidate(av);
if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
if ((unsigned long)(chunksize(av->top)) >=
(unsigned long)(mp_.trim_threshold))
systrim(mp_.top_pad, av);
#endif
} else {
/* Always try heap_trim(), even if the top chunk is not
large, because the corresponding heap might go away. */
heap_info *heap = heap_for_ptr(top(av));
assert(heap->ar_ptr == av);
heap_trim(heap, mp_.top_pad);
}
}
if (! have_lock) {
assert (locked);
(void)mutex_unlock(&av->mutex);
}
}
/*
If the chunk was allocated via mmap, release via munmap().
*/
else {
munmap_chunk (p);
}
}
Let's start with the parameter of __libc_free():
void __libc_free (void *mem);
Here void *mem is simply the p in a call such as free(p).
Then there is a check:
if (mem == 0) /* free(0) has no effect */
return;
This shows that freeing a null pointer has no effect: __libc_free() does nothing and returns immediately.
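A minimal demo of this guarantee (plain standard C, nothing glibc-specific is assumed):
#include <stdlib.h>

int main (void)
{
  int *p = NULL;
  free (p);      /* free(NULL) is defined to do nothing */

  p = malloc (16);
  free (p);
  free (NULL);   /* still a no-op, regardless of earlier allocations */
  return 0;
}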
Next, the mem2chunk macro converts the user pointer void *mem into the pointer p of the corresponding chunk:
p = mem2chunk (mem);
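As a rough sketch of what this conversion does (a simplification; the real macros live in malloc.c and operate on mchunkptr, and the _sketch names below are made up for illustration): the user pointer handed out by malloc() points 2 * SIZE_SZ bytes past the start of the chunk header, so mem2chunk simply steps back over the prev_size and size fields.
#include <stddef.h>

#define SIZE_SZ (sizeof (size_t))

/* user pointer -> chunk header: step back over prev_size and size */
static void *mem2chunk_sketch (void *mem)
{
  return (void *) ((char *) mem - 2 * SIZE_SZ);
}

/* chunk header -> user pointer: the inverse operation (chunk2mem) */
static void *chunk2mem_sketch (void *chunk)
{
  return (void *) ((char *) chunk + 2 * SIZE_SZ);
}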
Next, __libc_free() checks whether p was allocated with mmap(); if not, the remaining work is handed over to _int_free():
if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
...
return;
}
ar_ptr = arena_for_chunk (p);
_int_free (ar_ptr, p, 0);
If p was obtained through mmap(), __libc_free() first decides whether the dynamic mmap allocation threshold and the trim threshold need to be adjusted:
if (!mp_.no_dyn_threshold
&& p->size > mp_.mmap_threshold
&& p->size <= DEFAULT_MMAP_THRESHOLD_MAX
&& !DUMPED_MAIN_ARENA_CHUNK (p))
{
mp_.mmap_threshold = chunksize (p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
Here mp_.mmap_threshold starts out at 128 KB. It is the cutoff the allocator uses to choose between mmap() and brk(): when a malloc() request is larger than mp_.mmap_threshold the memory is obtained with mmap(), otherwise with brk(). The threshold is adjusted dynamically. I have not fully worked through this mechanism yet and will cover it in a later article, so I won't go into detail here.
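For completeness, the threshold can also be pinned by hand with mallopt(); the sketch below assumes the usual glibc behaviour that an explicit M_MMAP_THRESHOLD setting also disables the dynamic adjustment shown above, and the request sizes are arbitrary:
#include <malloc.h>
#include <stdlib.h>

int main (void)
{
  /* requests larger than 256 KiB should now be served by mmap() */
  mallopt (M_MMAP_THRESHOLD, 256 * 1024);

  void *small = malloc (1024);         /* expected to come from the heap (brk) */
  void *large = malloc (512 * 1024);   /* expected to come from mmap() */

  free (small);                        /* released through _int_free() */
  free (large);                        /* released through munmap_chunk() */
  return 0;
}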
Back in __libc_free(), the next step is to call munmap_chunk() to do the actual release:
if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
...
munmap_chunk (p);
return;
}
I have not fully analyzed munmap_chunk() yet, and in most cases the heap space we request is fairly small and gets released through _int_free(), so munmap_chunk() is left aside for now.
In __libc_free(), if the heap space being freed was not allocated with mmap(), the remaining work is handed to _int_free(), as follows:
void
__libc_free (void *mem)
{
...
p = mem2chunk (mem);
if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
...
return;
}
ar_ptr = arena_for_chunk (p);
_int_free (ar_ptr, p, 0);
}
Now enter _int_free(), where p is the chunk to be freed. First, the chunksize() macro extracts p's size and stores it in size:
size = chunksize (p);
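For reference, chunksize() works by masking off the low bits of the size field, which the allocator uses as flags. A small sketch under the usual assumptions about glibc's chunk layout (the macro names mirror glibc's, but this is an illustration, not the real header):
#include <stddef.h>

/* the three flag bits stored in the low bits of a chunk's size field */
#define PREV_INUSE     0x1   /* previous adjacent chunk is in use */
#define IS_MMAPPED     0x2   /* chunk was obtained with mmap()    */
#define NON_MAIN_ARENA 0x4   /* chunk belongs to a non-main arena */
#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)

/* chunksize(): the real size is the field with the flag bits cleared */
static size_t chunksize_sketch (size_t size_field)
{
  return size_field & ~(size_t) SIZE_BITS;
}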
Next come three sanity checks: whether size is implausibly large or too small, whether the pointer and size are properly aligned, and whether p is really marked as in use. If any of them fails, the free is aborted and an error message is printed:
/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
{
errstr = "free(): invalid pointer";
errout:
if (!have_lock && locked)
(void) mutex_unlock (&av->mutex);
malloc_printerr (check_action, errstr, chunk2mem (p), av);
return;
}
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
{
errstr = "free(): invalid size";
goto errout;
}
check_inuse_chunk(av, p);
There are many more checks of this kind further on. Under normal operation none of them fires, so unless a check is particularly interesting, the rest of them will be skipped in what follows.
Next, p's size is examined: if it is less than or equal to get_max_fast() and (when TRIM_FASTBINS is set) the chunk does not border the top chunk, p should be recycled into a fastbin:
if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())
#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
&& (chunk_at_offset(p, size) != av->top)
#endif
)
The following code is what actually inserts p into the fastbin:
/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;
do
{
/* Check that the top of the bin is not the record we are going to add
(i.e., double free). */
if (__builtin_expect (old == p, 0))
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
deallocated. See use of OLD_IDX below for the actual check. */
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
p->fd = old2 = old;
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
A fastbin is a singly linked list that is defined to both insert and allocate at the list head, i.e. it behaves as a LIFO stack. So the essential work is simply P->FD = *FB; *FB = P;.
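A plain, non-atomic sketch of that LIFO behaviour (node and fd here only stand in for malloc_chunk and its forward pointer; the real code performs the store with a compare-and-exchange, as shown above):
#include <stddef.h>

struct node { struct node *fd; };

/* free side: push the chunk onto the head of its fastbin */
static void fastbin_push_sketch (struct node **fb, struct node *p)
{
  p->fd = *fb;   /* P->FD = *FB */
  *fb = p;       /* *FB  = P    */
}

/* malloc side: take the chunk back from the head -> LIFO order */
static struct node *fastbin_pop_sketch (struct node **fb)
{
  struct node *p = *fb;
  if (p != NULL)
    *fb = p->fd;
  return p;
}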
The glibc code shown above also contains some locking and sanity checking; one check worth pointing out is:
/* Check that the top of the bin is not the record we are going to add
(i.e., double free). */
if (__builtin_expect (old == p, 0))
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
It tests whether p is the very chunk currently sitting at the head of the fastbin. For instance, if we call free(p) and then immediately call free(p) again, the second call fails this check, so it defends against the simplest double free.
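A tiny program that trips this check on glibc 2.24 (the request size is arbitrary, it just has to fall in the fastbin range; newer glibc versions with tcache report a double free differently):
#include <stdlib.h>

int main (void)
{
  void *p = malloc (0x20);   /* fastbin-sized request */
  free (p);                  /* p is now at the head of its fastbin */
  free (p);                  /* old == p -> abort with
                                "double free or corruption (fasttop)" */
  return 0;
}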
If the size does not qualify for a fastbin, the next thing checked is whether p was allocated with mmap().
It is worth noting that __libc_free() has already made this test and only calls _int_free() when p was not allocated with mmap(), so why test again? The reason is that _int_free() is also called from other functions such as sysmalloc() and __libc_realloc(), and the p passed in from those call sites has not necessarily gone through the chunk_is_mmapped(p) check.
As in __libc_free(), if the chunk did come from mmap(), the remaining release work is handed to munmap_chunk(). Compared with __libc_free(), however, the adjustment of the mmap allocation threshold and the trim threshold is missing here.
else if (!chunk_is_mmapped(p))
{
...
}
/*
If the chunk was allocated via mmap, release via munmap().
*/
else {
munmap_chunk (p);
}
If the chunk did not come from mmap(), execution enters the branch shown above, where p is coalesced and then either inserted into the unsorted bin or merged directly into the top chunk. Frequent frees can leave the heap littered with fragments, so to keep fragmentation down the allocator tries to coalesce on every free: if an adjacent chunk is also free, the two are merged into a single larger free chunk, both backward and forward.
First it checks whether backward consolidation is possible, i.e. merging with the neighbouring chunk at the lower address. Concretely, prev_inuse(p) tells whether that previous chunk is still in use:
/* consolidate backward */
if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}
If the merge is possible, p's size and its pointer are adjusted, and the unlink() macro detaches the previous chunk from whichever bin it is linked into.
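For reference, here is a simplified, self-contained sketch of what unlink() does for a chunk in a doubly linked bin; the real macro in malloc.c additionally maintains the fd_nextsize/bk_nextsize lists of large chunks and reports errors through malloc_printerr():
#include <stdio.h>
#include <stdlib.h>

struct chunk {
  struct chunk *fd;   /* next chunk in the bin     */
  struct chunk *bk;   /* previous chunk in the bin */
};

static void unlink_sketch (struct chunk *p)
{
  struct chunk *fwd = p->fd;
  struct chunk *bck = p->bk;

  /* glibc's safety check: both neighbours must still point back at p,
     otherwise the doubly linked list has been corrupted */
  if (fwd->bk != p || bck->fd != p)
    {
      fprintf (stderr, "corrupted double-linked list\n");
      abort ();
    }

  fwd->bk = bck;   /* splice p out of the list */
  bck->fd = fwd;
}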
Then forward consolidation is considered, i.e. merging with the neighbouring chunk at the higher address:
if (nextchunk != av->top) {
/* get and clear inuse bit */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
/* consolidate forward */
if (!nextinuse) {
unlink(av, nextchunk, bck, fwd);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);
/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "free(): corrupted unsorted chunks";
goto errout;
}
p->fd = fwd;
p->bk = bck;
if (!in_smallbin_range(size))
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
bck->fd = p;
fwd->bk = p;
set_head(p, size | PREV_INUSE);
set_foot(p, size);
check_free_chunk(av, p);
}
/*
If the chunk borders the current high end of memory,
consolidate into top
*/
else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}
As the source shows, the forward direction is a bit more involved and splits into two cases: if the next chunk is not the top chunk, it is merged in when it is free, and the resulting chunk is placed on the unsorted list, where it gets one chance to be reused by malloc before being sorted into a regular bin; if the next chunk is the top chunk, the freed chunk is simply absorbed into top.
Finally, the code checks whether the size of p (including any space gained through consolidation) is greater than or equal to FASTBIN_CONSOLIDATION_THRESHOLD, defined as:
#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL)
If it is, and the arena has fastbin chunks, malloc_consolidate() is called; it coalesces every chunk sitting in the fastbins and places the results into the unsorted bin.
if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (have_fastchunks(av))
malloc_consolidate(av);
...
}