The first struct: arena_s
struct arena_s {
/* This arena's index within the arenas array. */
unsigned ind; /* note: this arena's index within the arenas array */
/*
* Number of threads currently assigned to this arena. This field is
* protected by arenas_lock.
*/
unsigned nthreads; /* note: how many threads are using this arena */
/*
* There are three classes of arena operations from a locking
* perspective:
* 1) Thread assignment (modifies nthreads) is protected by arenas_lock.
* 2) Bin-related operations are protected by bin locks.
* 3) Chunk- and run-related operations are protected by this mutex.
*/
malloc_mutex_t lock;
arena_stats_t stats; /* note: statistics */
/*
* List of tcaches for extant threads associated with this arena.
* Stats from these are merged incrementally, and at exit if
* opt_stats_print is enabled.
*/
ql_head(tcache_t) tcache_ql; /* note: list of the tcaches of the threads associated with this arena */
uint64_t prof_accumbytes;
/*
* PRNG state for cache index randomization of large allocation base
* pointers.
*/
uint64_t offset_state;
dss_prec_t dss_prec;
/*
* In order to avoid rapid chunk allocation/deallocation when an arena
* oscillates right on the cusp of needing a new chunk, cache the most
* recently freed chunk. The spare is left in the arena's chunk trees
* until it is deleted.
*
* There is one spare chunk per arena, rather than one spare total, in
* order to avoid interactions between multiple threads that could make
* a single spare inadequate. (note: one spare chunk per arena avoids
* rapid chunk churn and contention between threads)
*/
arena_chunk_t *spare;
/* Minimum ratio (log base 2) of nactive:ndirty. */
ssize_t lg_dirty_mult; /* note: log2 of the minimum nactive:ndirty ratio; nactive counts pages in active runs */
/* True if a thread is currently executing arena_purge(). */
bool purging; /* note: true while some thread is running arena_purge() on this arena */
/* Number of pages in active runs and huge regions. */
size_t nactive; /* note: pages in active runs and huge regions */
/*
* Current count of pages within unused runs that are potentially
* dirty, and for which madvise(... MADV_DONTNEED) has not been called.
* By tracking this, we can institute a limit on how much dirty unused
* memory is mapped for each arena.
*/
size_t ndirty;
/*
* Size/address-ordered tree of this arena's available runs. The tree
* is used for first-best-fit run allocation.
*/
arena_avail_tree_t runs_avail; /* note: runs ordered by size then address; allocation is first-best-fit */
/* (note: the arena's dirty-memory management)
* Unused dirty memory this arena manages. Dirty memory is conceptually
* tracked as an arbitrarily interleaved LRU of dirty runs and cached
* chunks, but the list linkage is actually semi-duplicated in order to
* avoid extra arena_chunk_map_misc_t space overhead.
*
* LRU-----------------------------------------------------------MRU
*
* /-- arena ---\
* | |
* | |
* |------------| /- chunk -\
* ...->|chunks_cache|<--------------------------->| /----\ |<--...
* |------------| | |node| |
* | | | | | |
* | | /- run -\ /- run -\ | | | |
* | | | | | | | | | |
* | | | | | | | | | |
* |------------| |-------| |-------| | |----| |
* ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----...
* |------------| |-------| |-------| | |----| |
* | | | | | | | | | |
* | | | | | | | \----/ |
* | | \-------/ \-------/ | |
* | | | |
* | | | |
* \------------/ \---------/
*/
arena_runs_dirty_link_t runs_dirty; /* note: a doubly linked ring */
extent_node_t chunks_cache; /* note: one extent_node describes one region of memory */
/* Extant huge allocations. */
ql_head(extent_node_t) huge; /* note: list of huge allocations */
/* Synchronizes all huge allocation/update/deallocation. */
malloc_mutex_t huge_mtx; /* note: lock for huge allocations */
/*
* Trees of chunks that were previously allocated (trees differ only in
* node ordering). These are used when allocating chunks, in an attempt
* to re-use address space. Depending on function, different tree
* orderings are needed, which is why there are two trees with the same
* contents.
*/ /* note: these trees of extent_node_t come in pairs with identical
contents but different orderings: the szad trees sort by size then
address, the ad trees by address only. The rb_gen lines, which generate
the tree functions, are pasted in below for reference. */
extent_tree_t chunks_szad_cached;
rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, extent_szad_comp)
extent_tree_t chunks_ad_cached;
rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp)
extent_tree_t chunks_szad_retained;
extent_tree_t chunks_ad_retained;
malloc_mutex_t chunks_mtx;
/* Cache of nodes that were allocated via base_alloc(). */
ql_head(extent_node_t) node_cache; /* note: extent_nodes that were allocated via base_alloc() */
malloc_mutex_t node_cache_mtx;
/* User-configurable chunk hook functions. */
chunk_hooks_t chunk_hooks; /* note: user callbacks for allocating/freeing chunks */
/* bins is used to store trees of free regions. */
arena_bin_t bins[NBINS]; /* note: the bins array; NBINS is 39 in this build */
};
The second type: arena_avail_tree_t
typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t;
A red-black tree of arena_chunk_map_misc_t nodes; it is the runs_avail field of the arena above.
/*
* Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
* like arena_chunk_map_bits_t. Two separate arrays are stored within each
* chunk header in order to improve cache locality.
*/
struct arena_chunk_map_misc_s {
/*
 * Linkage for run trees. There are two disjoint uses:
 * 1) arena_t's runs_avail tree.
 * 2) arena_run_t conceptually uses this linkage for in-use non-full
 * runs, rather than directly embedding linkage.
 */
rb_node(arena_chunk_map_misc_t) rb_link;
union {
/* Linkage for list of dirty runs. */
arena_runs_dirty_link_t rd;
/* Profile counters, used for large object runs. */
union {
void *prof_tctx_pun;
prof_tctx_t *prof_tctx;
};
/* Small region run metadata. */
arena_run_t run;
};
};
/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_bits_s { /* note: each map_bits entry corresponds to one page */
/*
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
*
* ???????? ???????? ???nnnnn nnndumla
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
* Small: Run page offset.
* Large: Run page count for first page, unset for trailing pages.
* n : binind for small size class, BININD_INVALID for large size class.
* d : dirty?
* u : unzeroed?
* m : decommitted?
* l : large?
* a : allocated?
*
* Following are example bit patterns for the three types of runs.
*
* p : run page offset
* s : run size
* n : binind for size class; large objects set these to BININD_INVALID
* x : don't care
* - : 0
* + : 1
* [DUMLA] : bit set
* [dumla] : bit unset
*
* Unallocated (clean):
* ssssssss ssssssss sss+++++ +++dum-a
* xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx
* ssssssss ssssssss sss+++++ +++dUm-a
* (note: bits 13 and up hold the run size; bits 12..5 are all 1, i.e.
* BININD_INVALID; D clear means not dirty; U set on the last page means
* not zeroed; L clear means not large)
*
* Unallocated (dirty):
* ssssssss ssssssss sss+++++ +++D-m-a
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
* ssssssss ssssssss sss+++++ +++D-m-a
* (note: D set means dirty; U clear means zeroed; L clear, not large)
*
* Small:
* pppppppp pppppppp pppnnnnn nnnd---A
* pppppppp pppppppp pppnnnnn nnn----A
* pppppppp pppppppp pppnnnnn nnnd---A
* (note: bits 13 and up hold the run page offset, bits 12..5 the bin
* index; not dirty; allocated)
*
* Large:
* ssssssss ssssssss sss+++++ +++D--LA
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
* -------- -------- ---+++++ +++D--LA
* (note: bits 13 and up hold the run size; Dirty, Large, Allocated)
*
* Large (sampled, size <= LARGE_MINCLASS):
* ssssssss ssssssss sssnnnnn nnnD--LA
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
* -------- -------- ---+++++ +++D--LA
*
* Large (not sampled, size == LARGE_MINCLASS):
* ssssssss ssssssss sss+++++ +++D--LA
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
* -------- -------- ---+++++ +++D--LA
*/
size_t bits;
#define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
#define CHUNK_MAP_LARGE ((size_t)0x02U)
#define CHUNK_MAP_STATE_MASK ((size_t)0x3U)
#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U)
#define CHUNK_MAP_UNZEROED ((size_t)0x08U)
#define CHUNK_MAP_DIRTY ((size_t)0x10U)
#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU)
#define CHUNK_MAP_BININD_SHIFT 5
#define BININD_INVALID ((size_t)0xffU)
#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT)
#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8)
#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE)
#define CHUNK_MAP_SIZE_MASK \
(~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK))
};
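To make the encoding concrete, here is a standalone sketch (LG_PAGE = 12 is assumed; the macros are copied from the struct above) that encodes and then decodes the map_bits word of an unallocated, dirty, 8-page run:

#include <stdio.h>
#include <stddef.h>

#define LG_PAGE 12 /* assumed: 4 KiB pages */
/* macros copied from arena_chunk_map_bits_s above */
#define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
#define CHUNK_MAP_LARGE ((size_t)0x02U)
#define CHUNK_MAP_STATE_MASK ((size_t)0x3U)
#define CHUNK_MAP_DIRTY ((size_t)0x10U)
#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU)
#define CHUNK_MAP_BININD_SHIFT 5
#define BININD_INVALID ((size_t)0xffU)
#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT)
#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8)
#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE)
#define CHUNK_MAP_SIZE_MASK \
	(~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK))

int main(void) {
	/* encode: an unallocated, dirty run of 8 pages (32768 bytes) */
	size_t bits = (((size_t)8 << LG_PAGE) << CHUNK_MAP_SIZE_SHIFT)
	    | CHUNK_MAP_BININD_INVALID | CHUNK_MAP_DIRTY;
	/* decode with the same masks */
	printf("size=%zu binind=0x%zx dirty=%d large=%d allocated=%d\n",
	    (bits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT,
	    (bits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT,
	    (bits & CHUNK_MAP_DIRTY) != 0, (bits & CHUNK_MAP_LARGE) != 0,
	    (bits & CHUNK_MAP_ALLOCATED) != 0);
	/* prints: size=32768 binind=0xff dirty=1 large=0 allocated=0 */
	return 0;
}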
The third type: arena_run_t
struct arena_run_s {
/* Index of bin this run is associated with. */
szind_t binind; /* note: index of the bin, which encodes the size class */
/* Number of free regions in run. */
unsigned nfree; /* note: number of free regions */
/* Per region allocated/deallocated bitmap. */
bitmap_t bitmap[BITMAP_GROUPS_MAX]; /* note: BITMAP_GROUPS_MAX is 9 here; detailed in the malloc section */
};
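As an illustration of the per-region bitmap, a simplified sketch (single level, with bit set = region free; jemalloc actually uses a multi-level bitmap and its bitmap_sfu() helper, with the opposite bit convention) of claiming the first free region in a run:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t bitmap_t;

/* Find the lowest set (free) bit across the groups, clear it (mark the
 * region allocated), and return the region index; -1 if the run is full.
 * __builtin_ctzll assumes GCC/Clang. */
static int
region_alloc(bitmap_t *groups, unsigned ngroups)
{
	for (unsigned g = 0; g < ngroups; g++) {
		if (groups[g] != 0) {
			int bit = __builtin_ctzll(groups[g]);
			groups[g] &= ~((bitmap_t)1 << bit);
			return (int)(g * 64 + bit);
		}
	}
	return -1;
}

int main(void) {
	bitmap_t groups[2] = { 0xF0, 0x1 }; /* regions 4-7 and 64 are free */
	printf("allocated region %d\n", region_alloc(groups, 2)); /* 4 */
	printf("allocated region %d\n", region_alloc(groups, 2)); /* 5 */
	return 0;
}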
The fourth type: arena_chunk_t
/* Arena chunk header. */
struct arena_chunk_s {
/*
* A pointer to the arena that owns the chunk is stored within the node.
* This field as a whole is used by chunks_rtree to support both
* ivsalloc() and core-based debugging.
*/
extent_node_t node;
/*
* Map of pages within chunk that keeps track of free/large/small. The
* first map_bias entries are omitted, since the chunk header does not
* need to be tracked in the map. This omission saves a header page
* for common chunk sizes (e.g. 4 MiB).
*/ /* note: a chunk is 2 MiB, i.e. many pages; one map_bits entry per page */
arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */
};
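A standalone illustration (this build's measured values hard-coded as assumptions: map_bias = 13, 96-byte misc entries, 512-page chunks) of which metadata entries describe a given page; the map_bias computation itself appears further below:

#include <stdio.h>
#include <stddef.h>

int main(void) {
	/* assumed values from this build */
	const size_t map_bias = 13;          /* header pages, computed in arena_boot */
	const size_t chunk_npages = 512;     /* 2 MiB chunk / 4 KiB pages */
	const size_t map_misc_offset = 4096; /* misc array starts at page 1 */
	const size_t misc_size = 96;         /* sizeof(arena_chunk_map_misc_t) */

	/* page pageind is described by map_bits[pageind - map_bias] and by the
	 * misc entry at chunk + map_misc_offset + (pageind - map_bias) * misc_size */
	for (size_t pageind = map_bias; pageind < map_bias + 2; pageind++) {
		size_t i = pageind - map_bias;
		printf("page %3zu -> map_bits[%zu], miscelm at chunk+%zu\n",
		    pageind, i, map_misc_offset + i * misc_size);
	}
	printf("last page %zu -> map_bits[%zu]\n",
	    chunk_npages - 1, chunk_npages - 1 - map_bias);
	return 0;
}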
The fifth type: extent_node_t, which represents one extent node, i.e. one region of memory.
/* Tree of extents. Use accessor functions for en_* fields. */
struct extent_node_s {
/* Arena from which this extent came, if any. */
arena_t *en_arena; /* note: the arena this node belongs to */
/* Pointer to the extent that this tree node is responsible for. */
void *en_addr; /* note: start address of the memory */
/* Total region size. */
size_t en_size; /* note: size of the memory */
/*
* The zeroed flag is used by chunk recycling code to track whether
* memory is zero-filled.
*/
bool en_zeroed; /* note: whether it is zero-filled */
/*
* True if physical memory is committed to the extent, whether
* explicitly or implicitly as on a system that overcommits and
* satisfies physical memory needs on demand via soft page faults.
*/
bool en_committed;
/*
* The achunk flag is used to validate that huge allocation lookups
* don't return arena chunks.
*/
bool en_achunk;
/* Profile counters, used for huge objects. */
prof_tctx_t *en_prof_tctx;
/* Linkage for arena's runs_dirty and chunks_cache rings. */
arena_runs_dirty_link_t rd; /* note: linked into the arena's runs_dirty ring shown earlier */
qr(extent_node_t) cc_link; /* note: links this node with other nodes on the chunks_cache ring */
union {
/* Linkage for the size/address-ordered tree. */
rb_node(extent_node_t) szad_link; /* note: tree ordered by size, then address */
/* Linkage for arena's huge and node_cache lists. */
ql_elm(extent_node_t) ql_link;
};
/* Linkage for the address-ordered tree. */
rb_node(extent_node_t) ad_link; /* note: nodes form trees; the arena struct above holds four such extent_tree_t */
};
typedef rb_tree(extent_node_t) extent_tree_t;
tcache_t objects are kept on the arena->tcache_ql list.
struct tcache_s {
ql_elm(tcache_t) link; /* Used for aggregating stats. */
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
unsigned ev_cnt; /* Event count since incremental GC. */
szind_t next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
* element of tbins is initialized to point to the proper offset within
* this array.
*/
};
struct tcache_bin_s {
tcache_bin_stats_t tstats;
int low_water; /* Min # cached since last GC. */
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
unsigned ncached; /* # of cached objects. */
void **avail; /* Stack of available objects. */
};
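Before the allocation walkthrough, a minimal sketch (reduced field set, not jemalloc's exact fast path) of how the avail stack is used: allocation pops a cached pointer, free pushes it back.

/* Simplified tcache bin: avail is a stack of cached object pointers. */
typedef struct {
	unsigned ncached;     /* current stack depth */
	unsigned ncached_max; /* stack capacity */
	void **avail;         /* stack of available objects */
} tbin_t;

/* Allocation fast path: pop the top of the stack, or return NULL so the
 * caller can refill the bin from the arena. */
static void *
tbin_alloc(tbin_t *tbin)
{
	if (tbin->ncached == 0)
		return NULL; /* empty: refill needed */
	tbin->ncached--;
	return tbin->avail[tbin->ncached];
}

/* Free fast path: push the object back, unless the stack is full (then the
 * caller flushes part of the bin back to the arena first). */
static int
tbin_dalloc(tbin_t *tbin, void *ptr)
{
	if (tbin->ncached == tbin->ncached_max)
		return -1; /* full: flush needed */
	tbin->avail[tbin->ncached++] = ptr;
	return 0;
}

int main(void) {
	void *slots[4];
	tbin_t tbin = { 0, 4, slots };
	int x = 42;
	tbin_dalloc(&tbin, &x);      /* cache one object */
	void *p = tbin_alloc(&tbin); /* pops &x again */
	return p == &x ? 0 : 1;
}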
To allocate memory we allocate from the tcache; if the thread's tcache does not exist yet, it is created first.
Creating the tcache:
size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
That is, one tcache_t whose tbins array has nhbins = 44 entries:
size = 32 + 32 * 44 = 1440
stack_offset = size;
size += stack_nelms * sizeof(void *); /* 3696 * 8 = 29568, so size = 31008, rounded up to the 32768 size class */
The avail field of tcache_bin_t is a void* array, and its length differs from bin to bin.
As a general trend (not strictly monotonic), the larger the bin index, the smaller nregs; nregs is initialized in bin_info_init.
for (i = 0; i < NBINS; i++) {
	if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
		/* bins 19, 23, 25, 27-38 in this build */
		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN; /* 20 */
	} else if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
		/* bins 7, 11, 13, 15-18, 20-22, 24, 26 */
		tcache_bin_info[i].ncached_max = arena_bin_info[i].nregs << 1;
	} else {
		/* bins 0-6, 8-10, 12, 14: capped at TCACHE_NSLOTS_SMALL_MAX = 200 */
		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
	}
	stack_nelms += tcache_bin_info[i].ncached_max;
}
The loop does not cover all bins: nhbins is 44 while NBINS is 39, so the remaining (large) bins get:
for (; i < nhbins; i++) {
tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
stack_nelms += tcache_bin_info[i].ncached_max;
}
After the tcache is created, the avail pointer of each tcache->tbins[i] is initialized:
for (i = 0; i < nhbins; i++) {
tcache->tbins[i].lg_fill_div = 1;
tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
(uintptr_t)stack_offset);
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
}
The resulting memory layout is:
tcache_t header, tbins[0], tbins[1], ..., tbins[43], avail stack 0, avail stack 1, ..., avail stack 43
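The size computation can be reproduced standalone (the constants below are this build's measured values, hard-coded as assumptions):

#include <stdio.h>
#include <stddef.h>

int main(void) {
	/* assumed values from this build */
	const size_t nhbins = 44;
	const size_t tcache_hdr = 32;    /* offsetof(tcache_t, tbins) */
	const size_t tbin_sz = 32;       /* sizeof(tcache_bin_t) */
	const size_t stack_nelms = 3696; /* sum of all ncached_max */

	size_t size = tcache_hdr + tbin_sz * nhbins; /* 1440 */
	size_t stack_offset = size;
	size += stack_nelms * sizeof(void *);        /* + 29568 = 31008 */

	printf("tbins end / stacks begin at %zu, total %zu bytes\n",
	    stack_offset, size);
	/* jemalloc then rounds 31008 up to the next size class, 32768 */
	return 0;
}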
arena_malloc_large
arena_run_t *run = arena_run_alloc_large(arena, usize + large_pad, zero);
arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
tcache_t *tcache = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset);
/*
* Compute a uniformly distributed offset within the first page
* that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
* for 4 KiB pages and 64-byte cachelines.
*/
prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state,
UINT64_C(6364136223846793009),
UINT64_C(1442695040888963409)); /* note: ~20-digit LCG constants */
That is: compute a uniformly distributed, cacheline-aligned offset within the first page.
r = arena->offset_state * 6364136223846793009 + 1442695040888963409;
arena->offset_state = r;
r >>= (64 - lg_range); /* lg_range = LG_PAGE - LG_CACHELINE = 12 - 6 = 6, so r keeps the top 6 bits */
random_offset = ((uintptr_t)r) << LG_CACHELINE;
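The same computation as a runnable sketch (the LCG constants are copied from the call above; 4 KiB pages and 64-byte cachelines are assumed):

#include <stdio.h>
#include <stdint.h>

#define LG_PAGE      12 /* assumed: 4 KiB pages */
#define LG_CACHELINE 6  /* assumed: 64-byte cachelines */

int main(void) {
	uint64_t offset_state = 42; /* arbitrary seed */
	for (int i = 0; i < 4; i++) {
		/* one LCG step, as in the prng64() expansion above */
		offset_state = offset_state * UINT64_C(6364136223846793009)
		    + UINT64_C(1442695040888963409);
		/* keep the top lg_range = 6 bits: a value in [0, 64) */
		uint64_t r = offset_state >> (64 - (LG_PAGE - LG_CACHELINE));
		/* scale to a cacheline multiple within the page: [0, 4096) */
		uintptr_t random_offset = (uintptr_t)r << LG_CACHELINE;
		printf("random_offset = %lu\n", (unsigned long)random_offset);
	}
	return 0;
}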
Since run is a field of arena_chunk_map_misc_t, the address of the enclosing arena_chunk_map_misc_t can be recovered from it; per the struct definition, one misc entry corresponds to one page.
The arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) function:
1. Mask off the low bits of miscelm to get the chunk address (2 MiB aligned):
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
2. arena_miscelm_to_pageind(miscelm):
size_t pageind =
((uintptr_t)miscelm - ((uintptr_t)chunk + map_misc_offset))
/ sizeof(arena_chunk_map_misc_t)
+ map_bias;
map_misc_offset = 4096: page 1 of the chunk (counting from 0) holds the miscelm array, and element i corresponds to page (i + map_bias); e.g. the miscelm at chunk + 4096 + 2*96 gets pageind = 2 + 13 = 15.
3. The final address is ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))), the start address of that page.
Why is map_bias 13? It follows from sizeof(arena_chunk_map_misc_t), which is 96. A chunk has 512 pages, and every managed page needs an arena_chunk_map_misc_t: 499 * 96 = 47904 bytes, and 47904 / 4096 ≈ 11.7, so the misc entries for 499 pages occupy 12 pages; page 0 must additionally hold arena_chunk_s itself together with its dynamically sized map_bits field.
The exact computation is done in arena_boot:
/*
* Compute the header size such that it is large enough to contain the
* page map. The page map is biased to omit entries for the header
* itself, so some iteration is necessary to compute the map bias.
* (note: the header must hold the page map; since the map omits entries for
* the header pages themselves, map_bias is found iteratively)
* 1) Compute safe header_size and map_bias values that include enough
* space for an unbiased page map. (note: metadata, both map_bits and misc,
* for all chunk_npages pages)
* 2) Refine map_bias based on (1) to omit the header pages in the page
* map. The resulting map_bias may be one too small. (note: only
* (chunk_npages - map_bias) pages need metadata, so recompute with that count)
* 3) Refine map_bias based on (2). The result will be >= the result
* from (2), and will always be correct. (note: pass (2) already fits; this
* pass recomputes header_size and confirms the final value)
*/
map_bias = 0;
for (i = 0; i < 3; i++) {
size_t header_size = offsetof(arena_chunk_t, map_bits) +
((sizeof(arena_chunk_map_bits_t) + sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias));
map_bias = (header_size + PAGE_MASK) >> LG_PAGE;
}
Iteration 1: header_size = 53352, map_bias = 14
Iteration 2: header_size = 51896, map_bias = 13
Iteration 3: header_size = 52000, map_bias = 13
map_misc_offset = offsetof(arena_chunk_t, map_bits) + sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias);
((size_t)&(((arena_chunk_t *)0)->map_bits)) = 104
sizeof(arena_chunk_map_bits_t) * (chunk_npages - map_bias) = 8 * 499 = 3992
104 + 3992 = 4096 exactly, so it lands on a page boundary with no explicit alignment step at all.
The tcache address = the start of that page + random_offset.
Note that one miscelm corresponds to one run, and a run may span more than one page.
chunk = arena_chunk_alloc(arena);
if (chunk != NULL) {
run = &arena_miscelm_get(chunk, map_bias)->run;
if (arena_run_split_large(arena, run, size, zero))
run = NULL;
return (run);
}
arena_chunk_alloc(arena) returns an arena_chunk_t pointer whose address is 2 MiB aligned.
The first usable page of a freshly created chunk is (0 + map_bias), so the miscelm of that page is fetched, its run field is taken, and that run is returned; this is the arena_run_t seen above.
chunk = arena_chunk_init_hard(arena);
chunk = arena_chunk_alloc_internal(arena, &zero, &commit);
chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize, chunksize, zero, true);
chunk_recycle(...&arena->chunks_szad_cached,&arena->chunks_ad_cached...)
Recycling is tried first; only on failure is a chunk really created. When recycling, commit = true; in the other cases both zero and commit are false.
chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks, zero, commit);
chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL, chunksize, chunksize, zero, commit);
chunk_hooks->alloc(new_addr, size, alignment, zero, commit, arena->ind);
This goes through chunk_alloc_default, the allocation callback of chunk_hooks_default:
arena = chunk_arena_get(arena_ind);
ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit, arena->dss_prec);
ret = chunk_recycle(arena, &chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained...
Again, recycling is tried first; on failure the chunk is really created:
ret = chunk_alloc_mmap(size, alignment, zero, commit)
This creates the chunk with mmap: first map chunksize directly and check whether the result happens to be 2 MiB aligned; if not, unmap it and map a (4 MiB - 4 KiB) region instead, inside which a 2 MiB-aligned span of 2 MiB can always be found.
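A minimal sketch of that fallback (plain POSIX mmap; not jemalloc's exact chunk_alloc_mmap, which first tries an unaligned map and also supports hooks): over-map (size + alignment - page), then trim the unaligned head and tail.

#define _DEFAULT_SOURCE
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

#define PAGE ((size_t)4096)

static void *
mmap_aligned(size_t size, size_t alignment)
{
	size_t maplen = size + alignment - PAGE; /* 2M + 2M - 4K = 4M - 4K */
	char *addr = mmap(NULL, maplen, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED)
		return NULL;
	/* round up to the next aligned address inside the mapping */
	uintptr_t aligned = ((uintptr_t)addr + alignment - 1) &
	    ~(uintptr_t)(alignment - 1);
	size_t lead = aligned - (uintptr_t)addr;
	size_t trail = maplen - lead - size;
	if (lead != 0)
		munmap(addr, lead);                    /* trim unaligned head */
	if (trail != 0)
		munmap((char *)aligned + size, trail); /* trim the tail */
	return (void *)aligned;
}

int main(void) {
	void *chunk = mmap_aligned((size_t)2 << 20, (size_t)2 << 20);
	return chunk == NULL;
}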
arena_chunk_register(arena, chunk, *zero)
extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true);
This fills in the fields of chunk->node: the arena pointer (the arena created at bootstrap), the node address (the chunk address itself), the size (2 MiB); in this trace zeroed is false and committed is true.
extent_node_achunk_set(&chunk->node, true); /* note: achunk = true */
chunk_register(chunk, &chunk->node);
rtree_set(&chunks_rtree, (uintptr_t)chunk, node)
arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun, flag_unzeroed | flag_decommitted);
This marks page 13 (counting from 0) as an unallocated run of size arena_maxrun = 0x1f3000 bytes (1996 KiB = 0x1f3 = 499 pages); a 2 MiB chunk has 0x200 = 512 pages, so 13 pages are ceded to the header.
arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun, flag_unzeroed);
The last page (511) is likewise stamped with size arena_maxrun.
arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); /* note: inserts one run covering the chunk's pages from map_bias through chunk_npages-1 */
arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, pageind)); /* note: what actually gets inserted is the miscelm */
rtree and arena_run_split_large(arena, run, size, zero) are left for a later section.