rtree
实现
./include/jemalloc/internal/rtree.h
./src/rtree.c
用法
./test/unit/rtree.c
/* 这个基数树实现是专门为“把元数据与jemalloc当前拥有的chunk关联起来”这一单一目的量身定做的
* This radix tree implementation is tailored to the singular purpose of
* associating metadata with chunks that are currently owned by jemalloc.
*
*******************************************************************************
*/
结构体定义
struct rtree_node_elm_s {
union {
void *pun;
rtree_node_elm_t *child;
extent_node_t *val;
};
};
struct rtree_level_s {
/*
* A non-NULL subtree points to a subtree rooted along the hypothetical
* path to the leaf node corresponding to key 0. Depending on what keys
* have been used to store to the tree, an arbitrary combination of
* subtree pointers may remain NULL.
*
* Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. rtree每层的位数对2取对数是4
* This results in a 3-level tree, and the leftmost leaf can be directly 2^4 * 3 = 48
* accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding
* 0x00000000) can be accessed via subtrees[1], and the remainder of the
* tree can be accessed via subtrees[0].
*
* levels[0] : [ | 0x0001******** | 0x0002******** | ...]
*
* levels[1] : [ | 0x00000001**** | 0x00000002**** | ... ]
*
* levels[2] : [val(0x000000000000) | val(0x000000000001) | ...]
*
* This has practical implications on x64, which currently uses only the
* lower 47 bits of virtual address space in userland, thus leaving 用户态虚拟地址空间当前只使用了低47位(第0-46位)
* subtrees[0] unused and avoiding a level of tree traversal.
*/
48位的key写成16进制是12位数字。上面的levels可以看做一棵类似B树的多路搜索树(基数树):从levels[1]的子树可以访问所有小于2^32的key,
从levels[2]的子树可以直接访问所有小于2^16的key,2^32到2^48之间的key需要从levels[0]开始访问
union {
void *subtree_pun; subtree指针的void *形式(与subtree共用同一块内存),供atomic_read_p等原子操作使用
rtree_node_elm_t *subtree; 这一层下的地址数组地址
};
/* Number of key bits distinguished by this level. */
unsigned bits; 这一层的位数
/*
* Cumulative number of key bits distinguished by traversing to
* corresponding tree level.
*/
unsigned cumbits; 从第0层累积的位数
};
struct rtree_s {
rtree_node_alloc_t *alloc;
rtree_node_dalloc_t *dalloc;
unsigned height; unsigned 4 字节,32位
/*
* Precomputed table used to convert from the number of leading 0 key
* bits to which subtree level to start at.
*/
unsigned start_level[RTREE_HEIGHT_MAX]; RTREE_HEIGHT_MAX = 4 = 64 / 2^LG_RTREE_BITS_PER_LEVEL = 64 / 16
rtree_level_t levels[RTREE_HEIGHT_MAX]; 4
};
rtree的相关函数
JEMALLOC_INLINE unsigned
rtree_start_level(rtree_t *rtree, uintptr_t key)
{
unsigned start_level;
if (unlikely(key == 0))
return (rtree->height - 1); key是0,返回height - 1(本例height = 3,即返回2)
start_level = rtree->start_level[lg_floor(key) >> LG_RTREE_BITS_PER_LEVEL];
key的最高位1的编号(从0开始,从右开始,最大63,最小0)右移4位,所以,lg_floor(key) >> LG_RTREE_BITS_PER_LEVEL最大值3,最小值0
第63位是1----第48位是1 对应3 (0xff ff ff ff ff ff ff ff----0x1 00 00 00 00 00 00)
第47位是1----第32位是1 对应2 (0xff ff ff ff ff ff----0x1 00 00 00 00)
第31位是1----第16位是1 对应1 (0xff ff ff ff----0x1 00 00) 64K
其他,对应0
assert(start_level < rtree->height);
return (start_level);
}
JEMALLOC_INLINE uintptr_t
rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
{
return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - rtree->levels[level].cumbits)) & ((ZU(1) << rtree->levels[level].bits) - 1));
level = 0, cumbits = 16, bits = 16, (key >> (64 - 16)) & (2^16 - 1), 取48 - 63位的数,是key的subkey
level = 1, cumbits = 32, bits = 16, (key >> (64 - 32)) & (2^16 - 1), 取32 - 47位的数
level = 2, cumbits = 43, bits = 11, (key >> (64 - 43)) & (2^11 - 1), 取21 - 31位的数
| 16 | 16 | 11 | 21(2M对齐) |
|63, 62, ... , 48|47, 46, ... , 32|31, ... 21|0, ................, 0|
}
在chunk_boot时创建了chunks_rtree,他是全局的
rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk, chunks_rtree_node_alloc, NULL)
opt_lg_chunk = LG_CHUNK_DEFAULT = 21(LG是对数的意思,2^21 = 2M,是1个chunk的大小)
bits = (1 << 6) - 21 = 64 - 21 = 43
/* Only the most significant bits of keys passed to rtree_[gs]et() are used. */
bool 传给rtree_[gs]et()的key只有最高有效位(高位部分)会被使用
rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, rtree_node_dalloc_t *dalloc)
{
unsigned bits_in_leaf, height, i;
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits>0, bits<=(8<<3 = 64)
bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL : (bits % RTREE_BITS_PER_LEVEL)
RTREE_BITS_PER_LEVEL = (ZU(1) << LG_RTREE_BITS_PER_LEVEL) = 1 << 4 = 16
如果bits能被16整除,则bits_in_leaf=16,否则bits_in_leaf=bits除以16的余数;这里bits=43,取余数11
if (bits > bits_in_leaf) {
height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL; 1 + (43 - 11) / 16 = 3
if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits)一般不会发生
height++;
} else
height = 1;
assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits);
rtree->alloc = alloc; chunks_rtree_node_alloc
rtree->dalloc = dalloc; NULL
rtree->height = height; 3
/* Root level. */根层
rtree->levels[0].subtree = NULL;
rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL : bits_in_leaf; 16
rtree->levels[0].cumbits = rtree->levels[0].bits; 16
/* Interior levels. */内部其他层
for (i = 1; i < height-1; i++) {
rtree->levels[i].subtree = NULL;
rtree->levels[i].bits = RTREE_BITS_PER_LEVEL; 16
rtree->levels[i].cumbits = rtree->levels[i-1].cumbits + RTREE_BITS_PER_LEVEL; 32
}
/* Leaf level. */叶子层
if (height > 1) {
rtree->levels[height-1].subtree = NULL;
rtree->levels[height-1].bits = bits_in_leaf; 11 最后一层的bits是零头
rtree->levels[height-1].cumbits = bits; 43 最后一层的cumbits是total bits
}
/* Compute lookup table to be used by rtree_start_level(). */
为rtree_start_level函数提供查询表,key对应的层数(先计算索引,再得到索引对应的层数)
for (i = 0; i < RTREE_HEIGHT_MAX; i++) {
rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height - 1);
rtree->start_level[0] = 2
rtree->start_level[1] = 2
rtree->start_level[2] = 1
rtree->start_level[3] = 0
}
return (false);
}
chunk_register的第一步是rtree_set(&chunks_rtree, (uintptr_t)chunk, node)
rtree_set(&chunks_rtree, (uintptr_t)chunk, node)
key是chunk的地址,value是chunk的node字段
chunks_rtree在chunk_boot中创建
start_level = rtree_start_level(rtree, key);
key = chunk,140737326874624
start_level = rtree->start_level[lg_floor(key) >> LG_RTREE_BITS_PER_LEVEL];
key就是chunk的地址,0x7ffff6600000,他的最高位1是第46位(从右从0开始),索引对应2,level是1
RTREE_HEIGHT_MAX = 4
unsigned start_level[RTREE_HEIGHT_MAX];
2 2 1 0
rtree_level_t levels[RTREE_HEIGHT_MAX];
je_lg_floor(key) = 46
46 >> 4 = 2 索引是2,第47位是1----第32位是1 (256T-1 ---- 4G)
start_level = 1
node = rtree_subtree_read(rtree, start_level);
subtree = rtree_subtree_tryread(rtree, level);
if (!rtree_node_valid(subtree))看subtree的地址是否比0x1大
subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
atomic_add_p(p, NULL)交换并相加;加数是NULL(0),所以*p的值不变,返回值t + x就是*p的当前值,相当于一次原子读
uint64_t t = x;
asm volatile (
"lock; xaddq %0, %1;" 交换t(寄存器的值)和*p(内存),二者和送入*p
: "+r" (t), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return (t + x);
return (subtree);
if (unlikely(!rtree_node_valid(subtree)))
subtree = rtree_subtree_read_hard(rtree, level);
第一次,创建
rtree_node_init(rtree, level, &rtree->levels[level].subtree)
atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)
atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)
uint8_t success;
比较ax寄存器(参数c)和*p(内存),即判断*p是否等于NULL
如果相等,s(寄存器)的值覆盖*p,zf置1
如果不等,*p的值覆盖ax,zf置0
设置ax寄存器的值(success的值)为zf
如果相等,即开始*p == 0, 之后*p = s, success = 1,返回0,
不等success=0,返回1
asm volatile (
"lock; cmpxchgq %4, %0;"
"sete %1;"
: "=m" (*p), "=a" (success) /* Outputs. */
: "m" (*p), "a" (c), "r" (s) /* Inputs. */
: "memory" /* Clobbers. */
);
return (!(bool)success);
node = rtree->alloc(ZU(1) << rtree->levels[level].bits);
rtree->levels[level].bits = 16 第一层的bits是16,分配2^16个rtree_node_elm_t(2^16个地址)
这么多地址可以表示16位能表示的所有数字组合,地址数组存在rtree->levels[1].subtree
static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms)
((rtree_node_elm_t *)base_alloc(nelms * sizeof(rtree_node_elm_t)));
(1<<16) * 8 = 512k
extent_node_init(&key, NULL, NULL, usize, false, false);
malloc_mutex_lock(&base_mtx);
node = extent_tree_szad_nsearch(&base_avail_szad, &key);
树上查找指定大小的node
if (node != NULL) {
/* Use existing space. */找到了先移除
extent_tree_szad_remove(&base_avail_szad, node);
} else {
/* Try to allocate more space. */
node = base_chunk_alloc(csize);没找到则分配
}
ret = extent_node_addr_get(node);
if (extent_node_size_get(node) > csize) {
extent_node_addr_set(node, (void *)((uintptr_t)ret + csize));
extent_node_size_set(node, extent_node_size_get(node) - csize);
extent_tree_szad_insert(&base_avail_szad, node);
从node中取走开头的csize字节,剩余部分仍由这个node表示(更新其地址和大小),再放回树中
}
malloc_mutex_unlock(&base_mtx);
return ret;
if (node == NULL)
return (NULL);
atomic_write_p((void **)elmp, node);
atomic_write_uint64((uint64_t *)p, (uint64_t)x);
交换寄存器变量x和内存*p的值,即把node的地址赋给*elmp
asm volatile (
"xchgq %1, %0;" /* Lock is implied by xchgq. */
: "=m" (*p), "+r" (x) /* Outputs. */
: "m" (*p) /* Inputs. */
: "memory" /* Clobbers. */
);
返回node地址
return (subtree);
for (i = start_level; /**/; i++, node = child) { 从1开始,node开始指向上面分配的地址数组
subkey = rtree_subkey(rtree, key, i);
key就是chunk的地址,0x7ffff6600000,level是1,取32 - 47位的数
((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - rtree->levels[level].cumbits))
& ((ZU(1) << rtree->levels[level].bits) - 1));
1. 1 << 6 = 64
2. 64 - rtree->levels[1].cumbits = 64 - 32 = 32
3. key >> 32 = 32767
4. 1 << rtree->levels[1].bits = 1 << 16 = 65536
5. 32767 & (2^16 - 1) = 32767 (0x7fff)
第二次循环,level==2,rtree->levels[level].cumbits = 43,取21 - 31位的数
rtree->levels[2].bits = 11
(key >> (64 - 43)) & 2047 = 1971(key的低32位是0xf6600000,0xf66 >> 1 = 0b11110110011 = 0x7b3)
if (i == rtree->height - 1) { rtree->height = 3(叶子层)
/* 第二次循环,进入这里
* node is a leaf, so it contains values rather than
* child pointers.
*/
子节点包含值而不是指针
这时的node是第一次的child,subkey = 1971
rtree_val_write(rtree, &node[subkey], val);
atomic_write_p(&elm->pun, val);
xchgq指令,交换内存和寄存器
这样的结果是:
&chunks_rtree->levels[1].subtree指向一个65536大小的数组,这个数组的第32767个元素的child
指向一个65536的数组,这个数组的第1971个元素里面的内容是该chunk地址对应的extent_node_t地址
return (false);
}
assert(i + 1 < rtree->height);
child = rtree_child_read(rtree, &node[subkey], i);
rtree_node_elm_t *child;
child = rtree_child_tryread(elm); elm是分配的数组的第32767个元素
child = elm->child; 元素的值现在是0(elm的值有child,pun,val三种表现形式)
if (!rtree_node_valid(child)) child的地址是否比1大
child = atomic_read_p(&elm->pun); 和NULL交换,并相加
return (child); 0x0
if (unlikely(!rtree_node_valid(child)))
child = rtree_child_read_hard(rtree, elm, level);
return rtree_node_init(rtree, level, &elm->child)
level = 1,
rtree_node_init上面已经分析过,创建大小为ZU(1) << rtree->levels[level].bits的rtree_node_elm_t数组
起始地址写在elm->child中,并返回
这样完了以后,&chunks_rtree->levels[1].subtree指向一个65536大小的数组,这个数组的第32767个元素的child
指向一个65536的数组
return (child);
if (child == NULL)
return (true);
}