Memcached源码分析 - 网络模型(1)
Memcached源码分析 - 命令解析(2)
Memcached源码分析 - 数据存储(3)
Memcached源码分析 - 增删改查操作(4)
Memcached源码分析 - 内存存储机制Slabs(5)
Memcached源码分析 - LRU淘汰算法(6)
Memcached源码分析 - 消息回应(7)
这篇文章的目的是想把Memcached的内存管理机制讲解清楚,在前面的文章中我们已经提交到Item是Memcached中存储的数据单元,而Item的内存分配策略就是本章的重点了。
Memcached的存储机制Slabs,所有Item相关的内存分配都由Slabs来实现的。
typedef struct _stritem {
/* Protected by LRU locks */
struct _stritem *next;
struct _stritem *prev;
/* Rest are protected by an item lock */
struct _stritem *h_next; /* hash chain next */
rel_time_t time; /* least recent access */
rel_time_t exptime; /* expire time */
int nbytes; /* size of data */
unsigned short refcount;
uint8_t nsuffix; /* length of flags-and-length string */
uint8_t it_flags; /* ITEM_* above */
uint8_t slabs_clsid;/* which slab class we're in */
uint8_t nkey; /* key length, w/terminating null and padding */
/* this odd type prevents type-punning issues when we do
* the little shuffle to save space when not using CAS. */
union {
uint64_t cas;
char end;
} data[];
/* if it_flags & ITEM_CAS we have 8 bytes CAS */
/* then null-terminated key */
/* then " flags length\r\n" (no terminating null) */
/* then data with terminating \r\n (no terminating null; it's binary!) */
} item;
static void settings_init(void) {
settings.use_cas = true;
settings.access = 0700;
settings.port = 11211;
settings.udpport = 0;
/* By default this string should be NULL for getaddrinfo() */
settings.inter = NULL;
settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
settings.maxconns = 1024; /* to limit connections-related memory to about 5MB */
settings.verbose = 0;
settings.oldest_live = 0;
settings.oldest_cas = 0; /* supplements accuracy of oldest_live */
settings.evict_to_free = 1; /* push old items out of cache when memory runs out */
settings.socketpath = NULL; /* by default, not using a unix socket */
settings.factor = 1.25;
settings.chunk_size = 48; /* space for a modest key and value */
settings.num_threads = 4; /* N workers */
settings.num_threads_per_udp = 0;
settings.prefix_delimiter = ':';
settings.detail_enabled = 0;
settings.reqs_per_event = 20;
settings.backlog = 1024;
settings.binding_protocol = negotiating_prot;
settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
settings.slab_page_size = 1024 * 1024; /* chunks are split from 1MB pages. */
settings.slab_chunk_size_max = settings.slab_page_size / 2;
settings.sasl = false;
settings.maxconns_fast = true;
settings.lru_crawler = false;
settings.lru_crawler_sleep = 100;
settings.lru_crawler_tocrawl = 0;
settings.lru_maintainer_thread = false;
settings.lru_segmented = true;
settings.hot_lru_pct = 20;
settings.warm_lru_pct = 40;
settings.hot_max_factor = 0.2;
settings.warm_max_factor = 2.0;
settings.inline_ascii_response = false;
settings.temp_lru = false;
settings.temporary_ttl = 61;
settings.idle_timeout = 0; /* disabled */
settings.hashpower_init = 0;
settings.slab_reassign = true;
settings.slab_automove = 1;
settings.slab_automove_ratio = 0.8;
settings.slab_automove_window = 30;
settings.shutdown_command = false;
settings.tail_repair_time = TAIL_REPAIR_TIME_DEFAULT;
settings.flush_enabled = true;
settings.dump_enabled = true;
settings.crawls_persleep = 1000;
settings.logger_watcher_buf_size = LOGGER_WATCHER_BUF_SIZE;
settings.logger_buf_size = LOGGER_BUF_SIZE;
settings.drop_privileges = false;
#ifdef MEMCACHED_DEBUG
settings.relaxed_privileges = false;
#endif
}
slabclass_t结构的核心字段已经进行注释了,核心void *slots表示空间的chunk列表,其实也就是item列表。
typedef struct {
unsigned int size; //当前的slabclass存储最大多大的item
unsigned int perslab; //每一个slab上可以存储多少个item.每个slab大小为1M, 可以存储的item个数根据size决定。
void *slots; //空闲列表,其实是连续的内存分配,但是还是通过item结构中的item->next和item->prev连建立链表结构关系
unsigned int sl_curr; //当sl_curr=0的时候,说明已经没有空闲的item,需要分配一个新的slab(每个1M,可以切割成N多个Item结构)
unsigned int slabs; //总共分配多少个slabs
void **slab_list; //分配的slab链表
unsigned int list_size; /* size of prev array */
size_t requested; //总共请求的总bytes
} slabclass_t;
从这里开始进入slabclass内存初始化的核心逻辑,整体过程如下:
slabclass 划分数据空间
slab 内存分配单位
对应到源码当中:
#define POWER_SMALLEST 1
#define SLAB_GLOBAL_PAGE_POOL 0
#define POWER_LARGEST 256 /* actual cap is 255 */
#define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1)
static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];
void slabs_init(const size_t limit, const double factor, const bool prealloc, const uint32_t *slab_sizes) {
int i = POWER_SMALLEST - 1;
// 设置每个slabclass中item的size大小
unsigned int size = sizeof(item) + settings.chunk_size;
memset(slabclass, 0, sizeof(slabclass));
// 每次以1.5倍的大小增加slabclass中item的size大小
while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1) {
if (slab_sizes != NULL) {
if (slab_sizes[i-1] == 0)
break;
size = slab_sizes[i-1];
} else if (size >= settings.slab_chunk_size_max / factor) {
// 如果已经达到了最大值settings.slab_chunk_size_max
break;
}
// 执行对齐操作
if (size % CHUNK_ALIGN_BYTES)
size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
//每个slabclass[i]存储最大多大的item,size表示item的大小
slabclass[i].size = size;
slabclass[i].perslab = settings.slab_page_size / slabclass[i].size;
// 这里以1.5倍的速率增加slabclass中item的大小
if (slab_sizes == NULL)
size *= factor;
}
// 根据settings.slab_chunk_size_max的值设置最多多少个slabclass的个数
power_largest = i;
slabclass[power_largest].size = settings.slab_chunk_size_max;
slabclass[power_largest].perslab = settings.slab_page_size / settings.slab_chunk_size_max;
{
char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
if (t_initial_malloc) {
mem_malloced = (size_t)atol(t_initial_malloc);
}
}
// 进入slabclass的分配
if (prealloc && do_slab_prealloc) {
slabs_preallocate(power_largest);
}
}
static void slabs_preallocate (const unsigned int maxslabs) {
int i;
unsigned int prealloc = 0;
// 遍历每个slabclass进行初始化,这里会综合考虑最大的支持的slabclass和根据内存分配设置的合适的个数,
//这里的POWER_SMALLEST的值是1,slabclass的数组的第一个元素是特殊意义的
for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
if (++prealloc > maxslabs)
return;
// 进行slabclass的分配
if (do_slabs_newslab(i) == 0) {
exit(1);
}
}
}
// 这里的id就是slabclass的下标,这里已经进入到单个slabclass初始化过程了
static int do_slabs_newslab(const unsigned int id) {
slabclass_t *p = &slabclass[id];
slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
// 这里我们在计算slab的大小
int len = (settings.slab_reassign || settings.slab_chunk_size_max != settings.slab_page_size)
? settings.slab_page_size
: p->size * p->perslab;
char *ptr;
if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
&& g->slabs == 0)) {
mem_limit_reached = true;
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}
// 通过memory_allocate函数分配len大小的内存空间
if ((grow_slab_list(id) == 0) ||
(((ptr = get_page_from_global_pool()) == NULL) &&
((ptr = memory_allocate((size_t)len)) == 0))) {
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}
// 初始化该块内存为0
memset(ptr, 0, (size_t)len);
split_slab_page_into_freelist(ptr, id);
// slab_list保存slabs个数的slab的指针
p->slab_list[p->slabs++] = ptr;
MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);
return 1;
}
//初始化单个slabclass下第一个slab当中的item。
static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
slabclass_t *p = &slabclass[id];
int x;
//遍历slab下的perslab个数的size大小的内存块,将所有的内存块挂到free_list当中
for (x = 0; x < p->perslab; x++) {
do_slabs_free(ptr, 0, id);
ptr += p->size;
}
}
//初始化slab当中单个item的内存空间
static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
slabclass_t *p;
item *it;
p = &slabclass[id];
it = (item *)ptr;
if ((it->it_flags & ITEM_CHUNKED) == 0) {
//省略相关代码
} else {
do_slabs_free_chunked(it, size);
}
return;
}
// 采用链表的头部插入法生成p->slots,也就是说slab当中分配的item以链表的形式进行连接并保存到p->slots当中
static void do_slabs_free_chunked(item *it, const size_t size) {
item_chunk *chunk = (item_chunk *) ITEM_data(it);
slabclass_t *p;
it->it_flags = ITEM_SLABBED;
it->slabs_clsid = 0;
it->prev = 0;
// header object's original classid is stored in chunk.
p = &slabclass[chunk->orig_clsid];
if (chunk->next) {
chunk = chunk->next;
chunk->prev = 0;
} else {
// header with no attached chunk
chunk = NULL;
}
// return the header object.
// TODO: This is in three places, here and in do_slabs_free().
it->prev = 0;
it->next = p->slots;
if (it->next) it->next->prev = it;
p->slots = it;
p->sl_curr++;
// TODO: macro
p->requested -= it->nkey + 1 + it->nsuffix + sizeof(item) + sizeof(item_chunk);
if (settings.use_cas) {
p->requested -= sizeof(uint64_t);
}
item_chunk *next_chunk;
while (chunk) {
assert(chunk->it_flags == ITEM_CHUNK);
chunk->it_flags = ITEM_SLABBED;
p = &slabclass[chunk->slabs_clsid];
chunk->slabs_clsid = 0;
next_chunk = chunk->next;
chunk->prev = 0;
chunk->next = p->slots;
if (chunk->next) chunk->next->prev = chunk;
p->slots = chunk;
p->sl_curr++;
p->requested -= chunk->size + sizeof(item_chunk);
chunk = next_chunk;
}
return;
}
其实两幅图都是对的,但是侧重点稍微不一样。
slabs_clsid方法,主要通过item的长度来查询应该适合存放到哪个slabsclass_t上面。找的逻辑就是简单的找到第一个可以容纳得下size大小的slabclass。
//通过item的size,选择当前的item适合放在哪个slab class中
unsigned int slabs_clsid(const size_t size) {
int res = POWER_SMALLEST; //从id = 1开始查找
//slabclass这个结构上的size会存储该class适合多大的item存储
//例如
//slabclass[0] 存储96byte
//slabclass[1] 存储120byte
//slabclass[2] 存储150byte
//则,如果存储的item等于109byte,则存储在slabclass[1]上
if (size == 0)
return 0;
while (size > slabclass[res].size)
if (res++ == power_largest) /* won't fit in the biggest slab */
return 0;
return res;
}
Memcached源码分析 - Memcached源码分析之存储机制Slabs
Memcached内存存储