4月1日,redis3.0-stable正式发布。引入了久违的cluster模式,同时进行了多处优化。本文,从源码级别对3.0和2.8.19进行对比,详细解释优化细节。由于能力及时间有限,只会对我已经读过的源码部分进行对比,同时不涉及cluster相关内容。
embedded string(embstr)优化:将robj、sds header和字符串内容分配在同一块连续内存中,减少由于cache miss带来的内存读取,进一步提升缓存命中率,在某些场景下,大幅提升速度。
/* Redis object header: a type tag, an encoding tag, an LRU stamp,
 * a reference count and the payload pointer. */
typedef struct redisObject {
unsigned type:4; /* 4-bit type tag, e.g. REDIS_STRING */
unsigned encoding:4; /* 4-bit encoding tag, e.g. REDIS_ENCODING_EMBSTR */
unsigned lru:REDIS_LRU_BITS; /* lru time (relative to server.lruclock) */
int refcount; /* reference count; set to 1 on creation */
void *ptr; /* payload pointer; for INT-encoded values the integer itself is stored here, cast to void* */
} robj;
//
// One allocation holds the robj, the sds header, the string bytes
// and the trailing '\0'.
//
robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr)+len+1);
struct sdshdr *sh = (void*)(o+1); /* the sds header starts right after the robj */
o->type = REDIS_STRING;
o->encoding = REDIS_ENCODING_EMBSTR;
o->ptr = sh+1; /* points just past the sds header */
o->refcount = 1;
o->lru = LRU_CLOCK();
sh->len = len;
sh->free = 0; /* no spare capacity recorded in the header */
if (ptr) {
//
// Copy the caller's bytes and NUL-terminate them.
//
memcpy(sh->buf,ptr,len);
sh->buf[len] = '\0';
} else {
/* No source buffer: zero the string area, terminator included. */
memset(sh->buf,0,len+1);
}
return o;
/* Width, in bits, of the lru bit-field of redisObject. */
#define REDIS_LRU_BITS 24
/* Redis object header: a type tag, an encoding tag, an LRU stamp,
 * a reference count and the payload pointer. */
typedef struct redisObject {
unsigned type:4; /* 4-bit type tag */
unsigned encoding:4; /* 4-bit encoding tag */
unsigned lru:REDIS_LRU_BITS; /* lru time (relative to server.lruclock) */
int refcount; /* reference count */
void *ptr; /* payload pointer */
} robj;
lru占用24个bit,最大值是2^24 - 1,单位是秒。那么,lru有效范围是0.5年(2^24 / 365 / 86400),当一个key半年没有被访问,其lru会重新归0,而错过踢出。
/* Cached LRU clock: unix time divided by the clock resolution, then
 * masked with REDIS_LRU_CLOCK_MAX so it fits the lru field. */
server.lruclock = (server.unixtime/REDIS_LRU_CLOCK_RESOLUTION) &
REDIS_LRU_CLOCK_MAX;
REDIS_LRU_CLOCK_RESOLUTION表示lru的精度,设置的是秒。
/* Sample server.maxmemory_samples random entries and remember the one
 * with the largest estimated idle time as the eviction candidate. */
for (k = 0; k < server.maxmemory_samples; k++) {
sds thiskey;
long thisval;
robj *o;
//
// Pick a random entry from the sampled dict.
//
de = dictGetRandomKey(dict);
thiskey = dictGetKey(de);
/* When policy is volatile-lru we need an additional lookup
 * to locate the real key, as dict is set to db->expires. */
if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
de = dictFind(db->dict, thiskey);
o = dictGetVal(de);
//
// Estimated idle time of the object (not the raw lru stamp).
//
thisval = estimateObjectIdleTime(o);
//
// Keep the key that has been idle the longest so far.
//
/* Higher idle time is better candidate for deletion */
if (bestkey == NULL || thisval > bestval) {
bestkey = thiskey;
bestval = thisval;
}
}
踢出逻辑比较简单,随机选择maxmemory_samples个对象,选择其中空闲时间(idle time)最长、即最久没有被访问的作为要踢出的key。
maxmemory_samples可以配置,默认是3。
/* LRU clock value: mstime() (presumably milliseconds -- confirm) divided
 * by the clock resolution, then masked with REDIS_LRU_CLOCK_MAX. */
(mstime()/REDIS_LRU_CLOCK_RESOLUTION) & REDIS_LRU_CLOCK_MAX;
REDIS_LRU_CLOCK_RESOLUTION为1000,单位是毫秒。注意mstime()返回的毫秒数要先除以1000,所以lru时钟每1000毫秒(即1秒)才前进一格,时钟本身的精度仍然是秒,只是时间来源和分辨率的单位换成了毫秒。
/* Redis database representation. There are multiple databases identified
 * by integers from 0 (the default database) up to the max configured
 * database. The database number is the 'id' field in the structure. */
typedef struct redisDb {
dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
dict *ready_keys; /* Blocked keys that received a PUSH */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys: candidate entries consulted when evicting */
int id; /* Database ID */
long long avg_ttl; /* Average TTL, just for stats */
} redisDb;
eviction_pool结构如下,包含一个key和其对应的空闲时间(idle)。
/* Number of slots in a database's eviction pool. */
#define REDIS_EVICTION_POOL_SIZE 16
/* One slot of the eviction pool: a candidate key and its idle time.
 * The eviction loop treats a NULL key as an empty slot. */
struct evictionPoolEntry {
unsigned long long idle; /* Object idle time. */
sds key; /* Key name. */
};
eviction_pool组织成一个数组,长度为16,并且按照idle从小到大排序。看下lru踢出逻辑,同样是在freeMemoryIfNeeded函数中:
/* Pick the key to evict from the per-db eviction pool: refill the pool,
 * then scan it from the last slot backwards until a key that still
 * exists in the sampled dict is found. */
struct evictionPoolEntry *pool = db->eviction_pool;
while(bestkey == NULL) {
//
// Refill eviction_pool with sampled candidates. How many keys are
// sampled per call is decided inside evictionPoolPopulate, which is
// not shown here.
//
evictionPoolPopulate(dict, db->dict, db->eviction_pool);
/* Go backward from best to worst element to evict. */
for (k = REDIS_EVICTION_POOL_SIZE-1; k >= 0; k--) {
if (pool[k].key == NULL) continue; /* empty slot */
de = dictFind(dict,pool[k].key);
/* Remove the entry from the pool. */
sdsfree(pool[k].key);
/* Shift all elements on its right to left. */
memmove(pool+k,pool+k+1,
sizeof(pool[0])*(REDIS_EVICTION_POOL_SIZE-k-1));
/* Clear the element on the right which is empty
 * since we shifted one position to the left. */
pool[REDIS_EVICTION_POOL_SIZE-1].key = NULL;
pool[REDIS_EVICTION_POOL_SIZE-1].idle = 0;
/* If the key exists, is our pick. Otherwise it is
 * a ghost and we need to try the next element. */
if (de) {
bestkey = dictGetKey(de);
break;
} else {
/* Ghost... */
continue;
}
}
}
填充eviction_pool时,随机选择16个key,并按照插入排序添加到pool中。填充完之后,选择pool的最后一个元素(idle最大)作为踢出对象。
oldvalue = value;
/* Reject the operation up front if adding incr would overflow the
 * long long range in either direction. */
if ((incr < 0 && oldvalue < 0 && incr < (LLONG_MIN-oldvalue)) ||
(incr > 0 && oldvalue > 0 && incr > (LLONG_MAX-oldvalue))) {
addReplyError(c,"increment or decrement would overflow");
return;
}
// value is the previous value plus the increment.
value += incr;
// Build a string robj for the new value. Values inside the shared-integer
// pool presumably reuse a shared object instead of allocating a new one
// (TODO confirm in createStringObjectFromLongLong).
new = createStringObjectFromLongLong(value);
// Store the result: overwrite the existing key or add a new one.
// Either path presumably costs one hash lookup in the db dict.
if (o)
dbOverwrite(c->db,c->argv[1],new);
else
dbAdd(c->db,c->argv[1],new);
只要调用incr命令,就会存在一次hash查找。并且,对于不在常量池范围内(小于0或者大于等于10000)的情况,需要创建新的robj
// Compute the new value.
value += incr;
if (o && o->refcount == 1 && o->encoding == REDIS_ENCODING_INT &&
(value < 0 || value >= REDIS_SHARED_INTEGERS) &&
value >= LONG_MIN && value <= LONG_MAX)
{
// The object exists, is INT-encoded, has a reference count of exactly 1,
// and the new value is outside the shared-integer range and fits in a
// long: update the object in place.
new = o;
o->ptr = (void*)((long)value); /* the integer is stored in the pointer field itself */
} else {
// Otherwise (value in the shared-integer range, object missing or not
// INT-encoded, refcount not 1, or value does not fit in a long) fall
// back to the previous approach: create a new object and store it.
new = createStringObjectFromLongLong(value);
if (o) {
dbOverwrite(c->db,c->argv[1],new);
} else {
dbAdd(c->db,c->argv[1],new);
}
}
在没有命中常量池并且引用计数为1的情况,直接修改对象的值,不需要hash查找以及创建新对象。其余情况,还走原来的流程。