环境说明:Redis 源码版本 5.0.3;我在阅读源码的过程中做了注释,git 地址:https://gitee.com/xiaoangg/redis_annotation
参考书籍:《Redis设计与实现》
1.压缩列表的构成
以ziplistNew(创建一个空的压缩列表)接口为入口,可以更容易地理解压缩列表的构成。
/* Size in bytes of the fixed ziplist header: one uint32_t holding the total
 * byte count, one uint32_t holding the offset of the last entry, and one
 * uint16_t holding the number of entries. */
#define ZIPLIST_HEADER_SIZE (2*sizeof(uint32_t)+sizeof(uint16_t))

/* Total number of bytes the ziplist is composed of. This is the first
 * 32-bit word of the header, so dereferencing the start of 'zl' as a
 * uint32_t yields it. Expands to an lvalue (readable and assignable). */
#define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl)))

/* Offset of the last entry inside the ziplist. It lives in the second
 * 32-bit header word, i.e. sizeof(uint32_t) bytes past the start of 'zl'.
 * Expands to an lvalue. */
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))

/* Number of entries in the ziplist, or UINT16_MAX if the count cannot be
 * determined without scanning the whole ziplist. It is the 16-bit field
 * that follows the two 32-bit header words. Expands to an lvalue. */
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+2*sizeof(uint32_t))))

/* Special "end of ziplist" marker byte. */
#define ZIP_END 0xff
/* Allocate and return a new, empty ziplist: a header immediately followed
 * by the end marker, with no entries in between. */
unsigned char *ziplistNew(void) {
    /* An empty list is just the header plus the one-byte end marker. */
    unsigned int total = ZIPLIST_HEADER_SIZE+1;
    unsigned char *list = zmalloc(total);

    /* First header word: total number of bytes the ziplist occupies. */
    ZIPLIST_BYTES(list) = intrev32ifbe(total);
    /* Second header word: offset of the tail entry from the start of the
     * list. With no entries it is set to the header size. */
    ZIPLIST_TAIL_OFFSET(list) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    /* Entry counter: nothing stored yet. */
    ZIPLIST_LENGTH(list) = 0;
    /* The very last byte is the 255 end-of-list marker. */
    list[total-1] = ZIP_END;

    return list;
}
2.压缩列表中节点的构成
2.1 previous_entry_len
上源码:解析previous_entry_len属性的逻辑位于ziplist.c中的宏ZIP_DECODE_PREVLEN。
/* Threshold for the encoded length of the previous entry: when the first
 * prevlen byte is below this value the length is stored in that single
 * byte; otherwise the first byte is the marker 254 and the length is
 * stored in the following 4 bytes (5 bytes in total). */
#define ZIP_BIG_PREVLEN 254

/* Return the number of bytes used to encode the length of the previous
 * entry. The length is returned by setting the var 'prevlensize'.
 *
 * For example, a result of 5 means the first 5 bytes at 'ptr' all belong
 * to the prevlen field: the first byte holds the marker 254 and the next
 * four bytes hold the previous entry's length.
 *
 * 'ptr' is evaluated once; 'prevlensize' must be an assignable lvalue.
 *
 * The expansion deliberately does NOT end with a semicolon: the caller's
 * own ';' completes the statement, so the macro can be used as the body of
 * an unbraced if/else. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    if ((ptr)[0] < ZIP_BIG_PREVLEN) {                                          \
        (prevlensize) = 1;                                                     \
    } else {                                                                   \
        (prevlensize) = 5;                                                     \
    }                                                                          \
} while(0)
/* Return the length of the previous element, and the number of bytes that
 * are used in order to encode the previous element length.
 *
 * 'ptr' must point to the prevlen prefix of an entry (that encodes the
 * length of the previous entry in order to navigate the elements backward).
 * The length of the previous entry is stored in 'prevlen', the number of
 * bytes needed to encode the previous entry length are stored in
 * 'prevlensize'.
 *
 * In the 1-byte form the length is the first byte itself. In the 5-byte
 * form the first byte is skipped and the following 4 bytes are copied into
 * 'prevlen' (which must therefore be a 4-byte object, as asserted) and then
 * passed through memrev32ifbe().
 *
 * The expansion deliberately does NOT end with a semicolon: the caller's
 * own ';' completes the statement, so the macro can be used as the body of
 * an unbraced if/else. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    } else if ((prevlensize) == 5) {                                           \
        assert(sizeof((prevlen)) == 4);                                        \
        memcpy(&(prevlen), ((char*)(ptr)) + 1, 4);                             \
        memrev32ifbe(&(prevlen));                                              \
    }                                                                          \
} while(0)
2.2 encoding
encoding记录所属节点的content的编码方式,以及长度。
/* Mask selecting the two most significant bits of the encoding byte
 * (0xc0 == 0b11000000); those bits distinguish string encodings from
 * integer encodings. */
#define ZIP_STR_MASK 0xc0

/* Evaluate to non-zero when 'enc' denotes a string entry. String entries
 * never start with "11" as most significant bits of the first byte, so an
 * encoding is a string exactly when its two top bits are not both set. */
#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) != ZIP_STR_MASK)
编码 | encoding编码长度 | content保存的值 |
---|---|---|
1100 0000 | 1字节 | int_16 类型的整数 |
1101 0000 | 1字节 | int_32类型的整数 |
1110 0000 | 1字节 | int_64类型的整数 |
1111 0000 | 1字节 | 24位的有符号整数 |
1111 1110 | 1字节 | 8位有符号整数 |
1111 xxxx | 1字节 | 当值大于等于0且小于等于12时使用该编码方式(xxxx的取值为0001~1101,实际值为xxxx减1);这时节点没有content属性,值直接编码到encoding属性中。 |
/* Integer encodings. The two top bits are always "11" (0xc0); for the
 * 16/32/64/24-bit variants, bits 5-4 select the integer width. */
#define ZIP_INT_16B (0xc0 | (0 << 4)) /* 0b11000000: 16-bit integer */
#define ZIP_INT_32B (0xc0 | (1 << 4)) /* 0b11010000: 32-bit integer */
#define ZIP_INT_64B (0xc0 | (2 << 4)) /* 0b11100000: 64-bit integer */
#define ZIP_INT_24B (0xc0 | (3 << 4)) /* 0b11110000: 24-bit integer */
#define ZIP_INT_8B (0xfe)             /* 0b11111110: 8-bit integer */
/* Check whether the string at 'entry' ('entrylen' bytes) can be stored as
 * an integer.
 *
 * On success the integer is written to '*v', the narrowest integer
 * encoding able to hold it is written to '*encoding', and 1 is returned.
 * Otherwise 0 is returned and neither output is touched. */
int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
    long long parsed;
    unsigned char enc;

    /* Empty strings and strings of 32 bytes or more are never encoded
     * as integers. */
    if (entrylen == 0 || entrylen >= 32) return 0;

    /* Not an integer: leave it to be stored as a string. */
    if (!string2ll((char*)entry,entrylen,&parsed)) return 0;

    /* Pick the smallest encoding whose range contains the value. */
    if (parsed >= 0 && parsed <= 12) {
        /* Small values are folded into the encoding byte itself. */
        enc = ZIP_INT_IMM_MIN+parsed;
    } else if (parsed >= INT8_MIN && parsed <= INT8_MAX) {
        enc = ZIP_INT_8B;
    } else if (parsed >= INT16_MIN && parsed <= INT16_MAX) {
        enc = ZIP_INT_16B;
    } else if (parsed >= INT24_MIN && parsed <= INT24_MAX) {
        enc = ZIP_INT_24B;
    } else if (parsed >= INT32_MIN && parsed <= INT32_MAX) {
        enc = ZIP_INT_32B;
    } else {
        enc = ZIP_INT_64B;
    }

    *encoding = enc;
    *v = parsed;
    return 1;
}
2.3 content
content属性负责保存节点的值;
节点的值可以是整数,或是字节数组;
值的类型和长度存储在encoding属性中;
上面说过,每个节点都有一个previous_entry_len属性记录上一节点的长度;
如果上一节点长度小于254字节,那么previous_entry_len 属性需要一个字节来存储;
如果上一节点长度大于等于254字节,那么previous_entry_len 属性需要五个字节来存储;
那么问题来了:如果列表中有多个连续的节点,且它们的长度都介于250到253字节之间,此时在这些连续节点之前插入一个长度大于等于254字节的节点,那么紧随其后的节点的previous_entry_len属性就需要从1字节扩展为5字节,该节点自身的长度也随之达到254字节以上;这又会导致再下一个节点的previous_entry_len属性需要扩展……如此引发连锁的内存重分配操作,即“连锁更新”。
连锁更新在最坏的情况下,需要对压缩列表进行N次空间重分配操作;每次空间重分配的最坏复杂度是O(N),所以连锁更新的最坏时间复杂度是O(N^2)。
上连锁更新源码:
/* When an entry is inserted, we need to set the prevlen field of the next
 * entry to equal the length of the inserted entry. It can occur that this
 * length cannot be encoded in 1 byte and the next entry needs to grow
 * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
 * because this only happens when an entry is already being inserted (which
 * causes a realloc and memmove). However, encoding the prevlen may require
 * that this entry is grown as well. This effect may cascade throughout
 * the ziplist when there are consecutive entries with a size close to
 * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in
 * every consecutive entry.
 *
 * Note that this effect can also happen in reverse, where the bytes required
 * to encode the prevlen field can shrink. This effect is deliberately ignored,
 * because it can cause a "flapping" effect where a chain prevlen fields is
 * first grown and then shrunk again after consecutive inserts. Rather, the
 * field is allowed to stay larger than necessary, because a large prevlen
 * field implies the ziplist is holding large entries anyway.
 *
 * The pointer "p" points to the first entry that does NOT need to be
 * updated, i.e. consecutive fields MAY need an update.
 *
 * Returns the (possibly reallocated) ziplist pointer; callers must use the
 * returned value instead of the 'zl' they passed in, since the function
 * reassigns 'zl' from ziplistResize(). */
unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
/* curlen: total ziplist size in bytes, read from the header and kept up to
 * date locally as the list grows. */
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
size_t offset, noffset, extra;
unsigned char *np;
zlentry cur, next;
/* Walk forward entry by entry until the end marker is reached. */
while (p[0] != ZIP_END) {
/* Decode the entry at p; rawlen is its full size (header + payload). */
zipEntry(p, &cur);
rawlen = cur.headersize + cur.len;
/* NOTE(review): called with a NULL destination, this presumably only
 * reports how many bytes are needed to encode rawlen as a prevlen field
 * -- confirm against zipStorePrevEntryLength(). */
rawlensize = zipStorePrevEntryLength(NULL,rawlen);
/* Abort if there is no next entry. */
if (p[rawlen] == ZIP_END) break;
zipEntry(p+rawlen, &next);
/* Abort when "prevlen" has not changed. */
if (next.prevrawlen == rawlen) break;
if (next.prevrawlensize < rawlensize) {
/* The "prevlen" field of "next" needs more bytes to hold
 * the raw length of "cur". */
/* Remember p as an offset: zl is reassigned by the resize below, so p is
 * recomputed from the new base afterwards. */
offset = p-zl;
extra = rawlensize-next.prevrawlensize;
zl = ziplistResize(zl,curlen+extra);
p = zl+offset;
/* Current pointer and offset for next element. */
np = p+rawlen;
noffset = np-zl;
/* Update tail offset when next element is not the tail element. */
if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
}
/* Move the tail to the back. */
/* Everything following next's old prevlen field is shifted so that the
 * field has room for rawlensize bytes. The trailing "-1" leaves the final
 * byte out of the move (NOTE(review): presumably the end marker, which
 * ziplistResize() is expected to have rewritten -- confirm). */
memmove(np+rawlensize,
np+next.prevrawlensize,
curlen-noffset-next.prevrawlensize-1);
/* Write the new, wider prevlen field into the gap just opened. */
zipStorePrevEntryLength(np,rawlen);
/* Advance the cursor */
/* "next" has grown by `extra` bytes, so the entry after it must be
 * re-checked on the following iteration. */
p += rawlen;
curlen += extra;
} else {
if (next.prevrawlensize > rawlensize) {
/* This would result in shrinking, which we want to avoid.
 * So, set "rawlen" in the available bytes. */
/* NOTE(review): presumably stores rawlen using the large (5-byte) form
 * so the field keeps its current width -- confirm against
 * zipStorePrevEntryLengthLarge(). */
zipStorePrevEntryLengthLarge(p+rawlen,rawlen);
} else {
/* Field width already matches: overwrite the value in place. */
zipStorePrevEntryLength(p+rawlen,rawlen);
}
/* Stop here, as the raw length of "next" has not changed. */
break;
}
}
return zl;
}
unsigned char *ziplistNew(void); // Create a new, empty ziplist.
unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where); // Add a value to the list, at either the head or the tail.
unsigned char *ziplistIndex(unsigned char *zl, int index); // Return the entry at the given index.
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);// Return the entry following the given entry.
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);// Return the entry preceding the given entry.
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval); // Fetch the value stored in the given entry.
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);// Insert a new entry holding s (slen bytes) at the position given by p. NOTE(review): the original annotation said "after p"; confirm against the implementation.
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p); // Delete the given entry.
unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num); // Delete num consecutive entries located by index. NOTE(review): confirm whether the entry at index itself is included.
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip); // Find and return the entry that contains the given value.
unsigned int ziplistLen(unsigned char *zl); // Return the number of entries in the list.
size_t ziplistBlobLen(unsigned char *zl);// Return the number of bytes the ziplist occupies.
void ziplistRepr(unsigned char *zl);
注:可以以ziplistInsert作为入口阅读;上边讲到的所有点都在ziplistInsert函数中有所涉及。