redis源码浅析--六-压缩列表

环境说明:redis源码版本 5.0.3;我在阅读源码过程做了注释,git地址:https://gitee.com/xiaoangg/redis_annotation

参考书籍:《Redis设计与实现》

一.数据结构

1.压缩列表的构成

redis源码浅析--六-压缩列表_第1张图片

以ziplistNew(创建一个空的压缩列表)接口为入口,可以更容易的理解压缩列表的构成

/* Ziplist header layout, in order from the start of the blob:
 *   uint32_t  total number of bytes the ziplist occupies
 *   uint32_t  offset of the last (tail) entry
 *   uint16_t  number of entries
 * The accessors below expand to lvalues, so they can be read or assigned.
 * They expose the raw stored field; the code in this file wraps the 32-bit
 * fields with intrev32ifbe() when reading or writing them. */

/* Size of the ziplist header: two 32 bit integers (total bytes count and
 * last item offset) plus one 16 bit integer (number of items). */
#define ZIPLIST_HEADER_SIZE     (2*sizeof(uint32_t) + sizeof(uint16_t))

/* Total bytes the ziplist is composed of: the first 32 bits of the blob. */
#define ZIPLIST_BYTES(zl)       (*(uint32_t*)(zl))

/* Offset of the last item inside the ziplist: the 32 bits that follow the
 * total bytes field. */
#define ZIPLIST_TAIL_OFFSET(zl) (*(uint32_t*)((zl) + sizeof(uint32_t)))

/* Number of entries in the ziplist (not a byte length): the 16 bits that
 * follow the two 32 bit fields. Holds UINT16_MAX if the length cannot be
 * determined without scanning the whole ziplist. */
#define ZIPLIST_LENGTH(zl)      (*(uint16_t*)((zl) + 2*sizeof(uint32_t)))

/* Special "end of ziplist" marker byte. */
#define ZIP_END 0xFF


/* Create a new empty ziplist.
 *
 * The allocation holds only the header plus the one-byte ZIP_END marker.
 * Returns the pointer obtained from zmalloc(). */
unsigned char *ziplistNew(void) {
    /* Header bytes plus one byte for the end marker. */
    unsigned int total = ZIPLIST_HEADER_SIZE+1;
    unsigned char *list = zmalloc(total);

    /* Total size of the blob in bytes. */
    ZIPLIST_BYTES(list) = intrev32ifbe(total);
    /* With no entries yet, the tail offset is the first byte after the
     * header. */
    ZIPLIST_TAIL_OFFSET(list) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    /* No entries yet. */
    ZIPLIST_LENGTH(list) = 0;
    /* The last byte of the blob is the end marker. */
    list[total-1] = ZIP_END;
    return list;
}

2.压缩列表中节点的构成

redis源码浅析--六-压缩列表_第2张图片

2.1 previous_entry_len

  • previous_entry_len 属性的长度可以是一字节或者是5字节。
    如果前一节点的长度小于254字节,那么previous_entry_len长度是一字节。
    如果前一节点的长度大于等于254字节,那么previous_entry_len的长度是5字节,其中第一个字节会被设置为254,之后的四个字节用来存储前一节点的长度。
  • 程序可以根据previous_entry_len属性,来计算前一节点的地址。
    如有一个当前节点指针c,那么c减去previous_entry_len中记录的长度值,就是上一节点的起始地址。

上源码:解析previous_entry_len属性的代码位于ziplist.c中的宏ZIP_DECODE_PREVLEN
 

/* Threshold for the "previous entry length" prefix of an entry. When the
 * first byte of the prefix is below this value the prefix is a single byte;
 * otherwise the prefix occupies 5 bytes (the first byte plus 4 more bytes). */
#define ZIP_BIG_PREVLEN 254

/* Return the number of bytes used to encode the length of the previous
 * entry. The length is returned by setting the var 'prevlensize'.
 *
 *   ptr         - points at the first byte of the prevlen prefix; only
 *                 (ptr)[0] is read.
 *   prevlensize - lvalue that receives 1 when (ptr)[0] < ZIP_BIG_PREVLEN,
 *                 and 5 otherwise.
 *
 * The expansion deliberately ends without a trailing semicolon: the caller's
 * own ';' completes the do/while, so the macro behaves as exactly one
 * statement and can be used inside an unbraced if/else. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    if ((ptr)[0] < ZIP_BIG_PREVLEN) {                                          \
        (prevlensize) = 1;                                                     \
    } else {                                                                   \
        (prevlensize) = 5;                                                     \
    }                                                                          \
} while(0)


/* Return the length of the previous element, and the number of bytes that
 * are used in order to encode the previous element length.
 *
 * 'ptr' must point to the prevlen prefix of an entry (that encodes the
 * length of the previous entry in order to navigate the elements backward).
 *
 * Outputs (both are assigned by the macro, so they must be lvalues):
 *   prevlensize - number of bytes used by the prefix, as computed by
 *                 ZIP_DECODE_PREVLENSIZE: 1 or 5.
 *   prevlen     - length of the previous entry. In the 1-byte form it is the
 *                 value of that byte. In the 5-byte form it is copied from
 *                 the 4 bytes that follow the first byte and then passed
 *                 through memrev32ifbe(). 'prevlen' must be a 4-byte object;
 *                 the assert checks this on the 5-byte path.
 *
 * The expansion ends without a trailing semicolon so that the caller's ';'
 * completes the do/while and the macro is usable as a single statement
 * (for example inside an unbraced if/else). */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    } else if ((prevlensize) == 5) {                                           \
        assert(sizeof((prevlen)) == 4);                                        \
        memcpy(&(prevlen), ((char*)(ptr)) + 1, 4);                             \
        memrev32ifbe(&(prevlen));                                              \
    }                                                                          \
} while(0)

2.2 encoding

encoding记录所属节点的content的编码方式,以及长度。

  • encoding值的最高两位为00、01或10时,表示content的编码方式为字节数组。
    content的数组长度由去除最高两位之后的其余位记录。

    上源码:
    
    /* Mask selecting the two most significant bits of an encoding byte
     * (0xC0 == 1100 0000 in binary). */
    #define ZIP_STR_MASK 0xC0

    /* Non-zero when 'enc' is a string encoding. String entries never have
     * both of the two most significant bits of the first byte set ("11"),
     * so any masked value other than the full mask denotes a string. */
    #define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) != ZIP_STR_MASK)
    


     
  • encoding值的最高两位为11时,表示content的编码方式是整数。
    整数编码
    编码 encoding编码长度 content保存的值
    1100 0000 1字节 int_16 类型的整数
    1101 0000 1字节 int_32类型的整数
    1110 0000 1字节 int_64类型的整数
    1111 0000 1字节 24位的有符号整数
    1111 1110 1字节 8位有符号整数
    1111 xxxx 1字节 当值大于等于0 && 小于等于12时,使用该编码方式(xxxx的取值范围为0001~1101);
    这时节点没有content属性;
    值直接编码在encoding属性的低4位中(低4位存储的是值加1)。


    整数类型的编码方式可以阅读源码函数zipTryEncoding:
    /* Integer encodings. The two top bits are always "11" (0xc0); for the
     * first four encodings the next two bits select the variant. */
    #define ZIP_INT_16B (0xc0 | (0 << 4)) /* 1100 0000 */
    #define ZIP_INT_32B (0xc0 | (1 << 4)) /* 1101 0000 */
    #define ZIP_INT_64B (0xc0 | (2 << 4)) /* 1110 0000 */
    #define ZIP_INT_24B (0xc0 | (3 << 4)) /* 1111 0000 */
    #define ZIP_INT_8B 0xfe               /* 1111 1110 */
    
    /* Check if the string pointed to by 'entry' can be encoded as an integer.
     *
     *   entry    - bytes to interpret; 'entrylen' is their count.
     *   v        - on success receives the parsed integer value.
     *   encoding - on success receives the smallest integer encoding able to
     *              hold the value.
     *
     * Returns 1 on success. Returns 0, leaving 'v' and 'encoding' untouched,
     * when the input is empty, is 32 bytes or longer, or string2ll() reports
     * failure. */
    int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
        long long num;
        unsigned char enc;

        /* Reject empty and over-long inputs before attempting to parse. */
        if (entrylen == 0 || entrylen >= 32) return 0;
        if (!string2ll((char*)entry,entrylen,&num)) return 0;

        /* Pick the narrowest encoding whose range contains the value. */
        if (num >= 0 && num <= 12) {
            /* Small values are folded into the encoding byte itself. */
            enc = ZIP_INT_IMM_MIN+num;
        } else if (num >= INT8_MIN && num <= INT8_MAX) {
            enc = ZIP_INT_8B;
        } else if (num >= INT16_MIN && num <= INT16_MAX) {
            enc = ZIP_INT_16B;
        } else if (num >= INT24_MIN && num <= INT24_MAX) {
            enc = ZIP_INT_24B;
        } else if (num >= INT32_MIN && num <= INT32_MAX) {
            enc = ZIP_INT_32B;
        } else {
            enc = ZIP_INT_64B;
        }

        *encoding = enc;
        *v = num;
        return 1;
    }
    

2.3 content

content属性负责保存节点的值;

节点的值可以是整数,或是字节数组;

值的类型和长度存储在encoding属性中;

 

二 连锁更新

上面说过,每个节点都有一个previous_entry_len属性记录上一节点的长度;

如果上一节点长度小于254字节,那么previous_entry_len 属性需要一个字节来存储;

如果上一节点长度大于等于254字节,那么previous_entry_len 属性需要五个字节来存储;

 

那么问题来了:如果列表中有多个连续的节点,且每个节点的长度都介于250到253字节之间,此时在这些连续节点之前插入一个长度大于等于254字节的新节点,那么紧随其后的节点的previous_entry_len需要从1字节扩展为5字节,该节点自身的长度也就随之达到254字节以上,进而迫使再下一个节点同样扩展……如此逐个传递下去,引发连锁的内存重分配操作,这就是连锁更新;

连锁更新在最坏情况下,需要对压缩列表进行N次空间重分配操作;每次空间重分配的最坏复杂度是O(N);所以连锁更新的最坏时间复杂度是O(N²);

上连锁更新源码:


/* When an entry is inserted, we need to set the prevlen field of the next
 * entry to equal the length of the inserted entry. It can occur that this
 * length cannot be encoded in 1 byte and the next entry needs to be grow
 * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
 * because this only happens when an entry is already being inserted (which
 * causes a realloc and memmove). However, encoding the prevlen may require
 * that this entry is grown as well. This effect may cascade throughout
 * the ziplist when there are consecutive entries with a size close to
 * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in
 * every consecutive entry.
 *
 * Note that this effect can also happen in reverse, where the bytes required
 * to encode the prevlen field can shrink. This effect is deliberately ignored,
 * because it can cause a "flapping" effect where a chain prevlen fields is
 * first grown and then shrunk again after consecutive inserts. Rather, the
 * field is allowed to stay larger than necessary, because a large prevlen
 * field implies the ziplist is holding large entries anyway.
 *
 * The pointer "p" points to the first entry that does NOT need to be
 * updated, i.e. consecutive fields MAY need an update.
 *
 * Returns the ziplist pointer. It is reassigned from ziplistResize() every
 * time an entry has to grow, so callers should continue with the returned
 * value rather than the 'zl' they passed in. */
unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
    /* curlen: running total size of the ziplist in bytes, read from the
     * header and increased locally each time an entry grows. */
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
    size_t offset, noffset, extra;
    unsigned char *np;
    zlentry cur, next;

    while (p[0] != ZIP_END) {
        /* presumably decodes the entry at p into 'cur' — confirm against
         * zipEntry(). rawlen is the full size of the current entry. */
        zipEntry(p, &cur);
        rawlen = cur.headersize + cur.len;
        /* NOTE(review): with a NULL destination this looks like it only
         * returns how many bytes are needed to encode rawlen — confirm. */
        rawlensize = zipStorePrevEntryLength(NULL,rawlen);

        /* Abort if there is no next entry. */
        if (p[rawlen] == ZIP_END) break;
        zipEntry(p+rawlen, &next);

        /* Abort when "prevlen" has not changed. */
        if (next.prevrawlen == rawlen) break;

        if (next.prevrawlensize < rawlensize) {
            /* The "prevlen" field of "next" needs more bytes to hold
             * the raw length of "cur". */
            /* Save p as an offset: zl is reassigned by the resize below, so
             * p must be re-derived from the new base. */
            offset = p-zl;
            extra = rawlensize-next.prevrawlensize;
            zl = ziplistResize(zl,curlen+extra);
            p = zl+offset;

            /* Current pointer and offset for next element. */
            np = p+rawlen;
            noffset = np-zl;

            /* Update tail offset when next element is not the tail element. */
            if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
                ZIPLIST_TAIL_OFFSET(zl) =
                    intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
            }

            /* Move the tail to the back. */
            /* Everything after next's old prevlen field is shifted to sit
             * after the new, wider field. The length excludes the old
             * prevlen field and one final byte (presumably the ZIP_END
             * marker, handled by ziplistResize — confirm). */
            memmove(np+rawlensize,
                np+next.prevrawlensize,
                curlen-noffset-next.prevrawlensize-1);
            zipStorePrevEntryLength(np,rawlen);

            /* Advance the cursor */
            /* "next" has grown, so it becomes the entry to examine in the
             * following iteration. */
            p += rawlen;
            curlen += extra;
        } else {
            if (next.prevrawlensize > rawlensize) {
                /* This would result in shrinking, which we want to avoid.
                 * So, set "rawlen" in the available bytes. */
                zipStorePrevEntryLengthLarge(p+rawlen,rawlen);
            } else {
                /* The existing field has the size required for rawlen;
                 * store the new value in place. */
                zipStorePrevEntryLength(p+rawlen,rawlen);
            }

            /* Stop here, as the raw length of "next" has not changed. */
            break;
        }
    }
    return zl;
}

 

三 压缩列表API


unsigned char *ziplistNew(void); // create a new, empty ziplist
unsigned char *ziplistMerge(unsigned char **first, unsigned char **second); // presumably merges two ziplists into one — confirm against ziplist.c
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where); // add a value to the list, at the head or at the tail (selected by 'where')
unsigned char *ziplistIndex(unsigned char *zl, int index); // return the entry at the given index
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);// return the entry following the given entry
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);// return the entry preceding the given entry
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval); // fetch the value stored in the given entry
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);// insert a new entry (value 's' of 'slen' bytes) relative to entry 'p'. NOTE(review): the original note said "after p", but upstream ziplist.c documents this as inserting AT p, i.e. before the entry currently at p — confirm
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p); // delete the given entry
unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num); // delete a run of consecutive entries. NOTE(review): takes a start index and a count; confirm whether the entry at 'index' itself is included
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen); // presumably compares the entry at p with the value s/slen — confirm
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip); // find and return the entry holding the given value
unsigned int ziplistLen(unsigned char *zl); // return the number of entries in the list
size_t ziplistBlobLen(unsigned char *zl);// return the number of bytes the ziplist occupies
void ziplistRepr(unsigned char *zl); // presumably a debugging dump of the ziplist — confirm

 

注:可以以ziplistInsert作为入口阅读;上边讲到的所有要点在ziplistInsert函数中都有涉及;

你可能感兴趣的:(redis)