ziplist.c

Redis中的ziplist,又名压缩列表,是一种经过特殊编码的双链接列表极度节约内存的数据结构。
可以存储字符串整数值,其中整数被编码为实际整数,而不是一系列字符。
它允许在 O(1) 时间内在列表的任一侧执行pushpop操作。

但是,由于每个操作都需要重新分配ziplist使用的内存,因此实际的复杂性与ziplist使用的内存量有关。

ziplist的数据结构如下:


ziplist.png
  • zlbytes 是一个uint32_t的无符号整数,存储着整个ziplist所占的字节数,包含zlbytes自身。

  • zltail 是一个uint32_t的无符号整数,存储着ziplist中最后一个entry的偏移量,方便O(1)操作队尾,而不需要遍历整个链表.

  • zllen 表示entry的个数,存储的最大值为2^16-1,超过这个范围的话,就要进行一次遍历

  • zlend 是一个比较特殊的节点,单字节,值为255,代表着列表的尾结点。

  • entry 存储着前一个节点的长度prevlen,这么做的原因是为了方便从后往前遍历,数据的格式encoding取值为string或者integer,节点的数据entry-data,如果,这个entry表示的是自身,那么entry-data就不需要了。

关于prevlen,不同的数据长度会有不同的表示方法。

  • 当前置节点的长度小于254字节,只需要1个字节无符号整数即可。
  • 当前置节点的长度大于等于254字节,则需要1个5字节的空间来存储,第1个字节保存的值为0xFE,用来标识节点是一个大值,后4个字节用来保存真实的节点长度值。

ziplist的entry定义如下:

/* We use this function to receive information about a ziplist entry.
 * Note that this is not how the data is actually encoded, is just what we
 * get filled by a function in order to operate more easily. */
typedef struct zlentry {
    //前置节点字节数
    unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
    //前置节点长度
    unsigned int prevrawlen;     /* Previous entry len. */
    unsigned int lensize;        /* Bytes used to encode this entry type/len.
                                    For example strings have a 1, 2 or 5 bytes
    //当前节点大小                                header. Integers always use a single byte.*/
    unsigned int len;            /* Bytes used to represent the actual entry.
                                    For strings this is just the string length
                                    while for integers it is 1, 2, 3, 4, 8 or
                                    0 (for 4 bit immediate) depending on the
                                    number range. */
    //当前节点header大小,prevrawlensize + lensize
    unsigned int headersize;     /* prevrawlensize + lensize. */
    //当前节点的编码格式 string或者integer
    unsigned char encoding;      /* Set to ZIP_STR_* or ZIP_INT_* depending on
                                    the entry encoding. However for 4 bits
                                    immediate integers this can assume a range
                                    of values and must be range-checked. */
    //指向当前节点的指针
    unsigned char *p;            /* Pointer to the very start of the entry, that
                                    is, this points to prev-entry-len field. */
} zlentry;

1. 新建压缩列表

//新建一个空的压缩列表
unsigned char *ziplistNew(void) {
    //ZIPLIST_HEADER_SIZE= 2个32位的+1个16位的
    unsigned int bytes = ZIPLIST_HEADER_SIZE+1;
    unsigned char *zl = zmalloc(bytes);
    //返回组成ziplist的总字节数,intrev32ifbe转为32位int
    ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
    //返回ziplist最后一项的偏移量
    ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    //返回整个ziplist的大小,
    ZIPLIST_LENGTH(zl) = 0;
    zl[bytes-1] = ZIP_END;
    return zl;
}

初始的时候,ziplist是个空的双端列表,每次插入元素的时候再分配内存。

2. 添加节点

//添加节点
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
    //当前要插入的节点
    unsigned char *p;
    //ZIPLIST_ENTRY_HEAD返回的是首节点
    //ZIPLIST_ENTRY_END返回的是末节点
    //where表示插入节点的位置
    p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
    return __ziplistInsert(zl,p,s,slen);
}

/* Insert item at "p". */
//根据指针p所在的位置,将长度为slen的字符串s,插入到zl中,并返回插入节点后的zl
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    //计算当前ziplist的总长度curlen
    //reqlen用来存储此次插入节点需要的长度
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
    //前置节点字节大小及长度
    unsigned int prevlensize, prevlen = 0;
    //偏移量
    size_t offset;
    int nextdiff = 0;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;

    /* Find out prevlen for the entry that is inserted. */
    if (p[0] != ZIP_END) {
        //计算prevlen部分是用1个字节存还是5个字节存,取决于prevlensize是否大于等于254
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
        if (ptail[0] != ZIP_END) {
            prevlen = zipRawEntryLength(ptail);
        }
    }

    /* See if the entry can be encoded */
    //encoding部分尝试将string转为integer类型
    //1. 计算reqlen第一部分长度,zllen 如果encoding是integer,则不使用slen,计算integer需要的长度,否则用slen
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* 'encoding' is set to the appropriate integer encoding */
        reqlen = zipIntSize(encoding);
    } else {
        /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
         * string length to figure out how to encode it. */
        reqlen = slen;
    }
    /* We need space for both the length of the previous entry and
     * the length of the payload. */

    //2. 计算reqlen第二部分长度,prevlen
    reqlen += zipStorePrevEntryLength(NULL,prevlen);

    //3. 计算reqlen第三部分长度,encoding
    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);

    /* When the insert position is not equal to the tail, we need to
     * make sure that the next entry can hold this entry's length in
     * its prevlen field. */
    //当
    int forcelarge = 0;

    //计算因前置节点大小变化引起的字节数差距,如果大于0,则表示需要更多空间,反之需要减少空间
    //新增节点可能不是在head或者tail位置插入,可以根据index插入,需要计算字节差
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
    if (nextdiff == -4 && reqlen < 4) {
        nextdiff = 0;
        forcelarge = 1;
    }

    /* Store offset because a realloc may change the address of zl. */
    //存储原先的offset
    offset = p-zl;

    //重新设置ziplist的大小
    zl = ziplistResize(zl,curlen+reqlen+nextdiff);

    //新节点
    p = zl+offset;

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        if (forcelarge)
            zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
        else
            zipStorePrevEntryLength(p+reqlen,reqlen);

        /* Update offset for tail */
        //修改zltail的值
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */

        //将p所指向的zl节点的信息全部保存到zlentry中,并返回。
        zipEntry(p+reqlen, &tail);

        //修改zltail
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    //当nextdiff不等于0,表示下一个entry的字节长度变了,需要级联更新ziplist中entry的大小
    if (nextdiff != 0) {
        offset = p-zl;
        //在列表的中间插入或者删除元素可能会造成级联更新的发生
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry */
    //将prevlen存储到zipEntry中
    p += zipStorePrevEntryLength(p,prevlen);
    //将encoding存储到zipEntry中
    p += zipStoreEntryEncoding(p,encoding,slen);

    //根据encoding是string或者integer来确定要复制多少字节数据到p中
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }

    //修改zllen,节点个数+1
    ZIPLIST_INCR_LENGTH(zl,1);
    return zl;
}

节点可以在head插入,可以在tail插入,可以在index位置插入,由于entry-data的大小可能超过254,一旦超过就会涉及到字节大小的扩充,引起级联更新entry的大小。

插入完节点后,分别修改zllen、prevlen、encoding、zltail属性。

3. 删除节点

/* Delete a single entry from the ziplist, pointed to by *p.
 * Also update *p in place, to be able to iterate over the
 * ziplist, while deleting entries. */
//从指针p处开始删除zl中的节点
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
    size_t offset = *p-zl;
    //删除1个节点
    zl = __ziplistDelete(zl,*p,1);
    /* Store pointer to current element in p, because ziplistDelete will
     * do a realloc which might result in a different "zl"-pointer.
     * When the delete direction is back to front, we might delete the last
     * entry and end up with "p" pointing to ZIP_END, so check this. */
    *p = zl+offset;
    return zl;
}

/* Delete a range of entries from the ziplist. */
//从index处开始删除nums个节点
unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {
    //找出index处的指针
    unsigned char *p = ziplistIndex(zl,index);
    return (p == NULL) ? zl : __ziplistDelete(zl,p,num);
}

/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */
//从指针p处开始,连续删除nums个节点
unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
    unsigned int i, totlen, deleted = 0;
    size_t offset;
    int nextdiff = 0;
    zlentry first, tail;

    zipEntry(p, &first);
    //deleted存储删除节点的个数
    //p存储删除的总字节数
    for (i = 0; p[0] != ZIP_END && i < num; i++) {
        p += zipRawEntryLength(p);
        deleted++;
    }

    totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
    if (totlen > 0) {
        //如果删除的不是尾结点,则需要移动内存
        if (p[0] != ZIP_END) {
            /* Storing `prevrawlen` in this entry may increase or decrease the
             * number of bytes required compare to the current `prevrawlen`.
             * There always is room to store this, because it was previously
             * stored by an entry that is now being deleted. */
            //计算字节差
            nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);

            /* Note that there is always space when p jumps backward: if
             * the new previous entry is large, one of the deleted elements
             * had a 5 bytes prevlen header, so there is for sure at least
             * 5 bytes free and we need just 4. */
            p -= nextdiff;
            zipStorePrevEntryLength(p,first.prevrawlen);

            /* Update offset for tail */
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen);

            /* When the tail contains more than one entry, we need to take
             * "nextdiff" in account as well. Otherwise, a change in the
             * size of prevlen doesn't have an effect on the *tail* offset. */
            zipEntry(p, &tail);
            if (p[tail.headersize+tail.len] != ZIP_END) {
                ZIPLIST_TAIL_OFFSET(zl) =
                   intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
            }

            /* Move tail to the front of the ziplist */
            memmove(first.p,p,
                intrev32ifbe(ZIPLIST_BYTES(zl))-(p-zl)-1);
        } else {
            //尾结点直接删除
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe((first.p-zl)-first.prevrawlen);
        }

        /* Resize and update length */
        offset = first.p-zl;
        zl = ziplistResize(zl, intrev32ifbe(ZIPLIST_BYTES(zl))-totlen+nextdiff);
        //修改zllen
        ZIPLIST_INCR_LENGTH(zl,-deleted);
        p = zl+offset;

        /* When nextdiff != 0, the raw length of the next entry has changed, so
         * we need to cascade the update throughout the ziplist */
        if (nextdiff != 0)
            //级联更新
            zl = __ziplistCascadeUpdate(zl,p);
    }
    return zl;
}

4. 计算节点个数

/* Return length of ziplist. */
//计算ziplist中entry的个数
unsigned int ziplistLen(unsigned char *zl) {
    unsigned int len = 0;
    //当zllen小于2^16-1的时候,直接返回zllen的值
    if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX) {
        len = intrev16ifbe(ZIPLIST_LENGTH(zl));
    } else {
        //当zllen大于等于2^16-1,则需要遍历ziplist,计算总节点个数
        unsigned char *p = zl+ZIPLIST_HEADER_SIZE;
        while (*p != ZIP_END) {
            p += zipRawEntryLength(p);
            len++;
        }
        /* Re-store length if small enough */
        //如果长度小于2^16-1,则修改zllen
        if (len < UINT16_MAX) ZIPLIST_LENGTH(zl) = intrev16ifbe(len);
    }
    return len;
}

你可能感兴趣的:(ziplist.c)