阅读源码: sds.h sds.c
SDSHDR 全称 Simple Dynamic Strings Header
sds
char *的别名
typedef char *sds;
sdshdr
sdshdr有好几个类别,它们分别是:sdshdr5,sdshdr8,sdshdr16,sdshdr32,sdshdr64,其中sdshdr5是不使用的
源码如下:
/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
这五个结构体中,len表示字符串的长度,alloc表示buf指针分配空间的大小,flags表示该字符串的类型(sdshdr5,sdshdr8,sdshdr16,sdshdr32,sdshdr64),是由flags的第三位表示的,至于为何怎么说,请看下方的源码:
#define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
可以看出SDS_TYPE只占用了0,1,2,3,4五个数字,正好占用三位,我们就可以使用flags&SDS_TYPE_MASK来获取动态字符串对应的字符串类型
注意一个小细节:attribute ((packed)),这一段代码的作用是取消编译阶段的内存优化对齐功能。
例如:struct aa {char a; int b;}; sizeof(aa) == 8;
但是struct attribute ((packed)) aa {char a; int b;}; sizeof(aa) == 5;
这个很重要,redis源码中不是直接对sdshdr某一个类型操作,往往参数都是sds,而sds就是结构体中的buf,在后面的源码分析中,你可能会经常看见s[-1]这种魔法一般的操作,而按照sdshdr内存分布s[-1]就是sdshdr中flags变量,由此可以获取到该sds指向的字符串的类型。
SDS中的宏定义函数
SDS_HDR_VAR(T,s)
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
查看这段代码首先得明白 C 语言中的宏定义##操作符,我在此就不再解释了
这段代码就很骚气了,用个例子来解释:
SDS_HDR_VAR(8,s);
//下面是对应宏定义翻译的产物
struct sdshdr8 *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
这样就可以根据指向buf的sds变量s得到sdshdr8的指针,是不是感觉很神奇?
SDS_HDR(T,s)
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
同上方的函数类似,根据指向buf的sds变量s得到sdshdr的指针,只不过这里是获取的是指针地址,上方函数是创建了一个变量
SDS_TYPE_5_LEN(f)
#define SDS_TYPE_BITS 3
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
看名字就知道该函数就是获取sdshdr5字符串类型的长度,由于根本不使用sdshdr5类型,所以需要直接返回空,而flags成员使用最低三位有效位来表示类型,所以让f代表的flags的值右移三位即可
上方基本上就是sds的核心内容了,然后再看看sds中的几个内联函数
static inline size_t sdslen(const sds s)
static inline size_t sdslen(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->len;
case SDS_TYPE_16:
return SDS_HDR(16,s)->len;
case SDS_TYPE_32:
return SDS_HDR(32,s)->len;
case SDS_TYPE_64:
return SDS_HDR(64,s)->len;
}
return 0;
}
该处就使用到了取消编译阶段的内存优化对齐功能,直接使用s[-1]获取到flags成员的值,然后根据flags&&SDS_TYPE_MASK来获取到动态字符串对应的类型进而获取动态字符串的长度。
static inline size_t sdsavail(const sds s)
static inline size_t sdsavail(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5: {
return 0;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
return sh->alloc - sh->len;
}
}
return 0;
}
获取动态字符串可使用的空间,从这里可以看出来,SDS和我平常所用到的C语言的原生字符串有差别,因为从获取可用空间的计算方法来看,并未考虑到字符串需要以\0结尾,因为结构体本身带有长度的成员len,不需要\0来做字符串结尾的判定,而且不使用\0作为结尾有很多好处,可以存储的类型多样性就提高了。
static inline void sdssetlen(sds s, size_t newlen)
static inline void sdssetlen(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len = newlen;
break;
}
}
设置sds的长度
static inline void sdsinclen(sds s, size_t inc)
static inline void sdsinclen(sds s, size_t inc) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len += inc;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len += inc;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len += inc;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len += inc;
break;
}
}
增加sds的长度
static inline size_t sdsalloc(const sds s)
/* sdsalloc() = sdsavail() + sdslen() */
static inline size_t sdsalloc(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->alloc;
case SDS_TYPE_16:
return SDS_HDR(16,s)->alloc;
case SDS_TYPE_32:
return SDS_HDR(32,s)->alloc;
case SDS_TYPE_64:
return SDS_HDR(64,s)->alloc;
}
return 0;
}
获取sds已分配空间的大小
static inline void sdssetalloc(sds s, size_t newlen)
static inline void sdssetalloc(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
/* Nothing to do, this type has no total allocation info. */
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->alloc = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->alloc = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->alloc = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->alloc = newlen;
break;
}
}
设置sds已分配空间的大小
SDS函数
sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
sds sdsdup(const sds s);
void sdsfree(sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
sds sdscatsds(sds s, const sds t);
sds sdscpylen(sds s, const char *t, size_t len);
sds sdscpy(sds s, const char *t);
sds sdscatvprintf(sds s, const char *fmt, va_list ap);
#ifdef __GNUC__
sds sdscatprintf(sds s, const char *fmt, ...)
__attribute__((format(printf, 2, 3)));
#else
sds sdscatprintf(sds s, const char *fmt, ...);
#endif
sds sdscatfmt(sds s, char const *fmt, ...);
sds sdstrim(sds s, const char *cset);
void sdsrange(sds s, ssize_t start, ssize_t end);
void sdsupdatelen(sds s);
void sdsclear(sds s);
int sdscmp(const sds s1, const sds s2);
sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);
void sdsfreesplitres(sds *tokens, int count);
void sdstolower(sds s);
void sdstoupper(sds s);
sds sdsfromlonglong(long long value);
sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, ssize_t incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
void *sdsAllocPtr(sds s);
/* Export the allocator used by SDS to the program using SDS.
* Sometimes the program SDS is linked to, may use a different set of
* allocators, but may want to allocate or free things that SDS will
* respectively free or allocate. */
void *sds_malloc(size_t size);
void *sds_realloc(void *ptr, size_t size);
void sds_free(void *ptr);
这当中的大部分函数都很简单,只是对zmalloc文件里面的函数,sds中inline函数,或者是sdsnewlen函数的一层简单调用,就不解释,我们挑几个重点的看看。
sds sdsnewlen(const void *init, size_t initlen)
/* Create a new sds string with the content specified by the 'init' pointer
* and 'initlen'.
* If NULL is used for 'init' the string is initialized with zero bytes.
* If SDS_NOINIT is used, the buffer is left uninitialized;
*
* The string is always null-termined (all the sds strings are, always) so
* even if you create an sds string with:
*
* mystring = sdsnewlen("abc",3);
*
* You can print the string with printf() as there is an implicit \0 at the
* end of the string. However the string is binary safe and can contain
* \0 characters in the middle, as the length is stored in the sds header. */
sds sdsnewlen(const void *init, size_t initlen) {
void *sh;
sds s;
// 根据initlen来获取合适的字符串长度
char type = sdsReqType(initlen);
/* Empty strings are usually created in order to append. Use type 8
* since type 5 is not good at this. */
if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
// 根据sds类型来获取该sds类型对应的结构体大小
int hdrlen = sdsHdrSize(type);
unsigned char *fp; /* flags pointer. */
// 此处的s_malloc其实就是zmalloc函数,只是一个别名,注意这里,会给sds多增加一个字节的空间,由后面的s[initlen] = '\0';可知,作者是为了兼容C语言的字符串类型,这样就可以直接使用printf来输出sds了,这样非常的方便
sh = s_malloc(hdrlen+initlen+1);
// 如果init == "SDS_NOINIT",那么就会把sds置为未知字符串,如果init == NULL,那么就会把sds置为空字符串
if (init==SDS_NOINIT)
init = NULL;
else if (!init)
memset(sh, 0, hdrlen+initlen+1);
if (sh == NULL) return NULL;
s = (char*)sh+hdrlen;
fp = ((unsigned char*)s)-1;
// 根据sds类型来初始化sds的内容
switch(type) {
case SDS_TYPE_5: {
*fp = type | (initlen << SDS_TYPE_BITS);
break;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
}
// 在初始化完成后,将init的内容拷贝进sds对象中,但是init如果原来等于SDS_NOINIT,就会被置为NULL,所以sds还是一串未知的字符串
if (initlen && init)
memcpy(s, init, initlen);
s[initlen] = '\0';
return s;
}
该函数是一个生成新sds的函数,根据init指针和initlen参数来初始化sds的内容,根据init是否是SDS_NOINIT来设置是否需要使用init的内容初始化sds,如果是SDS_NOINIT,那么默认会将sds置为一串为未知的字符串,如果init为NULL。那么默认会将sds置为一个空字符串,并且sdsnewlen在为sds向系统申请一个新的空间的时候,新空间的长度是hdrlen+initlen+1,注意这里的+1,这多出来的一个字节,其实是放\0的,这样sds就可以使用C自带标准库(strlen,strtoll之类的函数)来操作sds中buf的内容,不然还得自己写一套,很是麻烦。
以下是使用sdsnewlen函数简单调用构成的函数:
// 创建一个新的sds,只不过这里只会给init参数,实际上initlen是通过strlen获取的
sds sdsnew(const char *init);
// 创建一个空的sds
sds sdsempty(void);
// 根据原sds,创建一个sds的副本
sds sdsdup(const sds s);
以下是对zmalloc里面的函数简单调用或者直接是别名的函数:
#define s_malloc zmalloc
#define s_realloc zrealloc
#define s_free zfree
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;
s_free((char*)s-sdsHdrSize(s[-1]));
}
void *sds_malloc(size_t size) { return s_malloc(size); }
void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
void sds_free(void *ptr) { s_free(ptr); }
在动态字符串的所有操作中,大部分会进行对内存的扩大和释放,所以得介绍一下sds中对内存扩大和释放的函数
扩大sds的空闲空间 -- sdsMakeRoomFor函数
sds sdsMakeRoomFor(sds s, size_t addlen) {
void *sh, *newsh;
size_t avail = sdsavail(s);
size_t len, newlen;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen;
/* Return ASAP if there is enough space left. */
if (avail >= addlen) return s;
len = sdslen(s);
sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
type = sdsReqType(newlen);
/* Don't use type 5: the user is appending to the string and type 5 is
* not able to remember empty space, so sdsMakeRoomFor() must be called
* at every appending operation. */
if (type == SDS_TYPE_5) type = SDS_TYPE_8;
hdrlen = sdsHdrSize(type);
if (oldtype==type) {
newsh = s_realloc(sh, hdrlen+newlen+1);
if (newsh == NULL) return NULL;
s = (char*)newsh+hdrlen;
} else {
/* Since the header size changes, need to move the string forward,
* and can't use realloc */
newsh = s_malloc(hdrlen+newlen+1);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, newlen);
return s;
}
该函数便是扩大sds空间,但是感觉上还是想让sds中available空间的大小能够容纳addlen大小的字符串,并不是改变了sds中buf的长度,而是改变了sds中available空间的大小,如果当前available空间的大小大于addlen的大小,那么便不作修改,如果available空间的大小小鱼addlen的大小,那么就会重新分配sds中alloc的大小,newlen并不是无脑直接让alloc加上addlen,而且使用sds的长度加上addlen的长度作为newlen,但是经常重新分配内存会对效率有所影响,但是为了防止重新分配内存对效率的影响而让newlen无脑翻倍的话,又会对内存造成影响,造成内存占用过高,但是很大一部分内存并没有使用,所以取得了一个折中的办法,就是在newlen小于SDS_MAX_PREALLOC(1M),对newlen进行翻倍,在newlen大于SDS_MAX_PREALLOC的情况下,让newlen加上SDS_MAX_PREALLOC。
sdsRemoveFreeSpace
sds sdsRemoveFreeSpace(sds s) {
void *sh, *newsh;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
size_t len = sdslen(s);
sh = (char*)s-oldhdrlen;
/* Check what would be the minimum SDS header that is just good enough to
* fit this string. */
type = sdsReqType(len);
hdrlen = sdsHdrSize(type);
/* If the type is the same, or at least a large enough type is still
* required, we just realloc(), letting the allocator to do the copy
* only if really needed. Otherwise if the change is huge, we manually
* reallocate the string to use the different header type. */
if (oldtype==type || type > SDS_TYPE_8) {
newsh = s_realloc(sh, oldhdrlen+len+1);
if (newsh == NULL) return NULL;
s = (char*)newsh+oldhdrlen;
} else {
newsh = s_malloc(hdrlen+len+1);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, len);
return s;
}
就是对sds中多余的空间进行释放,例如以前是一个sdshdr64的sds,在redis运行过程中,buf的内容被修改了,变短了,那么多出来的内容就需要释放掉,还给系统,并且,如果修改得比较多,现在一个sdshdr16的sds就能容纳下,那么当前sds的type还会被修改,因为不同的sds类型占用的空间也是不一样的,并且杀鸡焉用宰牛刀,是吧。
其他的函数介绍意义其实并不大,都很简单,我们仅需要知道sds的内存分布,内存操作即可。