redis源码地址
sds源码所在位置:src/sds.h src/sds.c
sds独立仓库地址
简介:sds (Simple Dynamic String),即动态字符串管理库
why we need sds?
sds相比c的标准库优势:
1、相比于c标准库,获取字符串的len复杂读从O(N)降低到O(1),sds结构中存储了字符串的长度,所以类似strlen(str)的操作不会成为redis的性能瓶颈。
2、在内存分配策略上,redis总是会尝试多分配一些空间,比如小于1MB的字符串,总是分配2倍内存空间,对于大于1MB的空间追加1MB冗余空间,这对于字符串操作(如strcat等)能减少重新内存分配的几率,提升运行性能。
3、SDS总是安全的,sds总是会自动追加字符串结尾符号’\0’,有效防止溢出发生。
4、惰性释放内存,改变原字符串时,标准库需要重新分配内存的复杂度为O(N),SDS最大为O(N),最优情况下无需重新分配内存空间。
5、二进制安全。(SDS以len的值判断结束,而不是’\0’)
在sds.h中,有如下定义
typedef char *sds;
/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
sds支持sdshdr5、sdshdr8、sdshdr16、sdshdr32、sdshdr64
sds structure layout
+--------+-------------------------------+-----------+
| Header | Binary safe C alike string... | Null term |
+--------+-------------------------------+-----------+
|
`-> Pointer returned to the user.
1、区别为字符串长度不同。
2、其中sdshdr5在源码中没有用到(即,如果用户申请的字符串长度满足sdshdr5则向上申请sdshdr8)
3、attribute ((packed))选项是告诉编译器,以1字节对齐。
4、len指已经使用的长度,alloc是申请的内存大小,flags低三位用于表示sds type
,源码如下:即(flags & SDS_TYPE_MASK)值就对应SDS_TYPE_xxx宏,比如flags = 0就代表SDS_TYPE_5, flags = 2就代表SDS_TYPE_16。
#define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
5、char buf[]用于存放真实的字符串。
5.1、注意这里的char buf[]是不占用空间的,怎么理解不占用空间呢,就是说用sizeof(sdshdr5),获得的结果是1,而不是2。
5.2、这种用法在嵌入式中非常常用,也常用于网络协议包的信令结构定义中。
5.3、特别注意,char buf[]必须放在struct的最后。
6、sdslen:获取len
static inline size_t sdslen(const sds s) {
unsigned char flags = s[-1]; /* s[-1]这是trick的写法,s[-1]等价于s + (-1) */
switch(flags&SDS_TYPE_MASK) { /* 通过flags 获取SDS_TYPE, 这在4中,已经讲过 */
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->len;
case SDS_TYPE_16:
return SDS_HDR(16,s)->len;
case SDS_TYPE_32:
return SDS_HDR(32,s)->len;
case SDS_TYPE_64:
return SDS_HDR(64,s)->len;
}
return 0;
}
7、sdsavail:获取空闲的内存空间,即alloc - len
static inline size_t sdsavail(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5: {
return 0;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
return sh->alloc - sh->len;
}
}
return 0;
}
8、sdssetlen:设置len
static inline void sdssetlen(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len = newlen;
break;
}
}
9、sdsinclen:increase len
static inline void sdsinclen(sds s, size_t inc) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len += inc;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len += inc;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len += inc;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len += inc;
break;
}
}
10、sdsalloc:获取alloc
/* sdsalloc() = sdsavail() + sdslen() */
static inline size_t sdsalloc(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->alloc;
case SDS_TYPE_16:
return SDS_HDR(16,s)->alloc;
case SDS_TYPE_32:
return SDS_HDR(32,s)->alloc;
case SDS_TYPE_64:
return SDS_HDR(64,s)->alloc;
}
return 0;
}
11、sdssetalloc:设置alloc
static inline void sdssetalloc(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
/* Nothing to do, this type has no total allocation info. */
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->alloc = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->alloc = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->alloc = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->alloc = newlen;
break;
}
}
12、sdsnew:创建sds
/* Create a new sds string starting from a null terminated C string. */
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);/*sds new主函数为sdsnewlen,本函数主要是做string len的计算*/
return sdsnewlen(init, initlen);
}
13、sdsnewlen
/* Create a new sds string with the content specified by the 'init' pointer
* and 'initlen'.
* If NULL is used for 'init' the string is initialized with zero bytes.
* If SDS_NOINIT is used, the buffer is left uninitialized;
*
* The string is always null-termined (all the sds strings are, always) so
* even if you create an sds string with:
*
* mystring = sdsnewlen("abc",3);
*
* You can print the string with printf() as there is an implicit \0 at the
* end of the string. However the string is binary safe and can contain
* \0 characters in the middle, as the length is stored in the sds header. */
sds sdsnewlen(const void *init, size_t initlen) {
void *sh;
sds s;
char type = sdsReqType(initlen); /* 通过initlen得到SDS_TYPE */
/* Empty strings are usually created in order to append. Use type 8
* since type 5 is not good at this. */
if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;/* SDS_TYPE_5永远不会使用,即使用户申请的string长度为0,也使用SDS_TYPE_8 */
int hdrlen = sdsHdrSize(type); /* 这里是获取header的长度,即类似sizeof(sdshdr8) */
unsigned char *fp; /* flags pointer. */
sh = s_malloc(hdrlen+initlen+1);/* 实际申请的空间大小为header + string len + '\0' */
if (init==SDS_NOINIT)
init = NULL;
else if (!init)
memset(sh, 0, hdrlen+initlen+1);
if (sh == NULL) return NULL; /* 这里空指针检查代码需要和memset调换顺序,源码有点小问题,已经给redis提交pull request */
s = (char*)sh+hdrlen; /* s指向char buf[],即返回的sds指针 */
fp = ((unsigned char*)s)-1;
switch(type) {
case SDS_TYPE_5: {
*fp = type | (initlen << SDS_TYPE_BITS);
break;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
}
if (initlen && init)
memcpy(s, init, initlen);
s[initlen] = '\0'; /* 默认补全'\0' */
return s;
}
14、sdsdup:复制sds
/* Duplicate an sds string. */
sds sdsdup(const sds s) {
/* 这个比较简单,和新建一个一样 */
return sdsnewlen(s, sdslen(s));
}
15、sdsfree:释放sds
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;
s_free((char*)s-sdsHdrSize(s[-1])); /* s[-1]获取到flags,通过flags得到SDS_TYPE_XX, 通过SDS_TYPE_XX获得到header的长度,将s往前移动header的长度,即为 sdshdrXX的结构指针 */
}
16、sdsupdatelen:更新len的值
/* Set the sds string length to the length as obtained with strlen(), so
* considering as content only up to the first null term character.
*
* This function is useful when the sds string is hacked manually in some
* way, like in the following example:
*
* s = sdsnew("foobar");
* s[2] = '\0';
* sdsupdatelen(s);
* printf("%d\n", sdslen(s));
*
* The output will be "2", but if we comment out the call to sdsupdatelen()
* the output will be "6" as the string was modified but the logical length
* remains 6 bytes. */
void sdsupdatelen(sds s) {
size_t reallen = strlen(s); /* 该接口的使用场景,上面的英文例子已经写得很清楚了,这是一种trick的玩法 */
sdssetlen(s, reallen);
}
17、sdsclear:设置为空字符串,但是不会free空间
/* Modify an sds string in-place to make it empty (zero length).
* However all the existing buffer is not discarded but set as free space
* so that next append operations will not require allocations up to the
* number of bytes previously available. */
void sdsclear(sds s) {
sdssetlen(s, 0);
s[0] = '\0'; /* 仅仅把字符串设置为“”,但s指向的内存依然有效 */
}
18、sdsRemoveFreeSpace:remove空闲的内存,如果s重新分配了内存,那么 sds所有引用到的地方都需要修改,因为以前的指针已经失效,这个算是sds的一个弱点吧。
/* Reallocate the sds string so that it has no free space at the end. The
* contained string remains not altered, but next concatenation operations
* will require a reallocation.
*
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdsRemoveFreeSpace(sds s) {
void *sh, *newsh;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
size_t len = sdslen(s);
size_t avail = sdsavail(s);
sh = (char*)s-oldhdrlen;
/* Return ASAP if there is no space left. */
if (avail == 0) return s;
/* Check what would be the minimum SDS header that is just good enough to
* fit this string. */
type = sdsReqType(len);
hdrlen = sdsHdrSize(type);
/* If the type is the same, or at least a large enough type is still
* required, we just realloc(), letting the allocator to do the copy
* only if really needed. Otherwise if the change is huge, we manually
* reallocate the string to use the different header type. */
if (oldtype==type || type > SDS_TYPE_8) {
newsh = s_realloc(sh, oldhdrlen+len+1);
if (newsh == NULL) return NULL;
s = (char*)newsh+oldhdrlen;
} else {
newsh = s_malloc(hdrlen+len+1);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, len);
return s;
}
19、sdsAllocSize:返回sds真实分配的内存大小。
/* Return the total size of the allocation of the specified sds string,
* including:
* 1) The sds header before the pointer.
* 2) The string.
* 3) The free buffer at the end if any.
* 4) The implicit null term.
*/
size_t sdsAllocSize(sds s) {
size_t alloc = sdsalloc(s);
return sdsHdrSize(s[-1])+alloc+1;
}
20、sdsAllocPtr:获取sds对应的sdshdrXX头指针。
/* Return the pointer of the actual SDS allocation (normally SDS strings
* are referenced by the start of the string buffer). */
void *sdsAllocPtr(sds s) {
return (void*) (s-sdsHdrSize(s[-1]));
}
21、sdsMakeRoomFor:增加内存空间,调用该接口后,如果sds返回指针发送变化,所以原sds的指针引用都将失效,需更新到新的sds
/* Enlarge the free space at the end of the sds string so that the caller
* is sure that after calling this function can overwrite up to addlen
* bytes after the end of the string, plus one more byte for nul term.
*
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
void *sh, *newsh;
size_t avail = sdsavail(s);
size_t len, newlen;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen;
/* Return ASAP if there is enough space left. */
if (avail >= addlen) return s;
len = sdslen(s);
sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC) /* 为了减少malloc,用户申请分配的大小在小于1M时实际分配2倍用户申请空间,如果申请超过1M则追加1M */
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
type = sdsReqType(newlen);
/* Don't use type 5: the user is appending to the string and type 5 is
* not able to remember empty space, so sdsMakeRoomFor() must be called
* at every appending operation. */
if (type == SDS_TYPE_5) type = SDS_TYPE_8;
hdrlen = sdsHdrSize(type);
if (oldtype==type) {
newsh = s_realloc(sh, hdrlen+newlen+1);
if (newsh == NULL) return NULL;
s = (char*)newsh+hdrlen;
} else {/* 如果type不同,因为header的长度是不一样的,所以目前用的策略是重新分配一块新地址 */
/* Since the header size changes, need to move the string forward,
* and can't use realloc */
newsh = s_malloc(hdrlen+newlen+1);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, newlen);
return s;
}
22、sdsIncrLen:增加len
/* Increment the sds length and decrements the left free space at the
* end of the string according to 'incr'. Also set the null term
* in the new end of the string.
*
* This function is used in order to fix the string length after the
* user calls sdsMakeRoomFor(), writes something after the end of
* the current string, and finally needs to set the new length.
*
* Note: it is possible to use a negative increment in order to
* right-trim the string.
*
* Usage example:
*
* Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the
* following schema, to cat bytes coming from the kernel to the end of an
* sds string without copying into an intermediate buffer:
*
* oldlen = sdslen(s);
* s = sdsMakeRoomFor(s, BUFFER_SIZE);
* nread = read(fd, s+oldlen, BUFFER_SIZE);
* ... check for nread <= 0 and handle it ...
* sdsIncrLen(s, nread);
*/
void sdsIncrLen(sds s, ssize_t incr) {
unsigned char flags = s[-1];
size_t len;
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5: {
unsigned char *fp = ((unsigned char*)s)-1;
unsigned char oldlen = SDS_TYPE_5_LEN(flags);
assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
*fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
len = oldlen+incr;
break;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
len = (sh->len += incr);
break;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
len = (sh->len += incr);
break;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
len = (sh->len += incr);
break;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
len = (sh->len += incr);
break;
}
default: len = 0; /* Just to avoid compilation warnings. */
}
s[len] = '\0';
}