阅读Redis源码,从Redis的数据结构开始。sdshdr
Redis并没有使用C语言原生字符串,而是使用SDS(简单动态字符串),阅读源码来理解Redis作者是怎设计SDS,来处理我们平时使用C字符串所碰到的问题。
一、SDS定义
struct sdshdr {
int len; //buf已占用的空间长度
int free; //buf中剩余的空间长度
char but[]; //数据 真实存储c字符串
}
二、SDS与C字符串的区别
c语言的字符串很简单,用N+1的长度来标示长度为N的字符串
char cRedis[6] = "Redis";
那SDS与C字符串有哪些区别了?
A.避免缓冲区溢出
c语言的拼接字符串方法
char *strcat(char *dest, const char *src)
该函数需要程序员去确认dest是否有足够的空间容纳下src的内容,如果没办法容纳,则会造成溢出
而SDS在拼接字符串的时候,会杜绝溢出可能.
- SDS调用拼接函数,构造出拼接字符串的长度
sds sdscat(sds s, const char *t) {
return sdscatlen(s, t, strlen(t)):
}
- SDS在拼接开始,调用sdsMakeRoomFor来决定是否进行扩容
sds makeRoomFor(sds s, size_t addlen) {
struct sdshdr *sh, *newsh;
size_t free = sdsavail(s); //获取s目前的剩余空间
if (free > addlen) return s; //无需扩展
size_t len, newlen;
len = sdslen(s);
sh = (void*)(s-sizeof(struct sdshdr))):
newlen = (len + adulen);
//redis扩容规则
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PERALLOC
newsh = zrelloc(sh, sizeof(struct sdshdr) + newlen + 1);
if (news == NULL) return NULL;
newsh->free = newlen - len;
return newsh->buf;
}
- SDS拼接函数
sds sdscatlen(sds s, const void *t, size_t len) {
struct sdshdr *sh;
size_t curlen = sdslen(s);
s = sdsMakeRoomFor(s, len);
if (s == NULL) return NULL;
sh = (void *)(s-sizeof(struct sdshdr));
memcpy(s+curlen, t, len);//赋值t中的内容到字符串尾部
sh->len = curlen + len;
sh->free = sh->free - len;
s[curlen+len] = '\0';
return s;
}
B、减少修改字符串带来的内存重新分配
C字符串的底层是一个定长的数组,每一次对字符串增长或者缩短的时候,都要进行内存重新分配
- 增长C字符串,比如拼接,需要重新分配底层数据空间大小,如果忘记,则会造成溢出。
- 缩短字符串,比如截断,需要释放不再使用的空间,否则造成内存泄露。
Redis数据库使用中,数据被频繁修改,C字符串会带来的频繁内存重分配,降低写速度,并带来性能问题. 而Redis的sdshdr解除了字符串长度和底层数组之间的长度关联,并通过空间预分配和惰性空间释放来减少内存重分配次数,提高性能
- 空间预分配:扩容过程中,如果需求长度小于1MB,则double长度,如果大约1MB,则需求长度+1MB.通过此策略,减少内存重分配次数。
//redis扩容规则
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PERALLOC
- 惰性空间释放:当修改并缩短sds字符串,sds并不回收多出来的字节,而是仅仅通过free属性来记录可以使用的空间。以sdstrim代码为例:
sds sdstrim(sds s, const char *cset) {
struct sdshdr *sh = (void*)(s- sizeof(struct sdshdr));
char *start, *end, *sp, *ep;
sp = start = s;
ep = end = s + sdslen(s) + 1;
while(sp <= end && strchr(cset, *sp)) sp++;
while(ep > start && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1); //计算trim后的字符串长度
if (sh->buf != sp) memmove(sh->buf, sp, len);
sp->buf[len] = '\0';
sh->free = sh->free + (sh->len -len);
sh->len = len;
return s;
}
C.获取字符串长度复杂度为O(1),而C字符串为O(N);清理字符串,因为是惰性释放,时间复杂度也是O(1)
//获取字符串长度
static inline size_t sdslen(const sds s) {
struct sdshdr *sh = (void*)(s- sizeof(struct sdshdr));
return sh->len;
}
//清理字符串
void sdsclear(sds s) {
struct sdshdr *sh = (void*)(s-sizeof(struct sdshdr));
sh->free += sh->len;
sh->buf[0] = '\0';
}
D.兼容C字符串的一些特性。
- 对比sdshdr字符串与C字符串是否一致
strcasecmp(sdshdr->buf, "hello world");
- 添加sdshdr保存的字符串到C字符串之后
strcat(c_string, sds->buf);
三、SDS里面的其他方法
- 把数组根据分隔符拼接成字符串
sds sdsjoin(char **argv, int argc, char *sep) {
sds join = sdsempty();
int j;
for (j = 0; j < argc, j ++) {
join = sdscat(join,argv[j]);
if(j != argc -1) join = sdscat(join,sep);
}
return join;
}
- sds字符串格式化
/**
* %s - C String
* %S - SDS string
* %i - signed int
* %I - 64 bit signed integer (long long, int64_t)
* %u - unsigned int
* %U - 64 bit unsigned integer (unsigned long long, uint64_t)
* %% - Verbatim "%" character.
**/
sds sdscatfmt(sds s, char const *fmt,...) {
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
size_t initlen = sdslen(s);
const char *f = fat;
int i;
va_list ap;
va_start(ap,fmt);
f = fmt; /* Next format specifier byte to process. */
i = initlen; /* Position of the next byte to write to dest str. */
while(*f) {
char next, *str;
size_t l;
long long num;
unsigned long long unum;
/* Make sure there is always space for at least 1 char. */
if (sh->free == 0) {
s = sdsMakeRoomFor(s,1);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
switch(*f) {
case '%':
next = *(f+1);
f++;
switch(next) {
case 's':
case 'S':
str = va_arg(ap,char*);
l = (next == 's') ? strlen(str) : sdslen(str);
if (sh->free < l) {
s = sdsMakeRoomFor(s,l);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,str,l);
sh->len += l;
sh->free -= l;
i += l;
break;
case 'i':
case 'I':
if (next == 'i')
num = va_arg(ap,int);
else
num = va_arg(ap,long long);
{
char buf[SDS_LLSTR_SIZE];
l = sdsll2str(buf,num);
if (sh->free < l) {
s = sdsMakeRoomFor(s,l);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
sh->len += l;
sh->free -= l;
i += l;
}
break;
case 'u':
case 'U':
if (next == 'u')
unum = va_arg(ap,unsigned int);
else
unum = va_arg(ap,unsigned long long);
{
char buf[SDS_LLSTR_SIZE];
l = sdsull2str(buf,unum);
if (sh->free < l) {
s = sdsMakeRoomFor(s,l);
sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
sh->len += l;
sh->free -= l;
i += l;
}
break;
default: /* Handle %% and generally %. */
s[i++] = next;
sh->len += 1;
sh->free -= 1;
break;
}
break;
default:
s[i++] = *f;
sh->len += 1;
sh->free -= 1;
break;
}
f++;
}
va_end(ap);
/* Add null-term */
s[i] = '\0';
return s;
}
- 复制字符串
sds sdscpy(sds s, const char *t, size_t len) {
struct sdshdr *sh = (void*)(s- sizeof(struct sdshdr));
size_t totlen = sh->free + sh->len; //buf现有长度
if (totlen < len) {
s = sdsMakeRoomFor(s, len-sh->len);
if (s ==NULL) return NULL;
sh = (void*)(s- sizeof(struct sdshdr));
totlen = sh->free + sh ->len;
}
memcpy(s, t, len);
s[len] = '\0';
sh->len = len;
sh->free = totlen -len;
return s;
}
- 截取字符串
void sdsrange(sds s, int start, int end) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t newlen, len = sdslen(s);
if (len == 0) return;
if (start < 0) {
start = len+start;
if (start < 0) start = 0;
}
if (end < 0) {
end = len+end;
if (end < 0) end = 0;
}
newlen = (start > end) ? 0 : (end-start)+1;
if (newlen != 0) {
if (start >= (signed)len) {
newlen = 0;
} else if (end >= (signed)len) {
end = len-1;
newlen = (start > end) ? 0 : (end-start)+1;
}
} else {
start = 0;
}
if (start && newlen) memmove(sh->buf, sh->buf+start, newlen); // 如果有需要,对字符串进行移动
sh->buf[newlen] = 0;
sh->free = sh->free+(sh->len-newlen);
sh->len = newlen;
}