Redis中用C实现的动态字符串(SDS):保证二进制安全,也就是不依赖\0来判断字符串的结束,而是由头部的len记录长度(buf末尾仍会额外补一个\0,以兼容C字符串函数)
/* An sds is a plain char pointer to the string bytes (the `buf` member of one
 * of the sdshdr structs); the header is stored immediately before the
 * pointed-to address. Declared exactly once: repeating a typedef is only
 * accepted from C11 onwards. */
typedef char *sds;
/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However it is kept here to document the layout of type 5 SDS strings.
* The header is a single byte: there are no separate len/alloc fields, the
* length shares the flags byte with the type. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[]; /* flexible array member: the string bytes start here */
};
/* Header with 8-bit length and capacity fields.
* The struct is packed so no padding is inserted between the fields: flags
* is always the byte immediately before buf. */
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; /* flexible array member: the string bytes start here */
};
/* Header with 16-bit length and capacity fields.
* The struct is packed so no padding is inserted between the fields: flags
* is always the byte immediately before buf. */
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; /* flexible array member: the string bytes start here */
};
/* Header with 32-bit length and capacity fields.
* The struct is packed so no padding is inserted between the fields: flags
* is always the byte immediately before buf. */
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; /* flexible array member: the string bytes start here */
};
/* Header with 64-bit length and capacity fields.
* The struct is packed so no padding is inserted between the fields: flags
* is always the byte immediately before buf. */
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; /* flexible array member: the string bytes start here */
};
/* Header type identifiers. The value is stored in the 3 least significant
* bits of the flags byte of every header. */
#define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
sds 本质上是一个char*的指针,指向一块连续的内存
对于不同长度的字符串,使用不同的头部结构(sdshdr5/8/16/32/64),通过flags的低三位来区分类型;len是已使用的长度,alloc是已分配的容量(不包括头部和结尾的\0),buf是柔性数组,存放真正的字符串数据
__attribute__((__packed__)) 让编译器取消结构体成员之间的对齐填充(按1字节紧凑排列),这样flags总是紧挨在buf之前,可以通过s[-1]直接访问
/* Create a new sds string holding `initlen` bytes.
*
* init:    bytes to copy into the new string. When NULL, the whole
*          allocation (header and buffer) is zero-filled instead.
* initlen: number of content bytes.
*
* Returns a pointer to the string bytes (the header sits just before it), or
* NULL when s_malloc returns NULL. A '\0' is always stored after the content,
* even though the length is recorded in the header. */
sds sdsnewlen(const void *init, size_t initlen) {
void *sh;
sds s;
// Pick the header type from the requested length.
char type = sdsReqType(initlen);
// Empty strings are promoted from type 5 to type 8.
// NOTE(review): presumably because empty strings are usually created to be
// appended to and the type 5 header has no alloc field -- confirm.
if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
int hdrlen = sdsHdrSize(type); // header size for the chosen type
unsigned char *fp; /* flags pointer. */
// Header + content + 1 byte for the trailing '\0'.
// NOTE(review): no overflow check on this sum is visible here.
sh = s_malloc(hdrlen+initlen+1);
if (sh == NULL) return NULL;
if (!init)
memset(sh, 0, hdrlen+initlen+1);
s = (char*)sh+hdrlen; // s points at buf, just past the header
fp = ((unsigned char*)s)-1; // the flags byte is the last byte of the header
// Initialise the header fields according to the type and the arguments.
switch(type) {
// (cases for the other header types are elided in this excerpt)
...
case SDS_TYPE_64: {
// NOTE(review): SDS_HDR_VAR is not shown here; it presumably declares a
// typed `sh` pointing at the 64-bit header in front of s -- confirm.
SDS_HDR_VAR(64,s);
sh->len = initlen;
sh->alloc = initlen;
*fp = type;
break;
}
...
}
// Copy the initial content, if any, then terminate the buffer.
if (initlen && init)
memcpy(s, init, initlen);
s[initlen] = '\0';
return s;
}
/* Release an sds string. Passing NULL is a no-op.
 * The allocation starts at the header, which lies sdsHdrSize() bytes before
 * the pointer handed out to callers, so that is the address given to s_free. */
void sdsfree(sds s) {
    if (s != NULL) {
        char *base = (char*)s - sdsHdrSize(s[-1]);
        s_free(base);
    }
}
/* Make the string empty in place: the recorded length becomes zero and the
 * first byte becomes the terminator. No memory is released here. */
void sdsclear(sds s) {
    sdssetlen(s, 0);
    *s = '\0';
}
/* Append the sds string `t` to `s`, using the length recorded in t's header.
 * Returns whatever sdscatlen returns; the caller must use the returned
 * pointer in place of `s` from then on. */
sds sdscatsds(sds s, const sds t) {
    size_t tlen = sdslen(t);
    return sdscatlen(s, t, tlen);
}
/* Append `len` bytes starting at `t` to the sds string `s`.
 *
 * Returns the resulting string, or NULL when sdsMakeRoomFor returns NULL.
 * The caller must use the returned pointer in place of `s` from then on. */
sds sdscatlen(sds s, const void *t, size_t len) {
    size_t oldlen = sdslen(s);
    size_t total;
    sds out = sdsMakeRoomFor(s, len);

    if (out == NULL) return NULL;

    total = oldlen + len;
    /* Raw byte copy with an explicit length keeps this binary safe. */
    memcpy(out + oldlen, t, len);
    sdssetlen(out, total);
    out[total] = '\0'; /* keep the buffer terminated */
    return out;
}
其中调用了sdsMakeRoomFor,其主要实现扩容等操作。
分为以下几步:
1、若剩余可用空间不小于要追加的长度,则不需要扩容,直接返回
2、否则需要扩容:若新长度(原长度+追加长度)小于1MB(SDS_MAX_PREALLOC),则按新长度的2倍分配;否则按新长度+1MB分配
3、若扩容后头部类型不变,则对原内存做realloc;若头部类型变了,则malloc新内存、拷贝数据并释放旧内存
具体实现代码:
/* Make sure at least `addlen` bytes can be appended to `s` (the trailing
 * '\0' is accounted for separately).
 *
 * s:      the string to grow.
 * addlen: number of extra bytes the caller intends to append.
 *
 * Returns `s` unchanged when enough room is already available, otherwise the
 * (possibly relocated) string with a larger allocation. Returns NULL when
 * the allocator returns NULL or when the requested size cannot be
 * represented in a size_t. The recorded length is never changed here, only
 * the recorded capacity.
 *
 * Growth policy: a required length below SDS_MAX_PREALLOC is doubled;
 * otherwise SDS_MAX_PREALLOC extra bytes are reserved. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    void *sh, *newsh;
    size_t avail = sdsavail(s);
    size_t len, newlen;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen;

    /* Enough free space in the buffer already: nothing to do. */
    if (avail >= addlen) return s;

    len = sdslen(s);
    sh = (char*)s-sdsHdrSize(oldtype);

    /* Required length; refuse if the unsigned sum wrapped, since a wrapped
     * value would produce a buffer smaller than what the caller will write. */
    newlen = len+addlen;
    if (newlen < len) return NULL;

    /* Over-allocate so that repeated appends do not reallocate every time. */
    if (newlen < SDS_MAX_PREALLOC) {
        newlen *= 2;
    } else {
        if (newlen > (size_t)-1 - SDS_MAX_PREALLOC) return NULL;
        newlen += SDS_MAX_PREALLOC;
    }

    type = sdsReqType(newlen);
    /* Never grow into type 5: its header has no alloc field, so the spare
     * capacity reserved here could not be recorded. */
    if (type == SDS_TYPE_5) type = SDS_TYPE_8;
    hdrlen = sdsHdrSize(type);

    /* The allocation is header + buffer + terminator; reject a wrapped sum. */
    if (newlen > (size_t)-1 - (size_t)hdrlen - 1) return NULL;

    if (oldtype==type) {
        /* Same header layout: resize the existing allocation, leaving any
         * relocation and copying to s_realloc. */
        newsh = s_realloc(sh, hdrlen+newlen+1);
        if (newsh == NULL) return NULL;
        s = (char*)newsh+hdrlen;
    } else {
        /* The header size changes, so the content has to move to a new
         * allocation. The old one is released only after the new one has
         * been obtained, so a failure leaves the original untouched. */
        newsh = s_malloc(hdrlen+newlen+1);
        if (newsh == NULL) return NULL;
        memcpy((char*)newsh+hdrlen, s, len+1); /* content plus its '\0' */
        s_free(sh);
        s = (char*)newsh+hdrlen;
        s[-1] = type;
        sdssetlen(s, len);
    }
    sdssetalloc(s, newlen);
    return s;
}