本文为作者原创,转载请注明出处:http://my.oschina.net/fuckphp/blog/350815
本文代码可以在 src/rdb.h 和 src/rdb.c 两个文件中找到,涉及其余文件的会另行注明。
Redis 中 rdb 作为 Redis 实现持久化的一部分,主要用于将数据库中的内存数据按不同类型处理后直接导出到硬盘中,并在需要的时候用来恢复数据;Redis 主从同步中,主向从进行全量推送时,主要也是通过发送 rdb 文件来实现的。
redis在进行rdbSave时候的执行如图:
rdbSave:(如下代码定义在 src/rdb.c 中):
Redis中实现rdb操作的主函数,所有的流程逻辑都在此处
int rdbSave(char *filename) { //字典迭代器 dictIterator *di = NULL; dictEntry *de; char tmpfile[256]; char magic[10]; int j; long long now = mstime(); FILE *fp; //创建rio对象 rio rdb; uint64_t cksum; //临时文件 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid()); //创建一个临时文件 fp = fopen(tmpfile,"w"); if (!fp) { redisLog(REDIS_WARNING, "Failed opening .rdb for saving: %s", strerror(errno)); return REDIS_ERR; } //初始化 file rio rioInitWithFile(&rdb,fp); //设置校验和计算函数 if (server.rdb_checksum) rdb.update_cksum = rioGenericUpdateChecksum; //RDB版本 snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION); //临时文件中写入rdb版本号 if (rdbWriteRaw(&rdb,magic,9) == -1) goto werr; //遍历每一个数据库 for (j = 0; j < server.dbnum; j++) { //设置 当前遍历的数据库 redisDb *db = server.db+j; //取到key space对应的字典 dict *d = db->dict; if (dictSize(d) == 0) continue; //创建一个安全迭代 di = dictGetSafeIterator(d); //安全迭代失败返回错误 if (!di) { fclose(fp); return REDIS_ERR; } //写入当前执行rdb的数据库号 if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr; //通过位运算压缩写入长度 详细见后文 if (rdbSaveLen(&rdb,j) == -1) goto werr; //进行安全迭代 while((de = dictNext(di)) != NULL) { //获取当前遍历的key val sds keystr = dictGetKey(de); robj key, *o = dictGetVal(de); long long expire; //初始化一个string类型的Redis Object initStaticStringObject(key,keystr); //获取key的过期时间 expire = getExpire(db,&key); //存储 key value if (rdbSaveKeyValuePair(&rdb,&key,o,expire,now) == -1) goto werr; } //释放安全迭代器 dictReleaseIterator(di); } di = NULL; ; /* EOF opcode */ //标示rdb结束 if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_EOF) == -1) goto werr; //更新校验和,并写入校验和 cksum = rdb.cksum; //翻转校验和 memrev64ifbe(&cksum); rioWrite(&rdb,&cksum,8); //将剩余没有 flush的数据 flush到硬盘上 if (fflush(fp) == EOF) goto werr; if (fsync(fileno(fp)) == -1) goto werr; if (fclose(fp) == EOF) goto werr; //使用临时文件替换rdb文件 if (rename(tmpfile,filename) == -1) { redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno)); unlink(tmpfile); return REDIS_ERR; } redisLog(REDIS_NOTICE,"DB saved on disk"); server.dirty = 0; 
//更新最后修改时间 server.lastsave = time(NULL); //更新状态 server.lastbgsave_status = REDIS_OK; return REDIS_OK; werr: fclose(fp); unlink(tmpfile); redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno)); if (di) dictReleaseIterator(di); return REDIS_ERR; }
rdbSaveBackground:
redis 执行rdbSave操作的时候,会阻塞主进程,这时候将无法继续响应客户端请求,对于一个数据库,这样是不可行的,所以Redis 会使用子进程来执行rdbSave操作
int rdbSaveBackground(char *filename) { pid_t childpid; long long start; if (server.rdb_child_pid != -1) return REDIS_ERR; server.dirty_before_bgsave = server.dirty; server.lastbgsave_try = time(NULL); start = ustime(); //创建子进程,并复制父进程的进程空间 if ((childpid = fork()) == 0) { int retval; //子进程关闭继承的socket closeListeningSockets(0); //设置当前状态 redisSetProcTitle("redis-rdb-bgsave"); //执行 rdbSave 操作 retval = rdbSave(filename); if (retval == REDIS_OK) { size_t private_dirty = zmalloc_get_private_dirty(); if (private_dirty) { redisLog(REDIS_NOTICE, "RDB: %zu MB of memory used by copy-on-write", private_dirty/(1024*1024)); } } exitFromChild((retval == REDIS_OK) ? 0 : 1); } else { /* Parent */ server.stat_fork_time = ustime()-start; if (childpid == -1) { server.lastbgsave_status = REDIS_ERR; redisLog(REDIS_WARNING,"Can't save in background: fork: %s", strerror(errno)); return REDIS_ERR; } redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid); server.rdb_save_time_start = time(NULL); server.rdb_child_pid = childpid; updateDictResizePolicy(); return REDIS_OK; } return REDIS_OK; /* unreached */ }
另外一个比较重要的函数:
/* Write one key/value record to `rdb`.
 *
 * `expiretime` is the expire time of the key, or -1 when it has none;
 * `now` is the time it is compared against.
 *
 * Returns 1 when the record was written, 0 when the key was skipped because
 * its expire time is already in the past, and -1 when any of the underlying
 * save calls fails. */
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
                        long long expiretime, long long now)
{
    int has_expire = (expiretime != -1);

    if (has_expire) {
        /* Already expired keys are not written at all. */
        if (expiretime < now) return 0;

        /* Expire opcode first, then the millisecond timestamp. */
        if (rdbSaveType(rdb,REDIS_RDB_OPCODE_EXPIRETIME_MS) == -1 ||
            rdbSaveMillisecondTime(rdb,expiretime) == -1)
        {
            return -1;
        }
    }

    /* Value type, then the key, then the value itself. The short-circuit
     * evaluation stops at the first failing call. */
    if (rdbSaveObjectType(rdb,val) == -1 ||
        rdbSaveStringObject(rdb,key) == -1 ||
        rdbSaveObject(rdb,val) == -1)
    {
        return -1;
    }
    return 1;
}
redis在进行rdbLoad时候的执行如图:
rdbLoad :
int rdbLoad(char *filename) { uint32_t dbid; int type, rdbver; redisDb *db = server.db+0; char buf[1024]; long long expiretime, now = mstime(); FILE *fp; rio rdb; //打开rdb文件 if ((fp = fopen(filename,"r")) == NULL) return REDIS_ERR; rioInitWithFile(&rdb,fp); //设置校验和函数 rdb.update_cksum = rdbLoadProgressCallback; //这只每次处理的最大块 rdb.max_processing_chunk = server.loading_process_events_interval_bytes; //读取redis版本 if (rioRead(&rdb,buf,9) == 0) goto eoferr; buf[9] = '\0'; //验证是否REDIS开头 if (memcmp(buf,"REDIS",5) != 0) { fclose(fp); redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file"); errno = EINVAL; return REDIS_ERR; } //验证版本 必须大于1 并且 比当前版本低 rdbver = atoi(buf+5); if (rdbver < 1 || rdbver > REDIS_RDB_VERSION) { fclose(fp); redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver); errno = EINVAL; return REDIS_ERR; } //标记开始进行加载 startLoading(fp); while(1) { robj *key, *val; expiretime = -1; //读取当前行的类型 if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; //读取秒级别的过期时间 if (type == REDIS_RDB_OPCODE_EXPIRETIME) { if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr; //继续读取redis key的类型 if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; expiretime *= 1000; } else if (type == REDIS_RDB_OPCODE_EXPIRETIME_MS) { //读取毫秒级别的过期时间 if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr; //继续读取redis key的类型 if ((type = rdbLoadType(&rdb)) == -1) goto eoferr; } //如果结束则退出 if (type == REDIS_RDB_OPCODE_EOF) break; //选择切换db if (type == REDIS_RDB_OPCODE_SELECTDB) { if ((dbid = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR) goto eoferr; if (dbid >= (unsigned)server.dbnum) { redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. 
Exiting\n", server.dbnum); exit(1); } db = server.db+dbid; continue; } //加载key if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr; //根据不同的类型的value加载value值 if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr; //如果为redis 主 则忽略过期key,并将key val的引用计数减少 if (server.masterhost == NULL && expiretime != -1 && expiretime < now) { decrRefCount(key); decrRefCount(val); continue; } //将key val加到指定的db中 dbAdd(db,key,val); //如果存在过期时间 则设置过期 if (expiretime != -1) setExpire(db,key,expiretime); //加入完成减少key的引用计数 decrRefCount(key); } //验证redis 版本,验证校验和 if (rdbver >= 5 && server.rdb_checksum) { uint64_t cksum, expected = rdb.cksum; if (rioRead(&rdb,&cksum,8) == 0) goto eoferr; memrev64ifbe(&cksum); if (cksum == 0) { redisLog(REDIS_WARNING,"RDB file was saved with checksum disabled: no check performed."); } else if (cksum != expected) { redisLog(REDIS_WARNING,"Wrong RDB checksum. Aborting now."); exit(1); } } fclose(fp); //标志结束 stopLoading(); return REDIS_OK; eoferr: /* unexpected end of file is handled here with a fatal exit */ redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now."); exit(1); return REDIS_ERR; /* Just to avoid warning */ }
Redis会对不同类型的 redis object 进行不同的处理,将会在下一篇文章介绍每种类型的压缩以及处理方式。
Redis2.8.9源码 src/rdb.h src/rdb.c
流程图工具