更新2014.3.17,base 2.8.7
1. 一个master支持多个slave连接,slave可以接受其他slave的连接
2. 主从同步时,master和slave都是非阻塞的
1. data redundancy
2. slave作为master的扩展,提供一些read-only的服务
3. 可以将数据持久化放在slave做,从而提升master性能
slaveof <masterip> <masterport>
表示该redis服务作为slave运行,masterip和masterport分别为master的ip和port。
masterauth <master-password>
slave-serve-stale-data yes
若slave-serve-stale-data设置为no,则slave在与master同步期间会向client返回错误:"SYNC with master in progress"
repl-ping-slave-period 10
repl-timeout 60
void replicationCron(void) { /*判断是否IO超时*/ if (server.masterhost && server.replstate == REDIS_REPL_TRANSFER && (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout) { redisLog(REDIS_WARNING,"Timeout receiving bulk data from MASTER..."); replicationAbortSyncTransfer(); //终止连接,并设置server.replstate = REDIS_REPL_CONNECT; } /* Timed out master when we are an already connected slave? */ if (server.masterhost && server.replstate == REDIS_REPL_CONNECTED && (time(NULL)-server.master->lastinteraction) > server.repl_timeout) { redisLog(REDIS_WARNING,"MASTER time out: no data nor PING received..."); freeClient(server.master); } /* Check if we should connect to a MASTER */ if (server.replstate == REDIS_REPL_CONNECT) { redisLog(REDIS_NOTICE,"Connecting to MASTER..."); if (connectWithMaster() == REDIS_OK) { //连接master redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync started"); } } /* If we have attached slaves, PING them from time to time. * So slaves can implement an explicit timeout to masters, and will * be able to detect a link disconnection even if the TCP connection * will not actually go down. */ if (!(server.cronloops % (server.repl_ping_slave_period*10))) { listIter li; listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { redisClient *slave = ln->value; /* Don't ping slaves that are in the middle of a bulk transfer * with the master for first synchronization. */ if (slave->replstate == REDIS_REPL_SEND_BULK) continue; if (slave->replstate == REDIS_REPL_ONLINE) { /* If the slave is online send a normal ping */ addReplySds(slave,sdsnew("PING\r\n")); } else { /* Otherwise we are in the pre-synchronization stage. * Just a newline will do the work of refreshing the * connection last interaction time, and at the same time * we'll be sure that being a single char there are no * short-write problems. */ if (write(slave->fd, "\n", 1) == -1) { /* Don't worry, it's just a ping. */ } } } } }
int connectWithMaster(void) { int fd; fd = anetTcpNonBlockConnect(NULL,server.masterhost,server.masterport); if (fd == -1) { redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s", strerror(errno)); return REDIS_ERR; } if (aeCreateFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE,syncWithMaster,NULL) == AE_ERR) { close(fd); redisLog(REDIS_WARNING,"Can't create readable event for SYNC"); return REDIS_ERR; } server.repl_transfer_s = fd; server.replstate = REDIS_REPL_CONNECTING; return REDIS_OK; }
/*
 * Handler for the SYNC command, issued by a client that wants to become a
 * slave of this server.
 *
 * Visible behavior: the request is silently ignored when the client already
 * has REDIS_SLAVE set, and refused with an error when this server is itself
 * a slave whose master link is not connected, or when the client still has
 * queued replies. Otherwise, if no background save is running, one is
 * started and the client is put in REDIS_REPL_WAIT_BGSAVE_END. Finally the
 * client is flagged as a slave and appended to server.slaves.
 *
 * NOTE(review): the branch taken when a background save is already running
 * is elided ("....." below) in these notes, so it is not documented here.
 */
void syncCommand(redisClient *c) {
    /* Ignore SYNC if the client is already a slave or in monitor mode.
     * NOTE(review): only REDIS_SLAVE is tested here; presumably monitor
     * clients carry that flag too -- confirm against the MONITOR code. */
    if (c->flags & REDIS_SLAVE) return;

    /* Refuse SYNC requests if we are a slave but the link with our master
     * is not ok... */
    if (server.masterhost && server.replstate != REDIS_REPL_CONNECTED) {
        addReplyError(c,"Can't SYNC while not connected with my master");
        return;
    }

    /* SYNC can't be issued when the server has pending data to send to
     * the client about already issued commands. We need a fresh reply
     * buffer registering the differences between the BGSAVE and the current
     * dataset, so that we can copy to other slaves if needed.
     * NOTE(review): the check is on the pending reply list, although the
     * error text below speaks of pending "input". */
    if (listLength(c->reply) != 0) {
        addReplyError(c,"SYNC is invalid with pending input");
        return;
    }

    redisLog(REDIS_NOTICE,"Slave ask for synchronization");
    /* Here we need to check if there is a background saving operation
     * in progress, or if it is required to start one */
    if (server.bgsavechildpid != -1) {
        /* A background save is already running; the handling of this case
         * is elided in the notes. */
        .....
    } else {
        /* Ok we don't have a BGSAVE in progress, let's start one */
        redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
        if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
            redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
            addReplyError(c,"Unable to perform background save");
            return;
        }
        /* The save we just started is the one this slave waits for. */
        c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
    }
    /* No dump file is open for this slave yet. */
    c->repldbfd = -1;
    c->flags |= REDIS_SLAVE;
    c->slaveseldb = 0;
    /* Register the client in the list of attached slaves. */
    listAddNodeTail(server.slaves,c);
    return;
}
void updateSlavesWaitingBgsave(int bgsaveerr) { listNode *ln; int startbgsave = 0; listIter li; listRewind(server.slaves,&li); while((ln = listNext(&li))) { redisClient *slave = ln->value; if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) { startbgsave = 1; slave->replstate = REDIS_REPL_WAIT_BGSAVE_END; } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) { struct redis_stat buf; if (bgsaveerr != REDIS_OK) { freeClient(slave); redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error"); continue; } if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 || redis_fstat(slave->repldbfd,&buf) == -1) { freeClient(slave); redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno)); continue; } slave->repldboff = 0; slave->repldbsize = buf.st_size; slave->replstate = REDIS_REPL_SEND_BULK; aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE); //删除之前的写回调 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) { //注册新的写回调 freeClient(slave); continue; } } } if (startbgsave) { if (rdbSaveBackground(server.dbfilename) != REDIS_OK) { listIter li; listRewind(server.slaves,&li); redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed"); while((ln = listNext(&li))) { redisClient *slave = ln->value; if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) freeClient(slave); } } } }为了避免阻塞应用,每次只传输16K数据
/*
 * Writable-event handler that streams the dump file to a slave, one chunk
 * per invocation.
 *
 * NOTE(review): this is an excerpt. The "......" markers stand for code
 * elided in the notes (including the declarations of slave, buf, buflen and
 * nwritten), so only the visible statements are documented.
 */
void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
    ......
    /* Move the file position to where the previous send stopped. */
    lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
    /* Read the next chunk, at most REDIS_IOBUF_LEN bytes (16K according to
     * the original note). */
    buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
    ......
    /* Send the chunk to the slave. */
    if ((nwritten = write(fd,buf,buflen)) == -1) {
        /* EAGAIN just returns without freeing the slave; any other error
         * drops it. */
        if (errno != EAGAIN) {
            redisLog(REDIS_WARNING,"Write error sending DB to slave: %s", strerror(errno));
            freeClient(slave);
        }
        return;
    }
    /* Advance the sent offset by what was actually written, so a short
     * write is resumed from the right position next time. */
    slave->repldboff += nwritten;
    ......
}
/*
 * call() is the core of command execution.
 *
 * It runs the client's current command, measures how long it took and how
 * much server.dirty grew, then hands the command to:
 *   - the slow log (always; the callee decides whether to record it);
 *   - the append only file, when enabled and the command changed data;
 *   - the attached slaves, when there are any and the command either changed
 *     data or is flagged REDIS_CMD_FORCE_REPLICATION;
 *   - the attached monitors, when there are any.
 * Finally the executed-commands counter is incremented.
 */
void call(redisClient *c) {
    long long started_at, elapsed, dirty_before, changes;
    int must_replicate;

    started_at = ustime();
    dirty_before = server.dirty;

    c->cmd->proc(c);

    /* How many changes the command made, and how long it ran. */
    changes = server.dirty - dirty_before;
    elapsed = ustime() - started_at;
    slowlogPushEntryIfNeeded(c->argv,c->argc,elapsed);

    if (server.appendonly && changes > 0) {
        feedAppendOnlyFile(c->cmd,c->db->id,c->argv,c->argc);
    }

    must_replicate = changes > 0 ||
                     (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION);
    if (must_replicate && listLength(server.slaves)) {
        replicationFeedSlaves(server.slaves,c->db->id,c->argv,c->argc);
    }

    if (listLength(server.monitors)) {
        replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc);
    }

    server.stat_numcommands++;
}
1. redis主从复制,并没有增加太多额外代码,但是功能强大,支持多个slave,并且支持slave作为master。
2. redis虽然宣称主从复制是非阻塞的,但由于redis以单线程提供服务,与slave的交互(如发送RDB数据、转发写命令)也由同一个处理线程完成,因此对master的性能仍有一定影响。