下面是 Redis 启动的入口程序 server.c,这里简单列出了主函数中比较关键的几个步骤。
## server.c
/* Entry point of the Redis server (simplified excerpt): installs the OOM
 * handler, seeds hashing, loads the default configuration, applies Sentinel
 * initialization when requested, doubles as redis-check-rdb/redis-check-aof
 * depending on argv[0], then initializes the server, restores data from disk
 * and enters the event loop until it is stopped. */
int main(int argc, char **argv) {
    /* 1. OOM handler: called by zmalloc() on allocation failure so we log
     * and abort instead of dereferencing NULL. */
    zmalloc_set_oom_handler(redisOutOfMemoryHandler);
    /* 2. Build a random seed used by the dict hash function. */
    char hashseed[16];
    getRandomHexChars(hashseed,sizeof(hashseed));
    /* 3. Initialize the default server configuration. */
    initServerConfig();
    /* 4. If running in Sentinel mode, apply Sentinel-specific defaults. */
    if (server.sentinel_mode) {
        initSentinelConfig();
        initSentinel();
    }
    /* 5. When invoked as redis-check-rdb / redis-check-aof, run the
     * corresponding backup-file integrity checker instead of the server
     * (these calls do not return). */
    if (strstr(argv[0],"redis-check-rdb") != NULL)
        /* Check that the RDB file is intact. */
        redis_check_rdb_main(argc,argv,NULL);
    else if (strstr(argv[0],"redis-check-aof") != NULL)
        /* Check the AOF file. */
        redis_check_aof_main(argc,argv);
    /* 6. Initialize the server: databases, sockets, event loop, ... */
    initServer();
    /* Sentinel vs. non-Sentinel startup. */
    if (!server.sentinel_mode) {
        /* Restore the dataset from the AOF or RDB file on disk. */
        loadDataFromDisk();
    } else {
        /* Start Sentinel. */
        sentinelIsRunning();
    }
    /* Hook executed before each event-loop iteration. */
    aeSetBeforeSleepProc(server.el,beforeSleep);
    /* Hook executed after the event loop wakes up from polling. */
    aeSetAfterSleepProc(server.el,afterSleep);
    /* Enter the event-driven main loop (blocks until the loop is stopped).
     * (fixed: statement was missing its terminating ';') */
    aeMain(server.el);
    /* Tear down the event loop on the way out. */
    aeDeleteEventLoop(server.el);
    return 0; /* fixed: non-void main() fell off the end without a return */
}
initServerConfig
initServerConfig 主要用于初始化 Redis 的各项默认配置,例如:
server.aof_state = AOF_OFF;
默认AOF关闭
server.aof_fsync = CONFIG_DEFAULT_AOF_FSYNC;
AOF的默认刷盘方式
server.aof_rewrite_perc = AOF_REWRITE_PERC;
AOF 自动重写的增长百分比阈值:当 AOF 文件相比上次重写后的大小增长超过该百分比(默认 100%)时触发重写
server.aof_rewrite_min_size = AOF_REWRITE_MIN_SIZE;
重写的最小大小
server.rdb_filename = zstrdup(CONFIG_DEFAULT_RDB_FILENAME);
rdb的文件名
server.aof_filename = zstrdup(CONFIG_DEFAULT_AOF_FILENAME);
aof文件名
server.rdb_compression = CONFIG_DEFAULT_RDB_COMPRESSION; rdb的压缩方式
server.rdb_checksum = CONFIG_DEFAULT_RDB_CHECKSUM; rdb文件完整性校验方式
server.masterport = 6379; 主从复制中主节点(master)的默认端口;服务器自身的监听端口由 server.port 指定(默认同样是 6379)
// 初始化命令redisCommandTable存储着redis所有对外暴露的命令
populateCommandTable(); // 将redisCommandTable数组的命令转为字典存储,方便通过命令名称查找命令
![Redis 启动流程图](https://s3-us-west-2.amazonaws.com/secure.notion-static.com/b5f808b5-cead-4575-b0c6-d53c8e5b7ba0/Untitled.png)
在initServer中最核心的莫过于申请默认的16个db的空间并进行初始化,然后就是打开tcp端口监听。
void initServer(void) {
// 创建全局共享变量,主要是一些常用的字符串
createSharedObjects();
// 调整可打开文件数(客户端数量限制)
adjustOpenFilesLimit();
// 创建事件循环
server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
//申请16个db数据库锁使用的内存
server.db = zmalloc(sizeof(redisDb)*server.dbnum);
// 进行端口监听
listenToPort(server.port,server.ipfd,&server.ipfd_count)
// 打开unix套接字
server.sofd = anetUnixServer(server.neterr,server.unixsocket,
server.unixsocketperm, server.tcp_backlog);
//创建redis数据库
for (j = 0; j < server.dbnum; j++) {
// 数据字典
server.db[j].dict = dictCreate(&dbDictType,NULL);
// 过期key字典
server.db[j].expires = dictCreate(&keyptrDictType,NULL);
阻塞key字典
server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL);
server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
server.db[j].id = j;
server.db[j].avg_ttl = 0;
server.db[j].defrag_later = listCreate();
}
// 初始化lru的可以链表池
evictionPoolAlloc();
//pub sub 发布订阅字典创建
server.pubsub_channels = dictCreate(&keylistDictType,NULL);
//时间事件 serverCron 用来做后天处理事件(客户端过期,剔除过去key等等)
if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
serverPanic("Can't create event loop timers.");
exit(1);
}
// 处理文件事件(针对server.ipfd)(用来处理tcp连接)
for (j = 0; j < server.ipfd_count; j++) {
if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
acceptTcpHandler,NULL) == AE_ERR)
{
serverPanic(
"Unrecoverable error creating server.ipfd file event.");
}
}
// unix socket 文件事件
aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
acceptUnixHandler,NULL)
// 针对server.module_blocked_pipe[0]创建一个文件事件,用来唤醒事件循环去处理客户端的命令
aeCreateFileEvent(server.el, server.module_blocked_pipe[0], AE_READABLE,
moduleBlockedClientPipeReadable,NULL)
// 如果开启了aof
if (server.aof_state == AOF_ON) {
// 创建aof的文件描述符
server.aof_fd = open(server.aof_filename,
O_WRONLY|O_APPEND|O_CREAT,0644);
if (server.aof_fd == -1) {
serverLog(LL_WARNING, "Can't open the append-only file: %s",
strerror(errno));
exit(1);
}
}
// 集群初始化
if (server.cluster_enabled) clusterInit();
}
Redis 默认有 16 个数据库,通常我们使用的是 0 号库。
/* Per-database state. The server keeps an array of server.dbnum (default 16)
 * of these in server.db; see the loop in initServer() that fills the fields. */
typedef struct redisDb {
dict *dict; /* The keyspace for this DB: the main key -> value dictionary */
dict *expires; /* Timeout of keys with a timeout set (key -> expire time) */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
dict *ready_keys; /* Blocked keys that received a PUSH */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
int id; /* Database ID (its index in server.db) */
long long avg_ttl; /* Average TTL, just for stats */
list *defrag_later; /* List of key names to attempt to defrag one by one, gradually. */
} redisDb;
数据恢复时通过 loadAppendOnlyFile(server.aof_filename) 加载 AOF 文件:
/* Restore the dataset from disk at startup: prefer replaying the AOF when
 * AOF is enabled, otherwise load the RDB snapshot. After a successful RDB
 * load, also try to restore the replication ID / offset so a replica can
 * attempt a partial resynchronization with its master. Exits the process
 * on fatal load errors. */
void loadDataFromDisk(void) {
    long long load_start = ustime();
    if (server.aof_state == AOF_ON) {
        /* Replay the commands stored in the append-only file. */
        if (loadAppendOnlyFile(server.aof_filename) == C_OK)
            serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-load_start)/1000000);
        return;
    }
    /* AOF disabled: fall back to the RDB snapshot. */
    rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
    if (rdbLoad(server.rdb_filename,&rsi) != C_OK) {
        /* A missing RDB file just means an empty dataset; any other error
         * is fatal. */
        if (errno != ENOENT) {
            serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno));
            exit(1);
        }
        return;
    }
    serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
        (float)(ustime()-load_start)/1000000);
    /* Restore the replication ID / offset from the RDB file. Note that
     * older implementations may save a repl_stream_db of -1 inside the RDB
     * file in a wrong way, see rdbPopulateSaveInfo for details. */
    if (server.masterhost &&
        rsi.repl_id_is_set &&
        rsi.repl_offset != -1 &&
        rsi.repl_stream_db != -1)
    {
        memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
        server.master_repl_offset = rsi.repl_offset;
        /* If we are a slave, create a cached master from this information,
         * in order to allow partial resynchronizations with masters. */
        replicationCacheMasterUsingMyself();
        selectDb(server.cached_master,rsi.repl_stream_db);
    }
}
beforeSleep 会在每次事件循环迭代进入睡眠(阻塞等待)之前被调用,主要工作内容:
/* Run before every event-loop iteration (registered via
 * aeSetBeforeSleepProc in main). Flushes work accumulated during the
 * previous iteration. The statement order below matters: e.g. the AOF
 * buffer is flushed before pending client replies are written, and the
 * module GIL is released last. */
void beforeSleep(struct aeEventLoop *eventLoop) {
UNUSED(eventLoop);
/* When cluster mode is enabled, run clusterBeforeSleep() first. */
if (server.cluster_enabled) clusterBeforeSleep();
/* Run a fast expire cycle (the called function will return
 * ASAP if a fast cycle is not needed). */
if (server.active_expire_enabled && server.masterhost == NULL)
activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
/* Send all the slaves an ACK request if at least one client blocked
 * during the previous event loop iteration. */
if (server.get_ack_from_slaves) {
robj *argv[3];
argv[0] = createStringObject("REPLCONF",8);
argv[1] = createStringObject("GETACK",6);
argv[2] = createStringObject("*",1); /* Not used argument. */
replicationFeedSlaves(server.slaves, server.slaveseldb, argv, 3);
decrRefCount(argv[0]);
decrRefCount(argv[1]);
decrRefCount(argv[2]);
server.get_ack_from_slaves = 0;
}
/* Unblock all the clients blocked for synchronous replication
 * in WAIT. */
if (listLength(server.clients_waiting_acks))
processClientsWaitingReplicas();
/* Check if there are clients unblocked by modules that implement
 * blocking commands. */
moduleHandleBlockedClients();
/* Try to process pending commands for clients that were just
 * unblocked. */
if (listLength(server.unblocked_clients))
processUnblockedClients();
/* Write the AOF buffer on disk. */
flushAppendOnlyFile(0);
/* Handle writes with pending output buffers: send the queued replies
 * to the clients. */
handleClientsWithPendingWrites();
/* Before we are going to sleep, let the threads access the dataset by
 * releasing the GIL. Redis main thread will not touch anything at this
 * time. */
if (moduleCount()) moduleReleaseGIL();
}
/* The main event loop: repeatedly invoke the beforesleep callback (when
 * one is registered) and then dispatch all pending file and time events,
 * until eventLoop->stop is set. */
void aeMain(aeEventLoop *eventLoop) {
    for (eventLoop->stop = 0; !eventLoop->stop; ) {
        /* Run the per-iteration hook, if any was registered. */
        if (eventLoop->beforesleep != NULL)
            eventLoop->beforesleep(eventLoop);
        /* The heart of the loop: wait for and process events. */
        aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
    }
}
aeProcessEvents 是事件处理的核心,内部主要调用三类处理器:
- rfileProc(eventLoop,fd,fe->clientData,mask):处理读事件(文件事件)
- wfileProc(eventLoop,fd,fe->clientData,mask):处理写事件(文件事件)
- processTimeEvents(eventLoop):处理时间事件(后台事件)
/* 处理时间事件(后台事件)和文件事件(网络事件,fd 的变化)。
 * 如果 flags 没有特殊标识,此函数会进入睡眠状态,直到有文件事件或时间事件触发。
 *
 * If flags is 0, 啥也不做,直接返回。
 * if flags has AE_ALL_EVENTS set, 所有事件类型都处理。
 * if flags has AE_FILE_EVENTS set, 处理文件事件。
 * if flags has AE_TIME_EVENTS set, 处理时间事件。
 * if flags has AE_DONT_WAIT set, 处理完那些无需等待即可处理的事件后尽快(ASAP)返回。
 * if flags has AE_CALL_AFTER_SLEEP set, 调用 aftersleep 回调。
 *
 * The function returns the number of events processed. */
/* Process every pending time event and every pending file event.
 * Without AE_DONT_WAIT the function sleeps (inside aeApiPoll) until a file
 * event fires or until the nearest time event is due.
 *
 * If flags is 0, the function does nothing and returns ASAP.
 * If flags has AE_ALL_EVENTS set, all kinds of events are processed.
 * If flags has AE_FILE_EVENTS set, file events are processed.
 * If flags has AE_TIME_EVENTS set, time events are processed.
 * If flags has AE_DONT_WAIT set, the function returns ASAP once all the
 * events that can be handled without waiting are processed.
 * If flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
 *
 * The function returns the number of events processed. */
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
{
int processed = 0, numevents;
/* Nothing to do? return ASAP */
if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;
/* Note that we want call select() even if there are no
 * file events to process as long as we want to process time
 * events, in order to sleep until the next time event is ready
 * to fire. */
if (eventLoop->maxfd != -1 ||
((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
int j;
aeTimeEvent *shortest = NULL;
struct timeval tv, *tvp;
if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
/* Find the time event scheduled to fire soonest. */
shortest = aeSearchNearestTimer(eventLoop);
if (shortest) {
long now_sec, now_ms;
aeGetTime(&now_sec, &now_ms);
tvp = &tv;
/* How many milliseconds we need to wait for the next
 * time event to fire? Used as the poll timeout below. */
long long ms =
(shortest->when_sec - now_sec)*1000 +
shortest->when_ms - now_ms;
if (ms > 0) {
tvp->tv_sec = ms/1000;
tvp->tv_usec = (ms % 1000)*1000;
} else {
tvp->tv_sec = 0;
tvp->tv_usec = 0;
}
} else {
/* If we have to check for events but need to return
 * ASAP because of AE_DONT_WAIT we need to set the timeout
 * to zero */
if (flags & AE_DONT_WAIT) {
tv.tv_sec = tv.tv_usec = 0;
tvp = &tv;
} else {
/* Otherwise we can block */
tvp = NULL; /* wait forever */
}
}
/* Call the multiplexing API, will return only on timeout or when
 * some event fires. */
numevents = aeApiPoll(eventLoop, tvp);
/* After sleep callback. */
if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
eventLoop->aftersleep(eventLoop);
for (j = 0; j < numevents; j++) {
aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
int mask = eventLoop->fired[j].mask;
int fd = eventLoop->fired[j].fd;
int fired = 0; /* Number of events fired for current fd. */
/* Normally we execute the readable event first, and the writable
 * event laster. This is useful as sometimes we may be able
 * to serve the reply of a query immediately after processing the
 * query.
 *
 * However if AE_BARRIER is set in the mask, our application is
 * asking us to do the reverse: never fire the writable event
 * after the readable. In such a case, we invert the calls.
 * This is useful when, for instance, we want to do things
 * in the beforeSleep() hook, like fsynching a file to disk,
 * before replying to a client. */
int invert = fe->mask & AE_BARRIER;
/* Note the "fe->mask & mask & ..." code: maybe an already
 * processed event removed an element that fired and we still
 * didn't processed, so we check if the event is still valid.
 *
 * Fire the readable event if the call sequence is not
 * inverted. */
if (!invert && fe->mask & mask & AE_READABLE) {
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
/* Fire the writable event. */
if (fe->mask & mask & AE_WRITABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
fe->wfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
}
/* If we have to invert the call, fire the readable event now
 * after the writable one. */
if (invert && fe->mask & mask & AE_READABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
}
processed++;
}
}
/* Check time events */
if (flags & AE_TIME_EVENTS)
processed += processTimeEvents(eventLoop);
return processed; /* return the number of processed file/time events */
}
/* Allocate and initialize the global eviction pool used by the
 * approximated LRU/LFU eviction algorithm, storing it into the
 * EvictionPoolLRU global. Each of the EVPOOL_SIZE slots starts empty,
 * with a preallocated cached SDS string to avoid per-use allocations. */
void evictionPoolAlloc(void) {
    struct evictionPoolEntry *pool = zmalloc(sizeof(*pool)*EVPOOL_SIZE);
    for (int i = 0; i < EVPOOL_SIZE; i++) {
        pool[i].idle = 0;
        pool[i].key = NULL;
        pool[i].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
        pool[i].dbid = 0;
    }
    /* Publish the freshly built pool through the global pointer. */
    EvictionPoolLRU = pool;
}
/* One slot of the eviction pool allocated by evictionPoolAlloc():
 * candidate keys for eviction together with their idle score. */
struct evictionPoolEntry {
// LRU idle time, or the inverse access frequency for LFU
// (entries with a larger value are evicted first)
unsigned long long idle; /* Object idle time (inverse frequency for LFU) */
// Name of the candidate key being considered for eviction
sds key; /* Key name. */
// Preallocated SDS buffer reused to hold key names, to avoid allocations
sds cached; /* Cached SDS object for key name. */
// Number of the database the key belongs to
int dbid; /* Key DB number. */
};
核心流程:由 aeProcessEvents 统一处理时间事件和文件事件。