环境说明:redis源码版本 5.0.3;我在阅读源码过程做了注释,git地址:https://gitee.com/xiaoangg/redis_annotation
参考书籍:《Redis设计与实现》
目录
事件(上)文件事件
一 时间事件
二 时间事件的实现
三 事件的调度与执行
redis的时间事件分为两大类:定时事件(让一段程序在指定的时间之后执行一次)和周期性事件(让一段程序每隔指定的时间就执行一次)。
一个「时间事件」由若干属性组成;redis的「时间事件」结构体定义位于ae.h/aeTimeEvent:
/* Value a time-event handler returns to say the event must not be
 * rescheduled; processTimeEvents() then flags the event for deletion. */
#define AE_NOMORE -1
/* Time event structure.
 *
 * One node of the event loop's doubly linked list of time events
 * (eventLoop->timeEventHead). processTimeEvents() in ae.c walks this list,
 * fires the events that are due and frees the ones flagged as deleted. */
typedef struct aeTimeEvent {
/* Time event identifier. processTimeEvents() compares it against
 * timeEventNextId-1 and against AE_DELETED_EVENT_ID; presumably it is a
 * unique, monotonically increasing value assigned at creation time --
 * confirm in aeCreateTimeEvent(). */
long long id; /* time event identifier. */
/* when_sec and when_ms together record the moment the event becomes due;
 * processTimeEvents() compares them with the values from aeGetTime(). */
long when_sec; /* seconds */
long when_ms; /* milliseconds */
/* Handler invoked when the event is due. Its return value is either
 * AE_NOMORE or the delay passed to aeAddMillisecondsToNow() to schedule
 * the next run. */
aeTimeProc *timeProc;
/* Optional finalizer: if non-NULL it is called with clientData after a
 * deleted event has been unlinked and right before it is freed. */
aeEventFinalizerProc *finalizerProc;
/* Opaque pointer handed to both timeProc and finalizerProc. */
void *clientData;
/* Links of the doubly linked list of time events. */
struct aeTimeEvent *prev;
struct aeTimeEvent *next;
} aeTimeEvent;
服务器将所有的「时间事件」放到一个无序链表中(ae.c/aeCreateTimeEvent)。
当时间事件执行器运行时,会遍历整个链表,找出所有已经到达的事件,并调用相应的事件处理器;
执行代码位于ae.c/processTimeEvents
/* Process time events.
 *
 * Makes a single pass over the event loop's time-event list. Events flagged
 * with AE_DELETED_EVENT_ID are unlinked, finalized and freed. Every other
 * event whose due time has been reached has its handler invoked; depending
 * on the handler's return value the event is either rescheduled or flagged
 * for deletion (it is actually freed on a later pass).
 *
 * Returns the number of handlers that were invoked. */
static int processTimeEvents(aeEventLoop *eventLoop) {
    time_t currentTime = time(NULL);
    aeTimeEvent *cur;
    int fired = 0;

    /* Clock-skew detection. If the system clock was moved into the future
     * and later set back to the right value, timers armed in between would
     * be delayed by an arbitrary amount. When the clock is seen to go
     * backwards, make every timer due immediately: firing early is less
     * dangerous than delaying indefinitely. Clearing when_sec alone is
     * enough, since the due check below compares seconds first. */
    if (currentTime < eventLoop->lastTime) {
        for (cur = eventLoop->timeEventHead; cur != NULL; cur = cur->next)
            cur->when_sec = 0;
    }
    eventLoop->lastTime = currentTime;

    /* Highest id that existed when this pass started. */
    const long long lastKnownId = eventLoop->timeEventNextId - 1;

    cur = eventLoop->timeEventHead;
    while (cur != NULL) {
        /* Reap events scheduled for deletion. */
        if (cur->id == AE_DELETED_EVENT_ID) {
            aeTimeEvent *following = cur->next;
            aeTimeEvent *preceding = cur->prev;

            if (preceding != NULL)
                preceding->next = following;
            else
                eventLoop->timeEventHead = following;
            if (following != NULL)
                following->prev = preceding;

            /* The finalizer runs after the node has been unlinked. */
            if (cur->finalizerProc != NULL)
                cur->finalizerProc(eventLoop, cur->clientData);
            zfree(cur);
            cur = following;
            continue;
        }

        /* Skip timers created by handlers during this very pass. This is
         * currently redundant because new timers are always inserted at
         * the head of the list, but it is kept as a defense in case that
         * implementation detail changes. */
        if (cur->id <= lastKnownId) {
            long sec, ms;

            aeGetTime(&sec, &ms);
            int due = sec > cur->when_sec ||
                      (sec == cur->when_sec && ms >= cur->when_ms);
            if (due) {
                int rv = cur->timeProc(eventLoop, cur->id, cur->clientData);

                fired++;
                if (rv == AE_NOMORE) {
                    /* One-shot event: flag it, it is freed on a later pass. */
                    cur->id = AE_DELETED_EVENT_ID;
                } else {
                    /* Periodic event: rv is the delay until the next run. */
                    aeAddMillisecondsToNow(rv, &cur->when_sec, &cur->when_ms);
                }
            }
        }

        /* Read the link only now: the handler may have changed the list. */
        cur = cur->next;
    }
    return fired;
}
目前redis版本在正常模式下只使用一个时间事件:server.c/serverCron;在benchmark模式下,也只使用了两个时间事件。所以使用无序链表并不会影响事件的执行性能;
redis的事件调度函数位于ae.c/aeProcessEvents
/* Event dispatcher: polls for file events, dispatches the ones that fired,
 * then processes the time events that are due (file event handlers run
 * before time event handlers).
 *
 * Without special flags the function sleeps until some file event
 * fires, or when the next time event occurs (if any).
 *
 * If flags is 0, the function does nothing and returns.
 * if flags has AE_ALL_EVENTS set, all the kind of events are processed.
 * if flags has AE_FILE_EVENTS set, file events are processed.
 * if flags has AE_TIME_EVENTS set, time events are processed.
 * if flags has AE_DONT_WAIT set the function returns ASAP, as soon as all
 * the events that can be processed without waiting have been processed.
 * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
 *
 * The function returns the number of events processed: one per file
 * descriptor reported by the poll, plus the value returned by
 * processTimeEvents(). */
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
{
int processed = 0, numevents;
/* Nothing to do? return ASAP */
if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;
/* Note that we want call select() even if there are no
 * file events to process as long as we want to process time
 * events, in order to sleep until the next time event is ready
 * to fire. */
if (eventLoop->maxfd != -1 || /* presumably: at least one file event is registered */
((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
int j;
aeTimeEvent *shortest = NULL;
struct timeval tv, *tvp;
/* Look up the time event that is due soonest; it bounds how long the
 * poll below may block. Skipped when the caller asked not to wait. */
if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
shortest = aeSearchNearestTimer(eventLoop);
if (shortest) {
long now_sec, now_ms;
aeGetTime(&now_sec, &now_ms);
tvp = &tv;
/* How many milliseconds we need to wait for the next
 * time event to fire? */
long long ms =
(shortest->when_sec - now_sec)*1000 +
shortest->when_ms - now_ms;
if (ms > 0) {
tvp->tv_sec = ms/1000;
tvp->tv_usec = (ms % 1000)*1000;
} else {
/* The nearest timer is already due: poll without blocking. */
tvp->tv_sec = 0;
tvp->tv_usec = 0;
}
} else {
/* If we have to check for events but need to return
 * ASAP because of AE_DONT_WAIT we need to set the timeout
 * to zero */
if (flags & AE_DONT_WAIT) { /* zero timeout: the poll must not block */
tv.tv_sec = tv.tv_usec = 0;
tvp = &tv;
} else {
/* Otherwise we can block */
tvp = NULL; /* wait forever */
}
}
/* The blocking time of aeApiPoll() is dictated by the nearest time
 * event: e.g. if it is due in 2 seconds, the poll blocks for at most
 * 2 seconds. This avoids busy-polling the time events. */
/* Call the multiplexing API, will return only on timeout or when
 * some event fires. */
numevents = aeApiPoll(eventLoop, tvp);
/* After sleep callback. */
if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
eventLoop->aftersleep(eventLoop);
/* Dispatch every file event reported by the poll. */
for (j = 0; j < numevents; j++) {
aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
int mask = eventLoop->fired[j].mask;
int fd = eventLoop->fired[j].fd;
int fired = 0; /* Number of events fired for current fd. */
/* Normally we execute the readable event first, and the writable
 * event later. This is useful as sometimes we may be able
 * to serve the reply of a query immediately after processing the
 * query.
 *
 * However if AE_BARRIER is set in the mask, our application is
 * asking us to do the reverse: never fire the writable event
 * after the readable. In such a case, we invert the calls.
 * This is useful when, for instance, we want to do things
 * in the beforeSleep() hook, like fsyncing a file to disk,
 * before replying to a client. */
int invert = fe->mask & AE_BARRIER;
/* Note the "fe->mask & mask & ..." code: maybe an already
 * processed event removed an element that fired and that we have
 * not processed yet, so we check if the event is still valid.
 *
 * Fire the readable event if the call sequence is not
 * inverted. */
if (!invert && fe->mask & mask & AE_READABLE) {
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
/* Fire the writable event, unless the same handler was already
 * invoked for this fd as the readable handler. */
if (fe->mask & mask & AE_WRITABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
fe->wfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
}
/* If we have to invert the call, fire the readable event now
 * after the writable one. */
if (invert && fe->mask & mask & AE_READABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
}
/* Counted once per fd, regardless of how many handlers ran. */
processed++;
}
}
/* Check time events */
if (flags & AE_TIME_EVENTS)
processed += processTimeEvents(eventLoop);
return processed; /* return the number of processed file/time events */
}
aeProcessEvents函数被放在一个循环里反复调用;加上服务器初始化函数和清理函数,就构成redis服务器的主函数;
//servce.c/main int main(int argc, char **argv) { /* .... 读取配置 .... */ //初始化服务 initServer(); /* ........ */ //事件处理函数 aeMain(server.el); /* ..... */ //清理 aeDeleteEventLoop(server.el); }