由于TrafficServer在2011年才刚刚由Yahoo开放源码,关于TrafficServer源码分析的文档非常有限。淘宝网的技术专家们对此研究得比较深入,一些基础知识可以参照淘宝CDN团队的官方博客,其中对TrafficServer的基本使用以及事件系统、网络子系统都有简单的介绍。
在执行到 eventProcessor.start(num_of_net_threads); 这一行之前,大部分代码都在处理与配置文件相关的工作。
eventProcessor.start(num_of_net_threads); 内部实现如下:

// Creates the ET_CALL thread group: allocates n_event_threads REGULAR
// EThreads, records them in all_ethreads / eventthread[ET_CALL], and starts
// every thread except index 0.
//
// n_event_threads must satisfy 0 < n_event_threads <= MAX_EVENT_THREADS, and
// the function may only be called once; both conditions are enforced with
// ink_release_assert. Always returns 0.
int EventProcessor::start(int n_event_threads)
{
  char thr_name[MAX_THREAD_NAME_LENGTH];
  int i;

  // do some sanity checking.
  static int started = 0;
  ink_release_assert(!started);
  ink_release_assert(n_event_threads > 0 && n_event_threads <= MAX_EVENT_THREADS);
  started = 1;

  n_ethreads = n_event_threads;
  n_thread_groups = 1;

  // Index of the first thread that is started by the loop at the bottom.
  int first_thread = 1;

  for (i = 0; i < n_event_threads; i++) {
    EThread *t = NEW(new EThread(REGULAR, i));
    if (first_thread && !i) {
      // EThread 0 is registered under Thread::thread_data_key, supplies the
      // global mutex, and gets its clock initialised here. It is not started
      // below because the start loop begins at first_thread.
      // NOTE(review): presumably this binds EThread 0 to the calling thread
      // -- confirm against ink_thread_setspecific.
      ink_thread_setspecific(Thread::thread_data_key, t);
      global_mutex = t->mutex;
      t->cur_time = ink_get_based_hrtime_internal();
    }
    all_ethreads[i] = t;

    eventthread[ET_CALL][i] = t;
    t->set_event_type((EventType) ET_CALL);
  }
  n_threads_for_type[ET_CALL] = n_event_threads;

  for (i = first_thread; i < n_ethreads; i++) {
    snprintf(thr_name, MAX_THREAD_NAME_LENGTH, "[ET_NET %d]", i);
    all_ethreads[i]->start(thr_name);
  }

  Debug("iocore_thread", "Created event thread group id %d with %d threads", ET_CALL, n_event_threads);
  return 0;
}
void EThread::execute() { switch (tt) { case REGULAR: { Event *e; Que(Event, link) NegativeQueue; ink_hrtime next_time = 0; // give priority to immediate events for (;;) { // execute all the available external events that have // already been dequeued cur_time = ink_get_based_hrtime_internal(); while ((e = EventQueueExternal.dequeue_local())) { if (!e->timeout_at) { // IMMEDIATE ink_assert(e->period == 0); process_event(e, e->callback_event); } else if (e->timeout_at > 0) // INTERVAL EventQueue.enqueue(e, cur_time); else { // NEGATIVE Event *p = NULL; Event *a = NegativeQueue.head; while (a && a->timeout_at > e->timeout_at) { p = a; a = a->; } if (!a) NegativeQueue.enqueue(e); else NegativeQueue.insert(e, p); } } bool done_one; do { done_one = false; // execute all the eligible internal events EventQueue.check_ready(cur_time, this); while ((e = EventQueue.dequeue_ready(cur_time))) { ink_assert(e); ink_assert(e->timeout_at > 0); if (e->cancelled) free_event(e); else { done_one = true; process_event(e, e->callback_event); } } } while (done_one); // execute any negative (poll) events if (NegativeQueue.head) { if (n_ethreads_to_be_signalled) flush_signals(this); // dequeue all the external events and put them in a local // queue. If there are no external events available, don't // do a cond_timedwait. if (!INK_ATOMICLIST_EMPTY( EventQueueExternal.dequeue_timed(cur_time, next_time, false); while ((e = EventQueueExternal.dequeue_local())) { if (!e->timeout_at) process_event(e, e->callback_event); else { if (e->cancelled) free_event(e); else { // If its a negative event, it must be a result of // a negative event, which has been turned into a // timed-event (because of a missed lock), executed // before the poll. 
So, it must // be executed in this round (because you can't have // more than one poll between two executions of a // negative event) if (e->timeout_at < 0) { Event *p = NULL; Event *a = NegativeQueue.head; while (a && a->timeout_at > e->timeout_at) { p = a; a = a->; } if (!a) NegativeQueue.enqueue(e); else NegativeQueue.insert(e, p); } else EventQueue.enqueue(e, cur_time); } } } // execute poll events while ((e = NegativeQueue.dequeue())) process_event(e, EVENT_POLL); if (!INK_ATOMICLIST_EMPTY( EventQueueExternal.dequeue_timed(cur_time, next_time, false); } else { // Means there are no negative events next_time = EventQueue.earliest_timeout(); ink_hrtime sleep_time = next_time - cur_time; if (sleep_time > THREAD_MAX_HEARTBEAT_MSECONDS * HRTIME_MSECOND) { next_time = cur_time + THREAD_MAX_HEARTBEAT_MSECONDS * HRTIME_MSECOND; sleep_time = THREAD_MAX_HEARTBEAT_MSECONDS * HRTIME_MSECOND; } // dequeue all the external events and put them in a local // queue. If there are no external events available, do a // cond_timedwait. if (n_ethreads_to_be_signalled) flush_signals(this); EventQueueExternal.dequeue_timed(cur_time, next_time, true); } } } case DEDICATED: { // coverity[lock] if (eventsem) ink_sem_wait(eventsem); MUTEX_TAKE_LOCK_FOR(oneevent->mutex, this, oneevent->continuation); oneevent->continuation->handleEvent(EVENT_IMMEDIATE, oneevent); MUTEX_UNTAKE_LOCK(oneevent->mutex, this); free_event(oneevent); break; } default: ink_assert(!"bad case value (execute)"); break; } /* End switch */ // coverity[missing_unlock] }
上述execute代码是事件处理子系统中核心的事件处理部分,因此我用Understand对这部分实现做了逆向工程,生成了流程图,看起来比较清晰:
上面这张图中,从 for(;;) 引出的红色 no 分支箭头,应该是Understand对源码的分析有误,我从源码中并没有看到这个走向。实际上,REGULAR这个case分支下是一个for(;;)无限循环,专门负责循环处理常规事件。
代码中的 EventQueueExternal 是事件模型中提到的外部队列,EventQueue 是内部队列。事件的处理过程是这样的:先从外部队列中取出一个事件e,通过e->timeout_at判断这个事件是否需要立刻执行。如果需要立刻执行(timeout_at为0),则调用process_event立刻执行事件(稍后会分析process_event的实现细节);如果取出的事件e并不需要立刻执行,且不属于epoll之类的网络事件(timeout_at大于0),则将这个事件加入到内部队列EventQueue中;如果取出的事件e属于epoll这样的网络事件(timeout_at小于0),则将其加入到NegativeQueue中,随后会有针对这种事件的处理。外部队列的事件处理完成后,接下来处理内部队列中的事件(其中包括刚刚在处理外部事件时加入到内部队列的事件)。内部队列EventQueue是用优先级队列的方式实现的,并且从代码上观察,应该是只要在一轮检查中处理掉了一个内部队列的事件,就会再次检测内部队列中是否还有需要处理的事件,直到某一轮检查中没有任何需要处理的事件,才会完成对内部队列事件的检查。
bool done_one; do { done_one = false; // execute all the eligible internal events EventQueue.check_ready(cur_time, this); while ((e = EventQueue.dequeue_ready(cur_time))) { ink_assert(e); ink_assert(e->timeout_at > 0); if (e->cancelled) free_event(e); else { done_one = true; process_event(e, e->callback_event); } } } while (done_one);
完成内部队列中的事件检查后,会检查刚刚提到的NegativeQueue队列中的事件。在处理NegativeQueue事件前,貌似源码中可以看到对外部队列中的事件又做了一次检查,基本流程和上面的差不多,只不过加入了一些阻塞方法,例如 EventQueueExternal.dequeue_timed(cur_time, next_time, false);,这里暂时没看太明白(从代码注释中理解,最后一个参数false表示:当外部队列中没有事件的时候,也不会阻塞线程等待,而会将事件取出,放到本地队列中)。然后进入到对poll事件的处理(NegativeQueue),代码如下:
// execute poll events while ((e = NegativeQueue.dequeue())) process_event(e, EVENT_POLL); if (!INK_ATOMICLIST_EMPTY( EventQueueExternal.dequeue_timed(cur_time, next_time, false); } else { // Means there are no negative events next_time = EventQueue.earliest_timeout(); ink_hrtime sleep_time = next_time - cur_time; if (sleep_time > THREAD_MAX_HEARTBEAT_MSECONDS * HRTIME_MSECOND) { next_time = cur_time + THREAD_MAX_HEARTBEAT_MSECONDS * HRTIME_MSECOND; sleep_time = THREAD_MAX_HEARTBEAT_MSECONDS * HRTIME_MSECOND; }