After listening sockets have been set up for all configured ports, the event module initializes accepting connections:
// src/event/ngx_event.c
static ngx_int_t ngx_event_process_init(ngx_cycle_t *cycle)
{
……
ls = cycle->listening.elts;
for (i = 0; i < cycle->listening.nelts; i++) {
// for each listening descriptor, get a connection object that wraps the fd
c = ngx_get_connection(ls[i].fd, cycle->log);
// take the read event of the listening descriptor
rev = c->read;
// this read event accepts new connections
rev->accept = 1;
// read event handler: SOCK_STREAM goes through TCP accept, otherwise straight to UDP recvmsg
rev->handler = (c->type == SOCK_STREAM) ? ngx_event_accept
: ngx_event_recvmsg;
// add the read event to the I/O multiplexing mechanism; with epoll, ngx_add_event is ngx_epoll_add_event
// for UDP, ngx_event_recvmsg is called when the event fires
if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
return NGX_ERROR;
}
}
}
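The same dispatch can be sketched outside nginx: register each listening descriptor with epoll and, when it becomes readable, either accept() the new TCP connection or read the UDP datagram straight off the socket. The standalone sketch below is only an illustration of that split; the port numbers, the missing error handling and the plain file descriptors stored in data.fd are my own simplifications, not nginx's:
// illustrative sketch, not nginx source: dispatch on socket type, as ngx_event_process_init sets up
#include <sys/epoll.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <unistd.h>

static int make_listener(int type, int port)
{
    int fd = socket(AF_INET, type, 0);
    struct sockaddr_in addr;

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr.sin_port = htons((unsigned short) port);
    bind(fd, (struct sockaddr *) &addr, sizeof(addr));

    if (type == SOCK_STREAM) {
        listen(fd, 128);
    }
    return fd;
}

int main(void)
{
    int ep = epoll_create1(0);
    int tcp_fd = make_listener(SOCK_STREAM, 8080);   /* like a "listen 8080;" socket */
    int udp_fd = make_listener(SOCK_DGRAM, 8080);    /* like a "listen 8080 udp;" socket */

    struct epoll_event ev, events[16];
    ev.events = EPOLLIN;
    ev.data.fd = tcp_fd;                 /* nginx stores a tagged connection pointer here instead */
    epoll_ctl(ep, EPOLL_CTL_ADD, tcp_fd, &ev);
    ev.data.fd = udp_fd;
    epoll_ctl(ep, EPOLL_CTL_ADD, udp_fd, &ev);

    for ( ;; ) {
        int n = epoll_wait(ep, events, 16, -1);
        for (int i = 0; i < n; i++) {
            if (events[i].data.fd == tcp_fd) {
                /* SOCK_STREAM: accept the new connection (ngx_event_accept) */
                int conn = accept(tcp_fd, NULL, NULL);
                close(conn);
            } else {
                /* SOCK_DGRAM: read the datagram directly (ngx_event_recvmsg) */
                char buf[65535];
                recv(udp_fd, buf, sizeof(buf), 0);
            }
        }
    }
}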
The epoll I/O multiplexing module:
// src/event/modules/ngx_epoll_module.c
static ngx_str_t epoll_name = ngx_string("epoll");
static ngx_command_t ngx_epoll_commands[] = {
{ ngx_string("epoll_events"),
NGX_EVENT_CONF|NGX_CONF_TAKE1,
ngx_conf_set_num_slot,
0,
offsetof(ngx_epoll_conf_t, events),
NULL },
{ ngx_string("worker_aio_requests"),
NGX_EVENT_CONF|NGX_CONF_TAKE1,
ngx_conf_set_num_slot,
0,
offsetof(ngx_epoll_conf_t, aio_requests),
NULL },
ngx_null_command
};
static ngx_event_module_t ngx_epoll_module_ctx = {
&epoll_name,
ngx_epoll_create_conf, /* create configuration */
ngx_epoll_init_conf, /* init configuration */
{
ngx_epoll_add_event, /* add an event */
ngx_epoll_del_event, /* delete an event */
ngx_epoll_add_event, /* enable an event */
ngx_epoll_del_event, /* disable an event */
ngx_epoll_add_connection, /* add an connection */
ngx_epoll_del_connection, /* delete an connection */
#if (NGX_HAVE_EVENTFD)
ngx_epoll_notify, /* trigger a notify */
#else
NULL, /* trigger a notify */
#endif
ngx_epoll_process_events, /* process the events */
ngx_epoll_init, /* init the events */
ngx_epoll_done, /* done the events */
}
};
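ngx_event_module_t wraps each event mechanism behind the same table of function pointers; at init time nginx copies the chosen module's table into the global ngx_event_actions, and ngx_add_event / ngx_process_events are macros that call through it. Below is a minimal sketch of that vtable idea with made-up names, not nginx code:
// illustrative sketch, not nginx source: an event-mechanism vtable selected at runtime
#include <stdio.h>

typedef struct {
    const char *name;
    int (*add_event)(int fd, int events);
    int (*del_event)(int fd);
    int (*process_events)(int timer_ms);
} event_actions_t;

static int epoll_like_add(int fd, int events) { printf("add fd=%d events=%d\n", fd, events); return 0; }
static int epoll_like_del(int fd)             { printf("del fd=%d\n", fd); return 0; }
static int epoll_like_wait(int timer_ms)      { printf("wait %d ms\n", timer_ms); return 0; }

static const event_actions_t epoll_actions = {
    "epoll", epoll_like_add, epoll_like_del, epoll_like_wait,
};

/* nginx keeps the selected table in the global ngx_event_actions and
   defines ngx_add_event / ngx_process_events as macros over its fields */
static const event_actions_t *actions = &epoll_actions;

int main(void)
{
    actions->add_event(7, 1);      /* roughly what ngx_add_event(rev, NGX_READ_EVENT, 0) boils down to */
    actions->process_events(500);
    actions->del_event(7);
    return 0;
}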
// event processing
static ngx_int_t ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags)
{
int events;
uint32_t revents;
ngx_int_t instance, i;
ngx_uint_t level;
ngx_err_t err;
ngx_event_t *rev, *wev;
ngx_queue_t *queue;
ngx_connection_t *c;
/* NGX_TIMER_INFINITE == INFTIM */
ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
"epoll timer: %M", timer);
events = epoll_wait(ep, event_list, (int) nevents, timer);
……
if (events == 0) {
if (timer != NGX_TIMER_INFINITE) {
return NGX_OK;
}
ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
"epoll_wait() returned no events without timeout");
return NGX_ERROR;
}
for (i = 0; i < events; i++) {
c = event_list[i].data.ptr;
instance = (uintptr_t) c & 1;
c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~1);
rev = c->read;
……
revents = event_list[i].events;
……
if ((revents & EPOLLIN) && rev->active) {
rev->ready = 1;
if (flags & NGX_POST_EVENTS) {
queue = rev->accept ? &ngx_posted_accept_events
: &ngx_posted_events;
ngx_post_event(rev, queue);
} else {
// read event handler
rev->handler(rev);
}
}
wev = c->write;
if ((revents & EPOLLOUT) && wev->active) {
……
if (flags & NGX_POST_EVENTS) {
ngx_post_event(wev, &ngx_posted_events);
} else {
// write event handler
wev->handler(wev);
}
}
}
return NGX_OK;
}
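The `& 1` / `& ~1` pair at the top of the loop is pointer tagging: ngx_epoll_add_event stores `(uintptr_t) c | ev->instance` in event_list[i].data.ptr, relying on connection objects being at least 2-byte aligned so the low bit is free, and ngx_epoll_process_events strips the bit off again and compares it with the event's current instance flag to discard stale events. A tiny standalone illustration of the trick (not nginx code):
// illustrative sketch, not nginx source: stash a 1-bit flag in an aligned pointer
#include <stdint.h>
#include <stdio.h>

typedef struct {
    int fd;
} conn_t;

static void *tag_ptr(conn_t *c, unsigned instance)
{
    /* aligned pointers have their low bit clear, so it can carry the flag */
    return (void *) ((uintptr_t) c | (instance & 1));
}

int main(void)
{
    conn_t c = { 42 };
    void *stored = tag_ptr(&c, 1);        /* what would go into epoll_event.data.ptr */

    unsigned instance = (uintptr_t) stored & 1;                       /* recover the flag */
    conn_t *p = (conn_t *) ((uintptr_t) stored & (uintptr_t) ~1);     /* recover the pointer */

    printf("fd=%d instance=%u\n", p->fd, instance);
    return 0;
}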
The handler for UDP read events, ngx_event_recvmsg:
// src/event/ngx_event_udp.c
void ngx_event_recvmsg(ngx_event_t *ev)
{
struct iovec iov[1];
struct msghdr msg;
ngx_buf_t buf;
static u_char buffer[65535]; // static receive buffer shared by all datagrams
……
lc = ev->data;
ls = lc->listening;
do {
ngx_memzero(&msg, sizeof(struct msghdr));
iov[0].iov_base = (void *) buffer;
iov[0].iov_len = sizeof(buffer);
……
msg.msg_iov = iov;
msg.msg_iovlen = 1;
// read the datagram; the data ends up in iov[0].iov_base, i.e. in buffer
n = recvmsg(lc->fd, &msg, 0);
// source address and port of the datagram
sockaddr = msg.msg_name;
socklen = msg.msg_namelen;
// local (destination) address and port of the listening socket
local_sockaddr = ls->sockaddr;
local_socklen = ls->socklen;
// ls carries a red-black tree of connections; look up whether a connection with the same 4-tuple already exists
c = ngx_lookup_udp_connection(ls, sockaddr, socklen, local_sockaddr,
local_socklen);
if (c) {
……
ngx_memzero(&buf, sizeof(ngx_buf_t));
buf.pos = buffer;
buf.last = buffer + n;
// if found, invoke the read event handler to consume the data
rev = c->read;
// c->udp->buffer now points at the buffer holding the data just read
c->udp->buffer = &buf;
rev->ready = 1; // mark the read event ready; used further down the chain
// at this point the handler is ngx_stream_session_handler@src/stream/ngx_stream_handler.c
rev->handler(rev);
if (c->udp) {
c->udp->buffer = NULL;
}
rev->ready = 0;
goto next;
}
// if no connection is found, create a new connection object
c = ngx_get_connection(lc->fd, ev->log);
if (c == NULL) {
return;
}
c->shared = 1;
c->type = SOCK_DGRAM;
c->socklen = socklen;
……
// copy the data just read in buffer into a newly created temporary buffer c->buffer
c->buffer = ngx_create_temp_buf(c->pool, n);
c->buffer->last = ngx_cpymem(c->buffer->last, buffer, n);
// set the connection's recv and send functions;
// ngx_udp_shared_recv simply copies data out of c->udp->buffer
c->recv = ngx_udp_shared_recv;
c->send = ngx_udp_send;
c->send_chain = ngx_udp_send_chain;
// insert the new connection into the red-black tree that tracks UDP connections
if (ngx_insert_udp_connection(c) != NGX_OK) {
ngx_close_accepted_udp_connection(c);
return;
}
// call the listening socket's handler to process the new connection;
// here the handler is ngx_stream_init_connection@src/stream/ngx_stream_handler.c
ls->handler(c);
next:
if (ngx_event_flags & NGX_USE_KQUEUE_EVENT) {
ev->available -= n;
}
} while (ev->available);
}
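All UDP "connections" share the one listening descriptor, so the client's address has to come out of recvmsg() itself via msg_name, which is what the sockaddr/socklen lines above pick up. Below is a minimal standalone receive loop showing the same msghdr/iovec plumbing; it is only an illustration with an arbitrary port, not nginx code:
// illustrative sketch, not nginx source: recvmsg() returning the datagram's source address
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    struct sockaddr_in local;

    memset(&local, 0, sizeof(local));
    local.sin_family = AF_INET;
    local.sin_addr.s_addr = htonl(INADDR_ANY);
    local.sin_port = htons(5353);
    bind(fd, (struct sockaddr *) &local, sizeof(local));

    static unsigned char buffer[65535];     /* same idea as nginx's static buffer */
    struct sockaddr_in peer;
    struct iovec iov[1];
    struct msghdr msg;

    for ( ;; ) {
        memset(&msg, 0, sizeof(msg));
        iov[0].iov_base = buffer;
        iov[0].iov_len = sizeof(buffer);
        msg.msg_iov = iov;
        msg.msg_iovlen = 1;
        msg.msg_name = &peer;               /* recvmsg fills in the source address here */
        msg.msg_namelen = sizeof(peer);

        ssize_t n = recvmsg(fd, &msg, 0);
        if (n < 0) {
            break;
        }

        char ip[INET_ADDRSTRLEN];
        inet_ntop(AF_INET, &peer.sin_addr, ip, sizeof(ip));
        printf("%zd bytes from %s:%u\n", n, ip, (unsigned) ntohs(peer.sin_port));
    }
    return 0;
}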
Handling a new UDP connection, that is, a new session:
// src/stream/ngx_stream_handler.c
void ngx_stream_init_connection(ngx_connection_t *c){
……
// allocate a session object and initialize it
s = ngx_pcalloc(c->pool, sizeof(ngx_stream_session_t));
……
if (c->buffer) {
s->received += c->buffer->last - c->buffer->pos;
}
s->connection = c;
c->data = s;
// assign the session handler to the connection's read event and use it to process the new session
rev = c->read;
rev->handler = ngx_stream_session_handler;
rev->handler(rev);
}
void ngx_stream_session_handler(ngx_event_t *rev)
{
ngx_connection_t *c;
ngx_stream_session_t *s;
c = rev->data;
s = c->data;
// run the handlers of each phase of the proxying process
ngx_stream_core_run_phases(s);
}
There are seven phases; their checkers and handlers are initialized as follows:
// src/stream/ngx_stream.h
typedef enum {
NGX_STREAM_POST_ACCEPT_PHASE = 0,
NGX_STREAM_PREACCESS_PHASE,
NGX_STREAM_ACCESS_PHASE,
NGX_STREAM_SSL_PHASE, // SSL handshake phase
NGX_STREAM_PREREAD_PHASE, // data is read in this phase (through SSL decryption when SSL is terminated)
NGX_STREAM_CONTENT_PHASE, // content handling, proxy forwarding
NGX_STREAM_LOG_PHASE
} ngx_stream_phases;
// src/stream/ngx_stream.c
static ngx_int_t ngx_stream_init_phase_handlers(ngx_conf_t *cf,
ngx_stream_core_main_conf_t *cmcf)
{
ngx_int_t j;
ngx_uint_t i, n;
ngx_stream_handler_pt *h;
ngx_stream_phase_handler_t *ph;
ngx_stream_phase_handler_pt checker;
n = 1 /* content phase */;
// count the total number of handlers
for (i = 0; i < NGX_STREAM_LOG_PHASE; i++) {
n += cmcf->phases[i].handlers.nelts;
}
ph = ngx_pcalloc(cf->pool,
n * sizeof(ngx_stream_phase_handler_t) + sizeof(void *));
if (ph == NULL) {
return NGX_ERROR;
}
cmcf->phase_engine.handlers = ph;
n = 0;
// set up each phase's checker and handlers
for (i = 0; i < NGX_STREAM_LOG_PHASE; i++) {
h = cmcf->phases[i].handlers.elts;
switch (i) {
case NGX_STREAM_PREREAD_PHASE: // read data from the client
checker = ngx_stream_core_preread_phase;
break;
case NGX_STREAM_CONTENT_PHASE: // content handling / proxy forwarding; no per-module handlers here
ph->checker = ngx_stream_core_content_phase;
n++;
ph++;
continue;
default:
checker = ngx_stream_core_generic_phase;
}
n += cmcf->phases[i].handlers.nelts;
for (j = cmcf->phases[i].handlers.nelts - 1; j >= 0; j--) {
ph->checker = checker;
ph->handler = h[j];
ph->next = n;
ph++;
}
}
return NGX_OK;
}
Next, look at how the phase checkers are run:
// src/stream/ngx_stream_core_module.c
void ngx_stream_core_run_phases(ngx_stream_session_t *s)
{
ngx_int_t rc;
ngx_stream_phase_handler_t *ph;
ngx_stream_core_main_conf_t *cmcf;
cmcf = ngx_stream_get_module_main_conf(s, ngx_stream_core_module);
ph = cmcf->phase_engine.handlers;
while (ph[s->phase_handler].checker) {
// run each phase's checker; the checker is responsible for advancing the index s->phase_handler
rc = ph[s->phase_handler].checker(s, &ph[s->phase_handler]);
// a return value of NGX_OK stops the remaining handlers from running
if (rc == NGX_OK) {
return;
}
}
}
// the default checker
ngx_int_t ngx_stream_core_generic_phase(ngx_stream_session_t *s,
ngx_stream_phase_handler_t *ph)
{
ngx_int_t rc;
// it still just calls the handler
rc = ph->handler(s);
if (rc == NGX_OK) {
// go to the first handler of the next phase
s->phase_handler = ph->next;
return NGX_AGAIN;
}
if (rc == NGX_DECLINED) {
// go to the next handler of this phase
s->phase_handler++;
return NGX_AGAIN;
}
if (rc == NGX_AGAIN || rc == NGX_DONE) {
return NGX_OK;
}
if (rc == NGX_ERROR) {
rc = NGX_STREAM_INTERNAL_SERVER_ERROR;
}
// on error, finalize (close) the session
ngx_stream_finalize_session(s, rc);
return NGX_OK;
}
// the checker for the PREREAD phase
ngx_int_t ngx_stream_core_preread_phase(ngx_stream_session_t *s,
ngx_stream_phase_handler_t *ph)
{
size_t size;
ssize_t n;
ngx_int_t rc;
ngx_connection_t *c;
ngx_stream_core_srv_conf_t *cscf;
c = s->connection;
cscf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);
if (c->read->timedout) {
rc = NGX_STREAM_OK;
} else if (c->read->timer_set) {
rc = NGX_AGAIN;
} else {
// the handler, e.g. ngx_stream_ssl_preread_handler, processes the data already read into the buffer (which may have gone through SSL decryption)
rc = ph->handler(s);
}
while (rc == NGX_AGAIN) {
if (c->buffer == NULL) {
c->buffer = ngx_create_temp_buf(c->pool, cscf->preread_buffer_size);
if (c->buffer == NULL) {
rc = NGX_ERROR;
break;
}
}
size = c->buffer->end - c->buffer->last;
if (size == 0) {
ngx_log_error(NGX_LOG_ERR, c->log, 0, "preread buffer full");
rc = NGX_STREAM_BAD_REQUEST;
break;
}
if (c->read->eof) {
rc = NGX_STREAM_OK;
break;
}
if (!c->read->ready) {
break;
}
// read data into the connection's buffer
n = c->recv(c, c->buffer->last, size);
if (n == NGX_ERROR || n == 0) {
rc = NGX_STREAM_OK;
break;
}
if (n == NGX_AGAIN) {
break;
}
c->buffer->last += n;
// process the data read so far (possibly after SSL decryption)
rc = ph->handler(s);
}
if (rc == NGX_AGAIN) {
if (ngx_handle_read_event(c->read, 0) != NGX_OK) {
ngx_stream_finalize_session(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return NGX_OK;
}
if (!c->read->timer_set) {
ngx_add_timer(c->read, cscf->preread_timeout);
}
c->read->handler = ngx_stream_session_handler;
return NGX_OK;
}
if (c->read->timer_set) {
ngx_del_timer(c->read);
}
if (rc == NGX_OK) {
// go to the first handler of the next phase
s->phase_handler = ph->next;
return NGX_AGAIN;
}
if (rc == NGX_DECLINED) {
// go to the next handler of this phase
s->phase_handler++;
return NGX_AGAIN;
}
if (rc == NGX_DONE) {
return NGX_OK;
}
if (rc == NGX_ERROR) {
rc = NGX_STREAM_INTERNAL_SERVER_ERROR;
}
ngx_stream_finalize_session(s, rc);
return NGX_OK;
}
// the checker for the CONTENT phase
ngx_int_t ngx_stream_core_content_phase(ngx_stream_session_t *s,
ngx_stream_phase_handler_t *ph)
{
ngx_connection_t *c;
ngx_stream_core_srv_conf_t *cscf;
c = s->connection;
c->log->action = NULL;
cscf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);
if (c->type == SOCK_STREAM
&& cscf->tcp_nodelay
&& ngx_tcp_nodelay(c) != NGX_OK)
{
ngx_stream_finalize_session(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return NGX_OK;
}
// only a single handler is called here; it comes from the configuration and, with proxy_pass, is ngx_stream_proxy_handler
cscf->handler(s);
return NGX_OK;
}
The content-phase handler configured by the proxy_pass module is ngx_stream_proxy_handler, i.e. proxy forwarding. nginx calls the connection to the backend server the upstream and the connection to the client the downstream:
// src/stream/ngx_stream_proxy_module.c
static char *ngx_stream_proxy_pass(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
……
cscf = ngx_stream_conf_get_module_srv_conf(cf, ngx_stream_core_module);
// configuring proxy_pass installs this handler
cscf->handler = ngx_stream_proxy_handler;
value = cf->args->elts;
url = &value[1];
ngx_memzero(&ccv, sizeof(ngx_stream_compile_complex_value_t));
……
ngx_memzero(&u, sizeof(ngx_url_t));
u.url = *url;
u.no_resolve = 1;
// find (or add) the corresponding upstream
pscf->upstream = ngx_stream_upstream_add(cf, &u, 0);
if (pscf->upstream == NULL) {
return NGX_CONF_ERROR;
}
return NGX_CONF_OK;
}
// the proxy handler
static void ngx_stream_proxy_handler(ngx_stream_session_t *s)
{
u_char *p;
ngx_str_t *host;
ngx_uint_t i;
ngx_connection_t *c;
ngx_resolver_ctx_t *ctx, temp;
ngx_stream_upstream_t *u;
ngx_stream_core_srv_conf_t *cscf;
ngx_stream_proxy_srv_conf_t *pscf;
ngx_stream_upstream_srv_conf_t *uscf, **uscfp;
ngx_stream_upstream_main_conf_t *umcf;
c = s->connection;
pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);
u = ngx_pcalloc(c->pool, sizeof(ngx_stream_upstream_t));
if (u == NULL) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
s->upstream = u;
u->requests = 1;
u->peer.type = c->type;
u->start_sec = ngx_time();
// set the downstream read and write handlers; both are the same function, which tells reads from writes by the event's write flag
c->write->handler = ngx_stream_proxy_downstream_handler;
c->read->handler = ngx_stream_proxy_downstream_handler;
……
// allocate the downstream receive buffer (u->downstream_buf), which holds data read from the downstream client before it is forwarded to the upstream
p = ngx_pnalloc(c->pool, pscf->buffer_size);
if (p == NULL) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
……
if (c->read->ready) {
// data sent by the downstream has already been read into the buffer; post the read event onto the
// global posted-events queue so the event loop will process it later by calling its handler,
// i.e. ngx_stream_proxy_downstream_handler is invoked asynchronously
ngx_post_event(c->read, &ngx_posted_events);
}
if (pscf->upstream_value) {
if (ngx_stream_proxy_eval(s, pscf) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
}
if (u->resolved == NULL) {
uscf = pscf->upstream;
} else {
// resolve the host name
……
}
if (uscf == NULL) {
ngx_log_error(NGX_LOG_ALERT, c->log, 0, "no upstream configuration");
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
// the upstream has been determined
u->upstream = uscf;
if (uscf->peer.init(s, uscf) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
u->peer.start_time = ngx_current_msec;
……
// connect to the proxied server in the upstream
ngx_stream_proxy_connect(s);
}
// the downstream read/write handler; the ev->write flag tells whether this is a read or a write
static void ngx_stream_proxy_downstream_handler(ngx_event_t *ev)
{
// the downstream read/write direction maps directly onto whether data comes from the upstream:
// ev->write is true for a write to the downstream, i.e. data read from the upstream, so from_upstream is true;
// ev->write is false for a read from the downstream, i.e. data to be written to the upstream, so from_upstream is false.
ngx_stream_proxy_process_connection(ev, ev->write);
}
// the from_upstream parameter says whether data flows from the upstream to the downstream or the other way around
static void ngx_stream_proxy_process_connection(ngx_event_t *ev, ngx_uint_t from_upstream)
{
……
s = c->data;
// connection state checks and timeout handling
……
ngx_stream_proxy_process(s, from_upstream, ev->write);
}
static void ngx_stream_proxy_process(ngx_stream_session_t *s, ngx_uint_t from_upstream,
ngx_uint_t do_write)
{
……
u = s->upstream;
// the downstream connection
c = s->connection;
// the upstream connection
pc = u->connected ? u->peer.connection : NULL;
……
pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);
// the from_upstream parameter determines the direction of the transfer
if (from_upstream) {
src = pc;
dst = c;
b = &u->upstream_buf; // buffer for data read from the upstream
limit_rate = pscf->download_rate;
received = &u->received;
packets = &u->responses;
out = &u->downstream_out;
busy = &u->downstream_busy;
} else {
src = c;
dst = pc;
b = &u->downstream_buf; // buffer for data read from the downstream
limit_rate = pscf->upload_rate;
received = &s->received;
packets = &u->requests;
out = &u->upstream_out;
busy = &u->upstream_busy;
}
for ( ;; ) {
if (do_write && dst) {
if (*out || *busy || dst->buffered) {
// ngx_stream_top_filter is ngx_stream_write_filter here;
// it calls c->send_chain internally to forward the data
rc = ngx_stream_top_filter(s, *out, from_upstream);
if (rc == NGX_ERROR) {
ngx_stream_proxy_finalize(s, NGX_STREAM_OK);
return;
}
ngx_chain_update_chains(c->pool, &u->free, busy, out,
(ngx_buf_tag_t) &ngx_stream_proxy_module);
if (*busy == NULL) {
b->pos = b->start;
b->last = b->start;
}
}
}
size = b->end - b->last;
if (size && src->read->ready && !src->read->delayed
&& !src->read->error)
{
if (limit_rate) {
// rate limiting
……
}
c->log->action = recv_action;
// read from the source connection into its buffer
n = src->recv(src, b->last, size);
……
if (n >= 0) {
if (limit_rate) {
delay = (ngx_msec_t) (n * 1000 / limit_rate);
if (delay > 0) {
src->read->delayed = 1;
ngx_add_timer(src->read, delay);
}
}
if (from_upstream) {
if (u->state->first_byte_time == (ngx_msec_t) -1) {
u->state->first_byte_time = ngx_current_msec
- u->start_time;
}
}
for (ll = out; *ll; ll = &(*ll)->next) { /* void */ }
cl = ngx_chain_get_free_buf(c->pool, &u->free);
if (cl == NULL) {
ngx_stream_proxy_finalize(s,
NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
*ll = cl;
cl->buf->pos = b->last;
cl->buf->last = b->last + n;
cl->buf->tag = (ngx_buf_tag_t) &ngx_stream_proxy_module;
cl->buf->temporary = (n ? 1 : 0);
cl->buf->last_buf = src->read->eof;
cl->buf->flush = 1;
(*packets)++;
*received += n;
b->last += n;
do_write = 1; // new data is available, so try to write it out
continue;
}
}
break;
}
// all data forwarded; check whether the session can be closed (several conditions, especially for UDP)
if (ngx_stream_proxy_test_finalize(s, from_upstream) == NGX_OK) {
return;
}
flags = src->read->eof ? NGX_CLOSE_EVENT : 0;
if (!src->shared && ngx_handle_read_event(src->read, flags) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
if (dst) {
if (!dst->shared && ngx_handle_write_event(dst->write, 0) != NGX_OK) {
ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
return;
}
if (!c->read->delayed && !pc->read->delayed) {
ngx_add_timer(c->write, pscf->timeout);
} else if (c->write->timer_set) {
ngx_del_timer(c->write);
}
}
}
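Stripped of buffer reuse, rate limiting and the filter chain, the loop above is the classic proxy pump: pick src/dst according to the direction, read while the source is readable, write while there is pending data, and set do_write once new data arrives. Below is a condensed blocking sketch of the same idea; it is only an illustration (blocking read()/write() instead of nginx's event-driven, non-blocking transfer):
// illustrative sketch, not nginx source: one direction of a blocking relay pump
#include <sys/types.h>
#include <unistd.h>

/* copy bytes from src to dst until src reports EOF; nginx does the same in both
   directions, but non-blocking and driven by read/write events and buffers */
static ssize_t relay(int src, int dst)
{
    char buf[16384];
    ssize_t total = 0;

    for ( ;; ) {
        ssize_t n = read(src, buf, sizeof(buf));      /* src->recv(...) in nginx */
        if (n <= 0) {
            return (n == 0) ? total : -1;             /* EOF or error */
        }

        ssize_t off = 0;
        while (off < n) {                             /* the do_write half of the loop */
            ssize_t sent = write(dst, buf + off, (size_t) (n - off));
            if (sent < 0) {
                return -1;
            }
            off += sent;
        }
        total += n;
    }
}

int main(void)
{
    /* relay stdin to stdout; with sockets the structure is identical */
    return relay(0, 1) < 0 ? 1 : 0;
}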
// connect to the proxied server in the upstream
static void ngx_stream_proxy_connect(ngx_stream_session_t *s)
{
ngx_int_t rc;
ngx_connection_t *c, *pc;
ngx_stream_upstream_t *u;
ngx_stream_proxy_srv_conf_t *pscf;
c = s->connection;
……
// connect to a proxied server in the upstream; a load-balancing algorithm picks which server to connect to
rc = ngx_event_connect_peer(&u->peer);
……
// set the upstream connection handlers
pc = u->peer.connection;
pc->read->handler = ngx_stream_proxy_connect_handler;
pc->write->handler = ngx_stream_proxy_connect_handler;
// add a connect timeout on the upstream write event; ngx_stream_proxy_connect_handler runs
// when the connection becomes writable or the timer expires
ngx_add_timer(pc->write, pscf->connect_timeout);
}
// connect to the proxied server in the upstream
ngx_int_t ngx_event_connect_peer(ngx_peer_connection_t *pc)
{
int rc;
// call the get callback to pick a suitable upstream server; which callback is used depends on the configuration:
// with consistent hashing, get == ngx_stream_upstream_get_chash_peer
rc = pc->get(pc, pc->data);
// the connect sequence, abbreviated
type = (pc->type ? pc->type : SOCK_STREAM);
s = ngx_socket(pc->sockaddr->sa_family, type, 0);
c = ngx_get_connection(s, pc->log);
c->type = type;
……
// bind a source address for the upstream connection; to bind the downstream client's source address,
// transparent must be configured so that a non-local address can be bound, i.e. client IP/port pass-through
if (pc->local) {
#if (NGX_HAVE_TRANSPARENT_PROXY)
if (pc->transparent) {
/*
ngx_event_connect_set_transparent roughly does:
int value = 1;
setsockopt(s, IPPROTO_IP, IP_TRANSPARENT,(const void *) &value, sizeof(int))
*/
if (ngx_event_connect_set_transparent(pc, s) != NGX_OK) {
goto failed;
}
}
#endif
……
if (bind(s, pc->local->sockaddr, pc->local->socklen) == -1) {
ngx_log_error(NGX_LOG_CRIT, pc->log, ngx_socket_errno,
"bind(%V) failed", &pc->local->name);
goto failed;
}
}
if (type == SOCK_STREAM) {
……
} else { /* type == SOCK_DGRAM */
c->recv = ngx_udp_recv;
c->send = ngx_send;
c->send_chain = ngx_udp_send_chain;
}
……
pc->connection = c;
c->number = ngx_atomic_fetch_add(ngx_connection_counter, 1);
if (ngx_add_conn) {
if (ngx_add_conn(c) == NGX_ERROR) {
goto failed;
}
}
rc = connect(s, pc->sockaddr, pc->socklen);
……
if (ngx_add_conn) {
wev->ready = 1;
return NGX_OK;
}
……
return NGX_OK;
}
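The transparent branch is what makes client IP/port pass-through work: IP_TRANSPARENT allows bind() to an address not configured on this host, so the upstream connection can carry the downstream client's address as its source (in nginx this is enabled with `proxy_bind $remote_addr transparent`, and it additionally needs sufficient privileges and policy routing for the return traffic). A minimal standalone illustration with a made-up client address, not nginx code:
// illustrative sketch, not nginx source: bind a non-local source address with IP_TRANSPARENT
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    int s = socket(AF_INET, SOCK_DGRAM, 0);

    /* needs elevated privileges (CAP_NET_ADMIN); otherwise setsockopt() fails */
    int value = 1;
    if (setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, &value, sizeof(value)) < 0) {
        perror("setsockopt(IP_TRANSPARENT)");
        return 1;
    }

    /* bind to the (possibly non-local) client address we want to present upstream */
    struct sockaddr_in local;
    memset(&local, 0, sizeof(local));
    local.sin_family = AF_INET;
    inet_pton(AF_INET, "203.0.113.7", &local.sin_addr);   /* hypothetical client IP */
    local.sin_port = htons(40000);

    if (bind(s, (struct sockaddr *) &local, sizeof(local)) < 0) {
        perror("bind");
        return 1;
    }

    /* a subsequent connect()/sendto() toward the upstream would carry
       203.0.113.7:40000 as the source; routing must bring replies back here */
    return 0;
}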
// handler run after the connection to the upstream server is made
static void ngx_stream_proxy_connect_handler(ngx_event_t *ev)
{
ngx_connection_t *c;
ngx_stream_session_t *s;
c = ev->data;
s = c->data;
……
// test the connection; if it failed, close it and pick the next upstream server via the load-balancing algorithm
if (ngx_stream_proxy_test_connect(c) != NGX_OK) {
ngx_stream_proxy_next_upstream(s);
return;
}
// initialize the upstream connection
ngx_stream_proxy_init_upstream(s);
}
static void ngx_stream_proxy_init_upstream(ngx_stream_session_t *s)
{
ngx_connection_t *c, *pc;
ngx_log_handler_pt handler;
ngx_stream_upstream_t *u;
……
u = s->upstream;
pc = u->peer.connection;
……
c = s->connection;
u->connected = 1;
pc->read->handler = ngx_stream_proxy_upstream_handler;
pc->write->handler = ngx_stream_proxy_upstream_handler;
if (pc->read->ready) {
// post the read event so it is handled asynchronously
ngx_post_event(pc->read, &ngx_posted_events);
}
// call ngx_stream_proxy_process (seen above): read from the downstream and forward to the upstream
ngx_stream_proxy_process(s, 0, 1);
}
Forwarding:
// src/stream/ngx_stream_write_filter_module.c
static ngx_int_t ngx_stream_write_filter(ngx_stream_session_t *s, ngx_chain_t *in,
ngx_uint_t from_upstream)
{
ngx_chain_t *cl, *ln, **ll, **out, *chain;
ngx_connection_t *c;
……
if (from_upstream) {
c = s->connection;
out = &ctx->from_upstream;
} else {
c = s->upstream->peer.connection;
out = &ctx->from_downstream;
}
……
// forward the data
chain = c->send_chain(c, *out, 0);
……
return NGX_OK;
}
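send_chain is nginx's vectored-send hook: for plain TCP connections the chain ends up in writev()/sendfile(), while ngx_udp_send_chain packs the buffers of one datagram into a single sendmsg() so the datagram is not split. The sketch below shows only the writev() side of that idea; the helper names are my own, not nginx code:
// illustrative sketch, not nginx source: sending a chain of buffers with one writev()
#include <sys/types.h>
#include <sys/uio.h>
#include <string.h>

/* send a small chain of buffers in a single system call */
static ssize_t send_chain(int fd, const char *bufs[], size_t n_bufs)
{
    struct iovec iov[8];
    size_t n = (n_bufs < 8) ? n_bufs : 8;

    for (size_t i = 0; i < n; i++) {
        iov[i].iov_base = (void *) bufs[i];
        iov[i].iov_len = strlen(bufs[i]);
    }

    /* stream sockets may write only part of the chain; a real implementation,
       like ngx_writev_chain, tracks how much of the chain was consumed */
    return writev(fd, iov, (int) n);
}

int main(void)
{
    const char *chain[] = { "hello ", "from ", "a buffer chain\n" };
    return (send_chain(1 /* stdout */, chain, 3) < 0) ? 1 : 0;
}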
Consistent-hash load balancing:
// src/stream/ngx_stream_upstream_hash_module.c
static ngx_int_t ngx_stream_upstream_init_chash_peer(ngx_stream_session_t *s,
ngx_stream_upstream_srv_conf_t *us)
{
uint32_t hash;
ngx_stream_upstream_hash_srv_conf_t *hcf;
ngx_stream_upstream_hash_peer_data_t *hp;
if (ngx_stream_upstream_init_hash_peer(s, us) != NGX_OK) {
return NGX_ERROR;
}
s->upstream->peer.get = ngx_stream_upstream_get_chash_peer;
hp = s->upstream->peer.data;
hcf = ngx_stream_conf_upstream_srv_conf(us,
ngx_stream_upstream_hash_module);
// hash the session key derived from the configuration
hash = ngx_crc32_long(hp->key.data, hp->key.len);
ngx_stream_upstream_rr_peers_rlock(hp->rrp.peers);
// find the point on the hash ring that this hash maps to
hp->hash = ngx_stream_upstream_find_chash_point(hcf->points, hash);
ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
return NGX_OK;
}
static ngx_int_t ngx_stream_upstream_get_chash_peer(ngx_peer_connection_t *pc, void *data)
{
ngx_stream_upstream_hash_peer_data_t *hp = data;
time_t now;
intptr_t m;
ngx_str_t *server;
ngx_int_t total;
ngx_uint_t i, n, best_i;
ngx_stream_upstream_rr_peer_t *peer, *best;
ngx_stream_upstream_chash_point_t *point;
ngx_stream_upstream_chash_points_t *points;
ngx_stream_upstream_hash_srv_conf_t *hcf;
ngx_log_debug1(NGX_LOG_DEBUG_STREAM, pc->log, 0,
"get consistent hash peer, try: %ui", pc->tries);
ngx_stream_upstream_rr_peers_wlock(hp->rrp.peers);
if (hp->tries > 20 || hp->rrp.peers->single) {
ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
return hp->get_rr_peer(pc, &hp->rrp);
}
pc->connection = NULL;
now = ngx_time();
hcf = hp->conf;
points = hcf->points;
point = &points->point[0];
for ( ;; ) {
// the hash picks the corresponding upstream server
server = point[hp->hash % points->number].server;
best = NULL;
best_i = 0;
total = 0;
// then walk the round-robin peer list to check that this server is usable; if it is not, move on to the next point
for (peer = hp->rrp.peers->peer, i = 0;
peer;
peer = peer->next, i++)
{
// tried bitmask used by the scheduler
n = i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));
if (hp->rrp.tried[n] & m) {
continue;
}
// is the peer marked down
if (peer->down) {
continue;
}
// has the peer failed max_fails times within fail_timeout
if (peer->max_fails
&& peer->fails >= peer->max_fails
&& now - peer->checked <= peer->fail_timeout)
{
continue;
}
// has the peer reached its configured maximum number of connections
if (peer->max_conns && peer->conns >= peer->max_conns) {
continue;
}
// does this peer match the server chosen by the hash
if (peer->server.len != server->len
|| ngx_strncmp(peer->server.data, server->data, server->len)
!= 0)
{
continue;
}
peer->current_weight += peer->effective_weight;
total += peer->effective_weight;
if (peer->effective_weight < peer->weight) {
peer->effective_weight++;
}
if (best == NULL || peer->current_weight > best->current_weight) {
best = peer;
best_i = i;
}
}
if (best) {
// found one, leave the loop
best->current_weight -= total;
break;
}
// otherwise advance the hash and try the next point on the ring
hp->hash++;
hp->tries++;
if (hp->tries > 20) {
// after 20 unsuccessful tries, fall back to round-robin
ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
return hp->get_rr_peer(pc, &hp->rrp);
}
}
hp->rrp.current = best;
pc->sockaddr = best->sockaddr;
pc->socklen = best->socklen;
pc->name = &best->name;
// one more connection on this peer
best->conns++;
// refresh the check timestamp
if (now - best->checked > best->fail_timeout) {
best->checked = now;
}
ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
// mark this peer as tried in the bitmask
n = best_i / (8 * sizeof(uintptr_t));
m = (uintptr_t) 1 << best_i % (8 * sizeof(uintptr_t));
hp->rrp.tried[n] |= m;
return NGX_OK;
}
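ngx_stream_upstream_find_chash_point is the usual ring lookup: the pre-built points array is sorted by hash, and the session hash is mapped to the first point whose value is not smaller than it, wrapping around to point 0 (nginx builds many virtual points per server, scaled by the server's weight). A minimal standalone illustration of that lookup, with made-up data, not nginx code:
// illustrative sketch, not nginx source: binary search on a sorted hash ring
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint32_t    hash;       /* position on the ring */
    const char *server;     /* upstream server this virtual node belongs to */
} point_t;

/* return the index of the first point with point.hash >= hash, wrapping to 0 */
static size_t find_chash_point(const point_t *points, size_t n, uint32_t hash)
{
    size_t lo = 0, hi = n;

    while (lo < hi) {
        size_t mid = lo + (hi - lo) / 2;
        if (points[mid].hash < hash) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }
    return (lo == n) ? 0 : lo;      /* past the last point: wrap to the first */
}

int main(void)
{
    /* sorted virtual nodes; real configurations have many per server */
    point_t ring[] = {
        { 100, "10.0.0.1:53" },
        { 900, "10.0.0.2:53" },
        { 1700, "10.0.0.3:53" },
    };

    uint32_t session_hash = 1000;   /* e.g. ngx_crc32_long() of the configured key */
    size_t i = find_chash_point(ring, 3, session_hash);
    printf("hash %u -> %s\n", (unsigned) session_hash, ring[i].server);
    return 0;
}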
The connection-limiting module:
// src/stream/ngx_stream_limit_conn_module.c
static ngx_int_t ngx_stream_limit_conn_init(ngx_conf_t *cf)
{
ngx_stream_handler_pt *h;
ngx_stream_core_main_conf_t *cmcf;
cmcf = ngx_stream_conf_get_module_main_conf(cf, ngx_stream_core_module);
// register a handler for the NGX_STREAM_PREACCESS_PHASE, namely ngx_stream_limit_conn_handler,
// so every session goes through the connection-limiting logic
h = ngx_array_push(&cmcf->phases[NGX_STREAM_PREACCESS_PHASE].handlers);
if (h == NULL) {
return NGX_ERROR;
}
*h = ngx_stream_limit_conn_handler;
return NGX_OK;
}
static ngx_int_t ngx_stream_limit_conn_handler(ngx_stream_session_t *s)
{
size_t n;
uint32_t hash;
ngx_str_t key;
ngx_uint_t i;
ngx_slab_pool_t *shpool;
ngx_rbtree_node_t *node;
ngx_pool_cleanup_t *cln;
ngx_stream_limit_conn_ctx_t *ctx;
ngx_stream_limit_conn_node_t *lc;
ngx_stream_limit_conn_conf_t *lccf;
ngx_stream_limit_conn_limit_t *limits;
ngx_stream_limit_conn_cleanup_t *lccln;
lccf = ngx_stream_get_module_srv_conf(s, ngx_stream_limit_conn_module);
limits = lccf->limits.elts;
for (i = 0; i < lccf->limits.nelts; i++) {
ctx = limits[i].shm_zone->data;
// build the session's key from the configuration, e.g. the client address when limiting connections per client IP
if (ngx_stream_complex_value(s, &ctx->key, &key) != NGX_OK) {
return NGX_ERROR;
}
if (key.len == 0) {
continue;
}
if (key.len > 255) {
ngx_log_error(NGX_LOG_ERR, s->connection->log, 0,
"the value of the \"%V\" key "
"is more than 255 bytes: \"%V\"",
&ctx->key.value, &key);
continue;
}
// hash the session key
hash = ngx_crc32_short(key.data, key.len);
shpool = (ngx_slab_pool_t *) limits[i].shm_zone->shm.addr;
ngx_shmtx_lock(&shpool->mutex);
// the hashes are kept in a red-black tree; look the hash up in the tree to find its node
node = ngx_stream_limit_conn_lookup(ctx->rbtree, &key, hash);
if (node == NULL) {
// not found: insert a new node
n = offsetof(ngx_rbtree_node_t, color)
+ offsetof(ngx_stream_limit_conn_node_t, data)
+ key.len;
node = ngx_slab_alloc_locked(shpool, n);
if (node == NULL) {
ngx_shmtx_unlock(&shpool->mutex);
ngx_stream_limit_conn_cleanup_all(s->connection->pool);
return NGX_STREAM_SERVICE_UNAVAILABLE;
}
lc = (ngx_stream_limit_conn_node_t *) &node->color;
node->key = hash;
lc->len = (u_char) key.len;
lc->conn = 1; // the first connection for this key
ngx_memcpy(lc->data, key.data, key.len);
ngx_rbtree_insert(ctx->rbtree, node);
} else {
lc = (ngx_stream_limit_conn_node_t *) &node->color;
if ((ngx_uint_t) lc->conn >= limits[i].conn) {
// connection count over the limit: reject the session
ngx_shmtx_unlock(&shpool->mutex);
ngx_stream_limit_conn_cleanup_all(s->connection->pool);
return NGX_STREAM_SERVICE_UNAVAILABLE;
}
lc->conn++; // one more connection for this key
}
ngx_shmtx_unlock(&shpool->mutex);
cln = ngx_pool_cleanup_add(s->connection->pool,
sizeof(ngx_stream_limit_conn_cleanup_t));
if (cln == NULL) {
return NGX_ERROR;
}
cln->handler = ngx_stream_limit_conn_cleanup;
lccln = cln->data;
lccln->shm_zone = limits[i].shm_zone;
lccln->node = node;
}
return NGX_DECLINED;
}
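Boiled down, limit_conn is a shared counter per key hash: take the shared-memory lock, refuse the session if the counter for the key is already at the configured limit, otherwise increment it and register a pool cleanup that decrements it when the session ends. The in-process sketch below illustrates only that counting logic; nginx keeps the real counters in a red-black tree inside a shared memory zone and compares the full key bytes, which this fixed-size table does not:
// illustrative sketch, not nginx source: counting connections per key with a limit
#include <pthread.h>
#include <stdint.h>

#define BUCKETS 1024
#define LIMIT   10          /* like "limit_conn addr 10;" */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned counters[BUCKETS];

/* trivial FNV-1a stand-in for ngx_crc32_short(); collisions are ignored here,
   while nginx compares the actual key bytes stored in the tree nodes */
static uint32_t hash_key(const char *key)
{
    uint32_t h = 2166136261u;
    while (*key) {
        h = (h ^ (unsigned char) *key++) * 16777619u;
    }
    return h;
}

/* returns 0 if the connection is admitted, -1 if the limit is reached */
static int limit_conn_enter(const char *key)
{
    uint32_t slot = hash_key(key) % BUCKETS;
    int rc = 0;

    pthread_mutex_lock(&lock);
    if (counters[slot] >= LIMIT) {
        rc = -1;                      /* NGX_STREAM_SERVICE_UNAVAILABLE in nginx */
    } else {
        counters[slot]++;
    }
    pthread_mutex_unlock(&lock);
    return rc;
}

/* the counterpart of ngx_stream_limit_conn_cleanup: run when the session ends */
static void limit_conn_leave(const char *key)
{
    uint32_t slot = hash_key(key) % BUCKETS;

    pthread_mutex_lock(&lock);
    if (counters[slot] > 0) {
        counters[slot]--;
    }
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    const char *client = "203.0.113.7";
    if (limit_conn_enter(client) == 0) {
        /* ... proxy the session ... */
        limit_conn_leave(client);
    }
    return 0;
}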
Finally, back in the forwarding function ngx_stream_proxy_process: after the data has been forwarded it calls ngx_stream_proxy_test_finalize:
static ngx_int_t ngx_stream_proxy_test_finalize(ngx_stream_session_t *s,
ngx_uint_t from_upstream)
{
ngx_connection_t *c, *pc;
ngx_log_handler_pt handler;
ngx_stream_upstream_t *u;
ngx_stream_proxy_srv_conf_t *pscf;
pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);
c = s->connection;
u = s->upstream;
pc = u->connected ? u->peer.connection : NULL;
if (c->type == SOCK_DGRAM) {
if (pscf->requests && u->requests < pscf->requests) {
return NGX_DECLINED;
}
if (pscf->requests) {
ngx_delete_udp_connection(c);
}
// pscf->responses is the proxy_responses directive: how many responses are expected per request.
// When it is not configured it defaults to the maximum int32 value, i.e. unlimited.
// If proxy_responses is not set, or the upstream has sent fewer responses than expected, the session is kept alive;
// otherwise it is terminated right away. Configuring proxy_responses 0 therefore closes the session immediately,
// giving it a very short lifetime.
if (pscf->responses == NGX_MAX_INT32_VALUE
|| u->responses < pscf->responses * u->requests)
{
return NGX_DECLINED;
}
if (pc == NULL || c->buffered || pc->buffered) {
return NGX_DECLINED;
}
handler = c->log->handler;
c->log->handler = NULL;
ngx_log_error(NGX_LOG_INFO, c->log, 0,
"udp done"
", packets from/to client:%ui/%ui"
", bytes from/to client:%O/%O"
", bytes from/to upstream:%O/%O",
u->requests, u->responses,
s->received, c->sent, u->received, pc ? pc->sent : 0);
c->log->handler = handler;
ngx_stream_proxy_finalize(s, NGX_STREAM_OK);
return NGX_OK;
}
……
return NGX_OK;
}
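So for UDP the session's lifetime is really driven by this accounting: with proxy_responses set, the session is considered complete once the upstream has produced responses >= proxy_responses * requests and nothing is left buffered; without it, the session stays open until some other mechanism (such as the proxy timeout) ends it. The decisive check can be distilled as follows; this is a sketch with invented names, and the proxy_requests handling at the top of the real function is left out:
// illustrative sketch, not nginx source: when is a UDP proxy session complete?
#include <stdbool.h>
#include <stdint.h>

#define RESPONSES_UNSET INT32_MAX   /* stands in for NGX_MAX_INT32_VALUE, i.e. proxy_responses not set */

/* expected_responses mirrors pscf->responses; requests/responses mirror the
   per-session counters kept in ngx_stream_upstream_t */
static bool udp_session_done(int32_t expected_responses,
                             uint32_t requests, uint32_t responses,
                             bool anything_buffered)
{
    if (expected_responses == RESPONSES_UNSET) {
        return false;                            /* no limit: keep the session */
    }
    if ((uint64_t) responses < (uint64_t) expected_responses * requests) {
        return false;                            /* still expecting upstream replies */
    }
    return !anything_buffered;                   /* done once nothing is left to flush */
}

int main(void)
{
    /* with "proxy_responses 1;" one request and one reply finish the session */
    return udp_session_done(1, 1, 1, false) ? 0 : 1;
}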