Analysis of the nginx Layer-4 UDP Proxy Source Code

After listening sockets have been created for all configured ports, the event module initializes accept handling:

// src/event/ngx_event.c
static ngx_int_t ngx_event_process_init(ngx_cycle_t *cycle)
{
    ……
    ls = cycle->listening.elts;
    for (i = 0; i < cycle->listening.nelts; i++) {
        //for each listening descriptor, get a connection object that wraps the fd
        c = ngx_get_connection(ls[i].fd, cycle->log);
        
        //get the read event of the listening descriptor
        rev = c->read;
        
        //mark it as an accept event for new connections
        rev->accept = 1;
        
        //read event handler: SOCK_STREAM takes the TCP accept path, otherwise go straight to the UDP recvmsg path
        rev->handler = (c->type == SOCK_STREAM) ? ngx_event_accept
                                                : ngx_event_recvmsg;
        
        //add the read event to the I/O multiplexing module; with epoll, ngx_add_event is ngx_epoll_add_event
        //for UDP, ngx_event_recvmsg is called when the event fires
        if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
            return NGX_ERROR;
        }
    }
}

The epoll I/O multiplexing module:

// src/event/modules/ngx_epoll_module.c
static ngx_str_t      epoll_name = ngx_string("epoll");

static ngx_command_t  ngx_epoll_commands[] = {

    { ngx_string("epoll_events"),
      NGX_EVENT_CONF|NGX_CONF_TAKE1,
      ngx_conf_set_num_slot,
      0,
      offsetof(ngx_epoll_conf_t, events),
      NULL },

    { ngx_string("worker_aio_requests"),
      NGX_EVENT_CONF|NGX_CONF_TAKE1,
      ngx_conf_set_num_slot,
      0,
      offsetof(ngx_epoll_conf_t, aio_requests),
      NULL },

      ngx_null_command
};


static ngx_event_module_t  ngx_epoll_module_ctx = {
    &epoll_name,
    ngx_epoll_create_conf,               /* create configuration */
    ngx_epoll_init_conf,                 /* init configuration */

    {
        ngx_epoll_add_event,             /* add an event */
        ngx_epoll_del_event,             /* delete an event */
        ngx_epoll_add_event,             /* enable an event */
        ngx_epoll_del_event,             /* disable an event */
        ngx_epoll_add_connection,        /* add an connection */
        ngx_epoll_del_connection,        /* delete an connection */
#if (NGX_HAVE_EVENTFD)
        ngx_epoll_notify,                /* trigger a notify */
#else
        NULL,                            /* trigger a notify */
#endif
        ngx_epoll_process_events,        /* process the events */
        ngx_epoll_init,                  /* init the events */
        ngx_epoll_done,                  /* done the events */
    }
};

//event processing
static ngx_int_t ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags)
{
    int                events;
    uint32_t           revents;
    ngx_int_t          instance, i;
    ngx_uint_t         level;
    ngx_err_t          err;
    ngx_event_t       *rev, *wev;
    ngx_queue_t       *queue;
    ngx_connection_t  *c;

    /* NGX_TIMER_INFINITE == INFTIM */

    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                   "epoll timer: %M", timer);

    events = epoll_wait(ep, event_list, (int) nevents, timer);

    ……

    if (events == 0) {
        if (timer != NGX_TIMER_INFINITE) {
            return NGX_OK;
        }

        ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
                      "epoll_wait() returned no events without timeout");
        return NGX_ERROR;
    }

    for (i = 0; i < events; i++) {
        c = event_list[i].data.ptr;

        instance = (uintptr_t) c & 1;
        c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~1);

        rev = c->read;

        ……
            
        revents = event_list[i].events;

        ……

        if ((revents & EPOLLIN) && rev->active) {


            rev->ready = 1;

            if (flags & NGX_POST_EVENTS) {
                queue = rev->accept ? &ngx_posted_accept_events
                                    : &ngx_posted_events;

                ngx_post_event(rev, queue);

            } else {
                //run the read event handler
                rev->handler(rev);
            }
        }

        wev = c->write;

        if ((revents & EPOLLOUT) && wev->active) {

            ……

            if (flags & NGX_POST_EVENTS) {
                ngx_post_event(wev, &ngx_posted_events);

            } else {
                //run the write event handler
                wev->handler(wev);
            }
        }
    }

    return NGX_OK;
}
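
A note on the data.ptr arithmetic at the top of the loop: ngx_epoll_add_event stores the connection pointer in ev.data.ptr with the read event's instance bit OR-ed into the lowest bit (connections are at least word-aligned, so that bit is always zero in the pointer), and ngx_epoll_process_events masks it off again to detect stale events on reused connections. A minimal standalone sketch of the same trick, with invented struct and field names:

/* minimal sketch of the instance-bit pointer tagging (not nginx code) */
#include <stdint.h>
#include <stdio.h>
#include <sys/epoll.h>

struct my_conn {              /* stand-in for ngx_connection_t */
    int       fd;
    unsigned  instance:1;     /* stand-in for c->read->instance */
};

int main(void)
{
    struct my_conn      conn = { .fd = 42, .instance = 1 };
    struct epoll_event  ee;

    /* what ngx_epoll_add_event does: tag the pointer with the instance bit */
    ee.events = EPOLLIN;
    ee.data.ptr = (void *) ((uintptr_t) &conn | conn.instance);

    /* what ngx_epoll_process_events does: untag and compare */
    struct my_conn *c = (struct my_conn *) ((uintptr_t) ee.data.ptr & ~(uintptr_t) 1);
    unsigned instance = (uintptr_t) ee.data.ptr & 1;

    if (instance != c->instance) {
        printf("stale event, ignore\n");
    } else {
        printf("fd %d, instance %u\n", c->fd, instance);
    }

    return 0;
}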

The handler for UDP read events, ngx_event_recvmsg:

// src/event/ngx_event_udp.c
void ngx_event_recvmsg(ngx_event_t *ev){
    struct msghdr      msg;
    struct iovec       iov[1];
    ngx_buf_t          buf;
    static u_char      buffer[65535]; //static read buffer shared by all datagrams
    
    ……
        
    lc = ev->data;
    ls = lc->listening;
    do {
        ngx_memzero(&msg, sizeof(struct msghdr));
        
        iov[0].iov_base = (void *) buffer;
        iov[0].iov_len = sizeof(buffer);
        ……
        msg.msg_iov = iov;
        msg.msg_iovlen = 1;
        
        //read the datagram; the data ends up in iov[0].iov_base, i.e. in buffer
        n = recvmsg(lc->fd, &msg, 0);
        
        //source ip and port of the datagram
        sockaddr = msg.msg_name;
        socklen = msg.msg_namelen;

        //local listening (destination) address and port
        local_sockaddr = ls->sockaddr;
        local_socklen = ls->socklen;

        //ls carries a red-black tree of existing connections; look up whether a connection with the same 4-tuple already exists
        c = ngx_lookup_udp_connection(ls, sockaddr, socklen, local_sockaddr,
                                      local_socklen);
        if (c) {
            ……
            ngx_memzero(&buf, sizeof(ngx_buf_t)); 
            buf.pos = buffer;
            buf.last = buffer + n;
            
            //if found, invoke the connection's read-event handler to process the data
            rev = c->read;

            //c->udp->buffer now points at the buffer holding the received data
            c->udp->buffer = &buf;
            rev->ready = 1;  //mark the read event ready; this is checked later

            //here the handler is ngx_stream_session_handler@src/stream/ngx_stream_handler.c
            rev->handler(rev);

            if (c->udp) {
                c->udp->buffer = NULL;
            }

            rev->ready = 0;

            goto next;
        }

        //not found: create a new connection object
        c = ngx_get_connection(lc->fd, ev->log);
        if (c == NULL) {
            return;
        }

        c->shared = 1;
        c->type = SOCK_DGRAM;
        c->socklen = socklen;

        ……
            
        //copy the data just read into buffer into a newly created temporary buffer c->buffer
        c->buffer = ngx_create_temp_buf(c->pool, n);
        c->buffer->last = ngx_cpymem(c->buffer->last, buffer, n);
        
        //set the connection's recv/send functions
        //ngx_udp_shared_recv simply copies data out of c->udp->buffer
        c->recv = ngx_udp_shared_recv;
        c->send = ngx_udp_send;
        c->send_chain = ngx_udp_send_chain;
            
        //insert the new connection into the red-black tree of UDP connections
        if (ngx_insert_udp_connection(c) != NGX_OK) {
            ngx_close_accepted_udp_connection(c);
            return;
        }

        //call the listening socket's handler to process the new connection;
        //here the handler is ngx_stream_init_connection@src/stream/ngx_stream_handler.c
        ls->handler(c);

    next:

        if (ngx_event_flags & NGX_USE_KQUEUE_EVENT) {
            ev->available -= n;
        }

    } while (ev->available);
}
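
All UDP "connections" on a listener share the single listening fd, so the peer of each datagram is only known from the msg_name field filled in by recvmsg; together with the local listening address it forms the 4-tuple that ngx_lookup_udp_connection searches for. A minimal standalone sketch of that recvmsg usage (port and buffer size are arbitrary):

/* minimal sketch (not nginx code): read one datagram and recover its sender */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    struct sockaddr_in local;
    memset(&local, 0, sizeof(local));
    local.sin_family = AF_INET;
    local.sin_port = htons(5353);
    local.sin_addr.s_addr = htonl(INADDR_ANY);

    if (bind(fd, (struct sockaddr *) &local, sizeof(local)) == -1) {
        perror("bind");
        return 1;
    }

    char                     buffer[65535];
    struct sockaddr_storage  peer;                     /* filled in via msg_name */
    struct iovec             iov = { buffer, sizeof(buffer) };
    struct msghdr            msg;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = &peer;                              /* source address goes here */
    msg.msg_namelen = sizeof(peer);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    ssize_t n = recvmsg(fd, &msg, 0);                  /* payload lands in buffer */

    if (n >= 0 && peer.ss_family == AF_INET) {
        struct sockaddr_in *sin = (struct sockaddr_in *) &peer;
        char ip[INET_ADDRSTRLEN];
        inet_ntop(AF_INET, &sin->sin_addr, ip, sizeof(ip));
        printf("%zd bytes from %s:%u\n", n, ip, ntohs(sin->sin_port));
    }

    close(fd);
    return 0;
}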

A new UDP connection is handled as a new session:

// src/stream/ngx_stream_handler.c
void ngx_stream_init_connection(ngx_connection_t *c){
    ……
    
    //allocate a session object and initialize its fields
    s = ngx_pcalloc(c->pool, sizeof(ngx_stream_session_t));

    ……

    if (c->buffer) {
        s->received += c->buffer->last - c->buffer->pos;
    }
    s->connection = c;
    c->data = s;
    
    //assign the session handler to the connection's read event and call it to process the new session
    rev = c->read;
    rev->handler = ngx_stream_session_handler;
    rev->handler(rev);
}

void ngx_stream_session_handler(ngx_event_t *rev)
{
    ngx_connection_t      *c;
    ngx_stream_session_t  *s;

    c = rev->data;
    s = c->data;

    //run the stream phase engine for this session
    ngx_stream_core_run_phases(s);
}

There are seven phases, whose checkers and handlers are initialized as follows:

// src/stream/ngx_stream.h
typedef enum {
    NGX_STREAM_POST_ACCEPT_PHASE = 0,
    NGX_STREAM_PREACCESS_PHASE,
    NGX_STREAM_ACCESS_PHASE,
    NGX_STREAM_SSL_PHASE,  //SSL handshake phase
    NGX_STREAM_PREREAD_PHASE, //initial data is read in this phase (e.g. for ssl_preread)
    NGX_STREAM_CONTENT_PHASE, //content handling, i.e. proxy forwarding
    NGX_STREAM_LOG_PHASE
} ngx_stream_phases;

// src/stream/ngx_stream.c
static ngx_int_t ngx_stream_init_phase_handlers(ngx_conf_t *cf,
    ngx_stream_core_main_conf_t *cmcf)
{
    ngx_int_t                     j;
    ngx_uint_t                    i, n;
    ngx_stream_handler_pt        *h;
    ngx_stream_phase_handler_t   *ph;
    ngx_stream_phase_handler_pt   checker;

    n = 1 /* content phase */;

    //total up the number of handlers
    for (i = 0; i < NGX_STREAM_LOG_PHASE; i++) {
        n += cmcf->phases[i].handlers.nelts;
    }

    ph = ngx_pcalloc(cf->pool,
                     n * sizeof(ngx_stream_phase_handler_t) + sizeof(void *));
    if (ph == NULL) {
        return NGX_ERROR;
    }

    cmcf->phase_engine.handlers = ph;
    n = 0;
    
    //initialize each phase's checker and handlers
    for (i = 0; i < NGX_STREAM_LOG_PHASE; i++) {
        h = cmcf->phases[i].handlers.elts;

        switch (i) {

        case NGX_STREAM_PREREAD_PHASE: //read initial data from the client
            checker = ngx_stream_core_preread_phase;
            break;

        case NGX_STREAM_CONTENT_PHASE: //content handling / proxy forwarding; no handler is registered here
            ph->checker = ngx_stream_core_content_phase;
            n++;
            ph++;

            continue;

        default:
            checker = ngx_stream_core_generic_phase;
        }

        n += cmcf->phases[i].handlers.nelts;

        for (j = cmcf->phases[i].handlers.nelts - 1; j >= 0; j--) {
            ph->checker = checker;
            ph->handler = h[j];
            ph->next = n;
            ph++;
        }
    }

    return NGX_OK;
}

Now look at how the phase checkers are run:

// src/stream/ngx_stream_core_module.c
void ngx_stream_core_run_phases(ngx_stream_session_t *s)
{
    ngx_int_t                     rc;
    ngx_stream_phase_handler_t   *ph;
    ngx_stream_core_main_conf_t  *cmcf;

    cmcf = ngx_stream_get_module_main_conf(s, ngx_stream_core_module);

    ph = cmcf->phase_engine.handlers;

    while (ph[s->phase_handler].checker) {
        //run the checker of the current phase; the checker itself advances the index s->phase_handler
        rc = ph[s->phase_handler].checker(s, &ph[s->phase_handler]);

        //NGX_OK means stop: none of the remaining handlers are run in this invocation
        if (rc == NGX_OK) {
            return;
        }
    }
}

//the default checker
ngx_int_t ngx_stream_core_generic_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    ngx_int_t  rc;

    //delegate to the phase handler
    rc = ph->handler(s);

    if (rc == NGX_OK) {
        //move on to the first handler of the next phase
        s->phase_handler = ph->next;
        return NGX_AGAIN;
    }

    if (rc == NGX_DECLINED) {
        //move on to the next handler of the same phase
        s->phase_handler++;
        return NGX_AGAIN;
    }

    if (rc == NGX_AGAIN || rc == NGX_DONE) {
        return NGX_OK;
    }

    if (rc == NGX_ERROR) {
        rc = NGX_STREAM_INTERNAL_SERVER_ERROR;
    }
    
    //on error, finalize the session
    ngx_stream_finalize_session(s, rc);

    return NGX_OK;
}
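
To make the interplay of s->phase_handler, ph->next, and the checker return codes concrete, here is a stripped-down standalone model of the phase engine. All structs, handlers, and constants are invented stand-ins, and error/finalize handling is left out:

/* toy model of the stream phase engine (not nginx code) */
#include <stdio.h>

#define P_OK        0        /* stands in for NGX_OK       */
#define P_DECLINED  1        /* stands in for NGX_DECLINED */
#define P_AGAIN     2        /* stands in for NGX_AGAIN    */

typedef struct session  session_t;
typedef struct phase    phase_t;

struct session { int phase_handler; };

struct phase {
    int  (*checker)(session_t *, phase_t *);
    int  (*handler)(session_t *);
    int    next;             /* index of the first handler of the next phase */
};

/* mirrors ngx_stream_core_generic_phase */
static int generic_checker(session_t *s, phase_t *ph)
{
    int rc = ph->handler(s);

    if (rc == P_OK)       { s->phase_handler = ph->next; return P_AGAIN; }  /* jump to next phase       */
    if (rc == P_DECLINED) { s->phase_handler++;          return P_AGAIN; }  /* next handler, same phase */
    return P_OK;                                                            /* stop the engine          */
}

/* mirrors ngx_stream_core_content_phase: single handler, always ends the walk */
static int content_checker(session_t *s, phase_t *ph) { ph->handler(s); return P_OK; }

static int limit_handler(session_t *s)   { (void) s; puts("preaccess: limit -> DECLINED"); return P_DECLINED; }
static int access_handler(session_t *s)  { (void) s; puts("access: allow -> OK");          return P_OK; }
static int content_handler(session_t *s) { (void) s; puts("content: proxy");               return P_OK; }

int main(void)
{
    /* flattened array, like cmcf->phase_engine.handlers */
    phase_t engine[] = {
        { generic_checker, limit_handler,   1 },     /* PREACCESS */
        { generic_checker, access_handler,  2 },     /* ACCESS    */
        { content_checker, content_handler, 3 },     /* CONTENT   */
        { NULL, NULL, 0 },
    };

    session_t s = { 0 };

    /* same loop as ngx_stream_core_run_phases */
    while (engine[s.phase_handler].checker) {
        if (engine[s.phase_handler].checker(&s, &engine[s.phase_handler]) == P_OK) {
            break;
        }
    }

    return 0;
}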

//the checker of the PREREAD phase
ngx_int_t ngx_stream_core_preread_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    size_t                       size;
    ssize_t                      n;
    ngx_int_t                    rc;
    ngx_connection_t            *c;
    ngx_stream_core_srv_conf_t  *cscf;

    c = s->connection;

    cscf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);

    if (c->read->timedout) {
        rc = NGX_STREAM_OK;
    } else if (c->read->timer_set) {
        rc = NGX_AGAIN;
    } else {
        //the phase handler (e.g. ngx_stream_ssl_preread_handler) processes the data already read into the buffer, e.g. parsing the TLS ClientHello
        rc = ph->handler(s);
    }

    while (rc == NGX_AGAIN) {

        if (c->buffer == NULL) {
            c->buffer = ngx_create_temp_buf(c->pool, cscf->preread_buffer_size);
            if (c->buffer == NULL) {
                rc = NGX_ERROR;
                break;
            }
        }

        size = c->buffer->end - c->buffer->last;

        if (size == 0) {
            ngx_log_error(NGX_LOG_ERR, c->log, 0, "preread buffer full");
            rc = NGX_STREAM_BAD_REQUEST;
            break;
        }

        if (c->read->eof) {
            rc = NGX_STREAM_OK;
            break;
        }

        if (!c->read->ready) {
            break;
        }

        //read more data into the connection's buffer
        n = c->recv(c, c->buffer->last, size);

        if (n == NGX_ERROR || n == 0) {
            rc = NGX_STREAM_OK;
            break;
        }

        if (n == NGX_AGAIN) {
            break;
        }

        c->buffer->last += n;

        //let the phase handler process the newly read data
        rc = ph->handler(s);
    }

    if (rc == NGX_AGAIN) {
        if (ngx_handle_read_event(c->read, 0) != NGX_OK) {
            ngx_stream_finalize_session(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
            return NGX_OK;
        }

        if (!c->read->timer_set) {
            ngx_add_timer(c->read, cscf->preread_timeout);
        }

        c->read->handler = ngx_stream_session_handler;

        return NGX_OK;
    }

    if (c->read->timer_set) {
        ngx_del_timer(c->read);
    }

    if (rc == NGX_OK) {
        //move on to the first handler of the next phase
        s->phase_handler = ph->next;
        return NGX_AGAIN;
    }

    if (rc == NGX_DECLINED) {
        //move on to the next handler of the same phase
        s->phase_handler++;
        return NGX_AGAIN;
    }

    if (rc == NGX_DONE) {
        return NGX_OK;
    }

    if (rc == NGX_ERROR) {
        rc = NGX_STREAM_INTERNAL_SERVER_ERROR;
    }

    ngx_stream_finalize_session(s, rc);

    return NGX_OK;
}

//the checker of the CONTENT phase
ngx_int_t ngx_stream_core_content_phase(ngx_stream_session_t *s,
    ngx_stream_phase_handler_t *ph)
{
    ngx_connection_t            *c;
    ngx_stream_core_srv_conf_t  *cscf;

    c = s->connection;

    c->log->action = NULL;

    cscf = ngx_stream_get_module_srv_conf(s, ngx_stream_core_module);

    if (c->type == SOCK_STREAM
        && cscf->tcp_nodelay
        && ngx_tcp_nodelay(c) != NGX_OK)
    {
        ngx_stream_finalize_session(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return NGX_OK;
    }

    //only one handler is called here; it comes from the configuration and, with proxy_pass, is ngx_stream_proxy_handler
    cscf->handler(s);

    return NGX_OK;
}

The content-phase handler is ngx_stream_proxy_handler, installed by the proxy_pass directive of the proxy module; it performs the actual forwarding. nginx calls the connection to the backend server the upstream, and the connection to the client the downstream:

// src/stream/ngx_stream_proxy_module.c
static char *ngx_stream_proxy_pass(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
	……
        
    cscf = ngx_stream_conf_get_module_srv_conf(cf, ngx_stream_core_module);
    
    //configuring proxy_pass installs this content handler
    cscf->handler = ngx_stream_proxy_handler;
    
    value = cf->args->elts;

    url = &value[1];

    ngx_memzero(&ccv, sizeof(ngx_stream_compile_complex_value_t));

    ……

    ngx_memzero(&u, sizeof(ngx_url_t));
    u.url = *url;
    u.no_resolve = 1;

    //look up or add the corresponding upstream configuration
    pscf->upstream = ngx_stream_upstream_add(cf, &u, 0);
    if (pscf->upstream == NULL) {
        return NGX_CONF_ERROR;
    }
    return NGX_CONF_OK;
}

//the proxy handler
static void ngx_stream_proxy_handler(ngx_stream_session_t *s)
{
    u_char                           *p;
    ngx_str_t                        *host;
    ngx_uint_t                        i;
    ngx_connection_t                 *c;
    ngx_resolver_ctx_t               *ctx, temp;
    ngx_stream_upstream_t            *u;
    ngx_stream_core_srv_conf_t       *cscf;
    ngx_stream_proxy_srv_conf_t      *pscf;
    ngx_stream_upstream_srv_conf_t   *uscf, **uscfp;
    ngx_stream_upstream_main_conf_t  *umcf;

    c = s->connection;

    pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);

    u = ngx_pcalloc(c->pool, sizeof(ngx_stream_upstream_t));
    if (u == NULL) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    s->upstream = u;

    u->requests = 1;

    u->peer.type = c->type;
    u->start_sec = ngx_time();

    //set the downstream read/write handlers; both point at the same function, which tells read from write via ev->write
    c->write->handler = ngx_stream_proxy_downstream_handler;
    c->read->handler = ngx_stream_proxy_downstream_handler;

    ……

    //allocate the upstream read buffer, i.e. the buffer for data read from the upstream server and sent back to the downstream client
    p = ngx_pnalloc(c->pool, pscf->buffer_size);
    if (p == NULL) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    ……

    if (c->read->ready) {
        //the data sent by the downstream has already been read into the buffer; post the read event
        //to the global posted-events queue so that the event loop processes it later,
        //i.e. ngx_stream_proxy_downstream_handler is invoked asynchronously
        ngx_post_event(c->read, &ngx_posted_events);
    }

    if (pscf->upstream_value) {
        if (ngx_stream_proxy_eval(s, pscf) != NGX_OK) {
            ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
            return;
        }
    }

    if (u->resolved == NULL) {
        uscf = pscf->upstream;
    } else {
        //domain name resolution
		……	
    }

    if (uscf == NULL) {
        ngx_log_error(NGX_LOG_ALERT, c->log, 0, "no upstream configuration");
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    //the upstream is now determined
    u->upstream = uscf;

    if (uscf->peer.init(s, uscf) != NGX_OK) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    u->peer.start_time = ngx_current_msec;

    ……

    //connect to one of the upstream's servers
    ngx_stream_proxy_connect(s);
}
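
Note that ngx_post_event above does not call the handler right away: it appends the event to the global ngx_posted_events queue, which the worker drains after epoll processing, so ngx_stream_proxy_downstream_handler is effectively invoked later in the same event-loop iteration. A minimal sketch of that post-now-run-later pattern, with invented types (nginx itself links ngx_event_t nodes with ngx_queue_t):

/* minimal sketch of the posted-events pattern (not nginx code) */
#include <stdio.h>

typedef struct event  event_t;

struct event {
    void     (*handler)(event_t *);
    event_t   *next;              /* queue link */
    int        posted;
};

static event_t  *posted_head;
static event_t **posted_tail = &posted_head;

/* like ngx_post_event: enqueue once, run later */
static void post_event(event_t *ev)
{
    if (!ev->posted) {
        ev->posted = 1;
        ev->next = NULL;
        *posted_tail = ev;
        posted_tail = &ev->next;
    }
}

/* like ngx_event_process_posted: drain the queue and call each handler */
static void process_posted(void)
{
    while (posted_head) {
        event_t *ev = posted_head;

        posted_head = ev->next;
        if (posted_head == NULL) {
            posted_tail = &posted_head;
        }

        ev->posted = 0;
        ev->handler(ev);          /* the deferred call */
    }
}

static void downstream_handler(event_t *ev) { (void) ev; puts("read event handled later"); }

int main(void)
{
    event_t rev = { downstream_handler, NULL, 0 };

    post_event(&rev);             /* e.g. inside ngx_stream_proxy_handler */
    process_posted();             /* at the end of the worker cycle       */

    return 0;
}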

//the downstream read/write handler; ev->write tells whether this is a read or a write event
static void ngx_stream_proxy_downstream_handler(ngx_event_t *ev)
{
    //the downstream read/write direction maps directly onto from_upstream:
    //ev->write == 1 means we can write to the downstream, i.e. data flows from the upstream, so from_upstream = 1;
    //ev->write == 0 means the downstream sent data that goes to the upstream, so from_upstream = 0.
    ngx_stream_proxy_process_connection(ev, ev->write);
}

//the from_upstream parameter tells whether data flows towards the downstream (write) or away from it (read)
static void ngx_stream_proxy_process_connection(ngx_event_t *ev, ngx_uint_t from_upstream)
{
    ……
    s = c->data;
    
    //connection state and timeout handling
	……

    //transfer data in the direction given by from_upstream
    ngx_stream_proxy_process(s, from_upstream, ev->write);
}


static void ngx_stream_proxy_process(ngx_stream_session_t *s, ngx_uint_t from_upstream,
    ngx_uint_t do_write)
{
    ……
        
    u = s->upstream;

    //the downstream connection
    c = s->connection;
    
    //the upstream connection (if connected)
    pc = u->connected ? u->peer.connection : NULL;

    ……

    pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);

    //from_upstream determines the transfer direction
    if (from_upstream) {
        src = pc;
        dst = c;
        b = &u->upstream_buf; //buffer for data read from the upstream
        limit_rate = pscf->download_rate;
        received = &u->received;
        packets = &u->responses;
        out = &u->downstream_out;
        busy = &u->downstream_busy;
    } else {
        src = c;
        dst = pc;
        b = &u->downstream_buf; //buffer for data read from the downstream
        limit_rate = pscf->upload_rate;
        received = &s->received;
        packets = &u->requests;
        out = &u->upstream_out;
        busy = &u->upstream_busy;
    }

    for ( ;; ) {
        if (do_write && dst) {
            if (*out || *busy || dst->buffered) {
                //ngx_stream_top_filter is ngx_stream_write_filter here;
                //internally it calls c->send_chain to forward the data
                rc = ngx_stream_top_filter(s, *out, from_upstream);

                if (rc == NGX_ERROR) {
                    ngx_stream_proxy_finalize(s, NGX_STREAM_OK);
                    return;
                }

                ngx_chain_update_chains(c->pool, &u->free, busy, out,
                                      (ngx_buf_tag_t) &ngx_stream_proxy_module);

                if (*busy == NULL) {
                    b->pos = b->start;
                    b->last = b->start;
                }
            }
        }
        
        size = b->end - b->last;

        if (size && src->read->ready && !src->read->delayed
            && !src->read->error)
        {
            if (limit_rate) {
                //rate limiting
                ……
            }

            c->log->action = recv_action;

            //read data from the source connection into the buffer
            n = src->recv(src, b->last, size);
            ……
            if (n >= 0) {
                if (limit_rate) {
                    delay = (ngx_msec_t) (n * 1000 / limit_rate);

                    if (delay > 0) {
                        src->read->delayed = 1;
                        ngx_add_timer(src->read, delay);
                    }
                }

                if (from_upstream) {
                    if (u->state->first_byte_time == (ngx_msec_t) -1) {
                        u->state->first_byte_time = ngx_current_msec
                                                    - u->start_time;
                    }
                }

                for (ll = out; *ll; ll = &(*ll)->next) { /* void */ }

                cl = ngx_chain_get_free_buf(c->pool, &u->free);
                if (cl == NULL) {
                    ngx_stream_proxy_finalize(s,
                                              NGX_STREAM_INTERNAL_SERVER_ERROR);
                    return;
                }

                *ll = cl;

                cl->buf->pos = b->last;
                cl->buf->last = b->last + n;
                cl->buf->tag = (ngx_buf_tag_t) &ngx_stream_proxy_module;

                cl->buf->temporary = (n ? 1 : 0);
                cl->buf->last_buf = src->read->eof;
                cl->buf->flush = 1;

                (*packets)++;
                *received += n;
                b->last += n;
                do_write = 1; //there is now data to send
                continue;
            }
        }

        break;
    }

    //all data has been relayed; check whether the session can be finalized (several conditions, especially for udp)
    if (ngx_stream_proxy_test_finalize(s, from_upstream) == NGX_OK) {
        return;
    }

    flags = src->read->eof ? NGX_CLOSE_EVENT : 0;

    if (!src->shared && ngx_handle_read_event(src->read, flags) != NGX_OK) {
        ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
        return;
    }

    if (dst) {
        if (!dst->shared && ngx_handle_write_event(dst->write, 0) != NGX_OK) {
            ngx_stream_proxy_finalize(s, NGX_STREAM_INTERNAL_SERVER_ERROR);
            return;
        }

        if (!c->read->delayed && !pc->read->delayed) {
            ngx_add_timer(c->write, pscf->timeout);

        } else if (c->write->timer_set) {
            ngx_del_timer(c->write);
        }
    }
}

//connect to one of the upstream's servers
static void ngx_stream_proxy_connect(ngx_stream_session_t *s)
{
    ngx_int_t                     rc;
    ngx_connection_t             *c, *pc;
    ngx_stream_upstream_t        *u;
    ngx_stream_proxy_srv_conf_t  *pscf;

    c = s->connection;
 
    ……
   
    //connect to an upstream server; internally a load-balancing algorithm picks which one
    rc = ngx_event_connect_peer(&u->peer);

    ……

    //set the handlers of the upstream connection
    pc = u->peer.connection;
    pc->read->handler = ngx_stream_proxy_connect_handler;
    pc->write->handler = ngx_stream_proxy_connect_handler;

    //arm the connect timeout on the upstream write event; ngx_stream_proxy_connect_handler runs when the connection completes or the timer fires
    ngx_add_timer(pc->write, pscf->connect_timeout);
}

//establish the connection to the upstream server
ngx_int_t ngx_event_connect_peer(ngx_peer_connection_t *pc)
{
    int                rc, type;
    ngx_socket_t       s;
    ngx_event_t       *wev;
    ngx_connection_t  *c;

    //the get callback picks a suitable upstream server; which function it is depends on the configuration,
    //e.g. with consistent hashing get = ngx_stream_upstream_get_chash_peer
    rc = pc->get(pc, pc->data);
    
    //connection setup, abridged
    type = (pc->type ? pc->type : SOCK_STREAM);
    s = ngx_socket(pc->sockaddr->sa_family, type, 0);
    c = ngx_get_connection(s, pc->log);
    c->type = type;
    
    ……
    // bind the upstream connection to a source address; to bind it to the downstream client's
    // address, 'transparent' must be configured so that a non-local address can be bound,
    // which passes the client ip/port through to the backend
    if (pc->local) {
#if (NGX_HAVE_TRANSPARENT_PROXY)
        if (pc->transparent) {
            /*
              ngx_event_connect_set_transparent roughly amounts to:
              int value = 1;
              setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (const void *) &value, sizeof(int))
            */
            if (ngx_event_connect_set_transparent(pc, s) != NGX_OK) {
                goto failed;
            }
        }
#endif
        ……
        if (bind(s, pc->local->sockaddr, pc->local->socklen) == -1) {
            ngx_log_error(NGX_LOG_CRIT, pc->log, ngx_socket_errno,
                          "bind(%V) failed", &pc->local->name);

            goto failed;
        }
    }
  
    if (type == SOCK_STREAM) {
       ……
    } else { /* type == SOCK_DGRAM */
        c->recv = ngx_udp_recv;
        c->send = ngx_send;
        c->send_chain = ngx_udp_send_chain;
    }

    ……
    pc->connection = c;

    c->number = ngx_atomic_fetch_add(ngx_connection_counter, 1);

    if (ngx_add_conn) {
        if (ngx_add_conn(c) == NGX_ERROR) {
            goto failed;
        }
    }

    rc = connect(s, pc->sockaddr, pc->socklen);
    ……
        
    if (ngx_add_conn) {

        wev->ready = 1;

        return NGX_OK;
    }
    ……
    
    return NGX_OK;
}
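
Expanding the transparent branch above into a standalone sketch: IP_TRANSPARENT has to be set before bind so that a non-local (client) source address can be bound, and it only works with CAP_NET_ADMIN plus the usual TPROXY/policy-routing setup on the host. The addresses below are purely illustrative:

/* Linux-only sketch (not nginx code): bind an upstream UDP socket to the
 * downstream client's address so the backend sees the real client ip/port */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int s = socket(AF_INET, SOCK_DGRAM, 0);
    int on = 1;

    /* roughly what ngx_event_connect_set_transparent does */
    if (setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) == -1) {
        perror("setsockopt(IP_TRANSPARENT)");   /* EPERM without CAP_NET_ADMIN */
        return 1;
    }

    struct sockaddr_in client;                  /* illustrative client address */
    memset(&client, 0, sizeof(client));
    client.sin_family = AF_INET;
    client.sin_port = htons(40000);
    inet_pton(AF_INET, "203.0.113.10", &client.sin_addr);

    if (bind(s, (struct sockaddr *) &client, sizeof(client)) == -1) {
        perror("bind(client address)");
        return 1;
    }

    struct sockaddr_in upstream;                /* illustrative backend address */
    memset(&upstream, 0, sizeof(upstream));
    upstream.sin_family = AF_INET;
    upstream.sin_port = htons(53);
    inet_pton(AF_INET, "192.0.2.53", &upstream.sin_addr);

    if (connect(s, (struct sockaddr *) &upstream, sizeof(upstream)) == -1) {
        perror("connect");
        return 1;
    }

    close(s);
    return 0;
}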

// handler called after connecting to the upstream server
static void ngx_stream_proxy_connect_handler(ngx_event_t *ev)
{
    ngx_connection_t      *c;
    ngx_stream_session_t  *s;

    c = ev->data;
    s = c->data;

    ……

    //test the connection; if it failed, close it and let the load-balancing algorithm pick the next upstream server
    if (ngx_stream_proxy_test_connect(c) != NGX_OK) {
        ngx_stream_proxy_next_upstream(s);
        return;
    }

    //initialize the upstream connection
    ngx_stream_proxy_init_upstream(s);
}

static void ngx_stream_proxy_init_upstream(ngx_stream_session_t *s)
{
    ngx_connection_t             *c, *pc;
    ngx_log_handler_pt            handler;
    ngx_stream_upstream_t        *u;

    ……
    u = s->upstream;
    pc = u->peer.connection;
    ……
    c = s->connection;

    u->connected = 1;
    pc->read->handler = ngx_stream_proxy_upstream_handler;
    pc->write->handler = ngx_stream_proxy_upstream_handler;

    if (pc->read->ready) {
        //defer the read: post it to the global event queue
        ngx_post_event(pc->read, &ngx_posted_events);
    }

    //call ngx_stream_proxy_process described above with from_upstream=0, do_write=1: relay the data read from the downstream to the upstream
    ngx_stream_proxy_process(s, 0, 1);
}

Forwarding:

// src/stream/ngx_stream_write_filter_module.c
static ngx_int_t ngx_stream_write_filter(ngx_stream_session_t *s, ngx_chain_t *in,
    ngx_uint_t from_upstream)
{
    ngx_chain_t                    *cl, *ln, **ll, **out, *chain;
    ngx_connection_t               *c;
    ……
    if (from_upstream) {
        c = s->connection;
        out = &ctx->from_upstream;

    } else {
        c = s->upstream->peer.connection;
        out = &ctx->from_downstream;
    }
    ……
    //forward the data
    chain = c->send_chain(c, *out, 0);
    ……
    return NGX_OK;
}

Consistent-hash load balancing:

// src/stream/ngx_stream_upstream_hash_module.c
static ngx_int_t ngx_stream_upstream_init_chash_peer(ngx_stream_session_t *s,
    ngx_stream_upstream_srv_conf_t *us)
{
    uint32_t                               hash;
    ngx_stream_upstream_hash_srv_conf_t   *hcf;
    ngx_stream_upstream_hash_peer_data_t  *hp;

    if (ngx_stream_upstream_init_hash_peer(s, us) != NGX_OK) {
        return NGX_ERROR;
    }

    s->upstream->peer.get = ngx_stream_upstream_get_chash_peer;

    hp = s->upstream->peer.data;
    hcf = ngx_stream_conf_upstream_srv_conf(us,
                                            ngx_stream_upstream_hash_module);

    //hash the session key defined by the configuration
    hash = ngx_crc32_long(hp->key.data, hp->key.len);

    ngx_stream_upstream_rr_peers_rlock(hp->rrp.peers);

    //find the point on the hash ring this hash maps to
    hp->hash = ngx_stream_upstream_find_chash_point(hcf->points, hash);

    ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);

    return NGX_OK;
}


static ngx_int_t ngx_stream_upstream_get_chash_peer(ngx_peer_connection_t *pc, void *data)
{
    ngx_stream_upstream_hash_peer_data_t *hp = data;

    time_t                                now;
    intptr_t                              m;
    ngx_str_t                            *server;
    ngx_int_t                             total;
    ngx_uint_t                            i, n, best_i;
    ngx_stream_upstream_rr_peer_t        *peer, *best;
    ngx_stream_upstream_chash_point_t    *point;
    ngx_stream_upstream_chash_points_t   *points;
    ngx_stream_upstream_hash_srv_conf_t  *hcf;

    ngx_log_debug1(NGX_LOG_DEBUG_STREAM, pc->log, 0,
                   "get consistent hash peer, try: %ui", pc->tries);

    ngx_stream_upstream_rr_peers_wlock(hp->rrp.peers);

    if (hp->tries > 20 || hp->rrp.peers->single) {
        ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
        return hp->get_rr_peer(pc, &hp->rrp);
    }

    pc->connection = NULL;

    now = ngx_time();
    hcf = hp->conf;

    points = hcf->points;
    point = &points->point[0];

    for ( ;; ) {
        //map the hash to an upstream server on the ring
        server = point[hp->hash % points->number].server;

        best = NULL;
        best_i = 0;
        total = 0;

        //then verify against the round-robin peer list that this server is usable; if not, take the next one
        for (peer = hp->rrp.peers->peer, i = 0;
             peer;
             peer = peer->next, i++)
        {
            //tried bitmask: skip peers that were already attempted
            n = i / (8 * sizeof(uintptr_t));
            m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));
            if (hp->rrp.tried[n] & m) {
                continue;
            }

            //skip peers marked as down
            if (peer->down) {
                continue;
            }

            //skip peers that have failed max_fails times within fail_timeout
            if (peer->max_fails
                && peer->fails >= peer->max_fails
                && now - peer->checked <= peer->fail_timeout)
            {
                continue;
            }

            //skip peers whose connection count has reached max_conns
            if (peer->max_conns && peer->conns >= peer->max_conns) {
                continue;
            }

            //skip peers whose address does not match the chosen hash point
            if (peer->server.len != server->len
                || ngx_strncmp(peer->server.data, server->data, server->len)
                   != 0)
            {
                continue;
            }

            peer->current_weight += peer->effective_weight;
            total += peer->effective_weight;

            if (peer->effective_weight < peer->weight) {
                peer->effective_weight++;
            }

            if (best == NULL || peer->current_weight > best->current_weight) {
                best = peer;
                best_i = i;
            }
        }

        if (best) {
            //found one, leave the loop
            best->current_weight -= total;
            break;
        }

        //otherwise advance the hash and keep searching the ring
        hp->hash++;
        hp->tries++;

        if (hp->tries > 20) {
            //after 20 unsuccessful tries, fall back to round-robin
            ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
            return hp->get_rr_peer(pc, &hp->rrp);
        }
    }

    hp->rrp.current = best;

    pc->sockaddr = best->sockaddr;
    pc->socklen = best->socklen;
    pc->name = &best->name;

    //one more connection on this peer
    best->conns++;

    //refresh the check timestamp
    if (now - best->checked > best->fail_timeout) {
        best->checked = now;
    }

    ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);

    //mark this peer as tried in the bitmask
    n = best_i / (8 * sizeof(uintptr_t));
    m = (uintptr_t) 1 << best_i % (8 * sizeof(uintptr_t));
    hp->rrp.tried[n] |= m;

    return NGX_OK;
}
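
ngx_stream_upstream_find_chash_point, called from the init function above, is just a binary search over the sorted array of virtual-node hashes for the first point that is not smaller than the key's CRC32. A stripped-down standalone version of that lookup (the ring values are made up for the example):

/* stripped-down version of the consistent-hash point lookup (not nginx code) */
#include <stdint.h>
#include <stdio.h>

static size_t find_chash_point(const uint32_t *points, size_t n, uint32_t hash)
{
    size_t i = 0, j = n;

    /* find the first point >= hash */
    while (i < j) {
        size_t k = (i + j) / 2;

        if (hash > points[k]) {
            i = k + 1;
        } else if (hash < points[k]) {
            j = k;
        } else {
            return k;
        }
    }

    return i;                 /* may equal n; the caller wraps it with "% n" */
}

int main(void)
{
    /* pretend these are the sorted virtual-node hashes of the upstream ring */
    const uint32_t ring[] = { 100, 900, 4000, 70000, 2000000 };
    const size_t   n = sizeof(ring) / sizeof(ring[0]);

    uint32_t key_hash = 5000;                       /* e.g. ngx_crc32_long(key) */
    size_t   idx = find_chash_point(ring, n, key_hash) % n;

    printf("hash %u -> point %zu (value %u)\n",
           (unsigned) key_hash, idx, (unsigned) ring[idx]);
    return 0;
}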

The connection-limiting (limit_conn) module:

// src/stream/ngx_stream_limit_conn_module.c
static ngx_int_t ngx_stream_limit_conn_init(ngx_conf_t *cf)
{
    ngx_stream_handler_pt        *h;
    ngx_stream_core_main_conf_t  *cmcf;

    cmcf = ngx_stream_conf_get_module_main_conf(cf, ngx_stream_core_module);

    //register ngx_stream_limit_conn_handler in the NGX_STREAM_PREACCESS_PHASE,
    //so every session passes through the connection-limiting logic
    h = ngx_array_push(&cmcf->phases[NGX_STREAM_PREACCESS_PHASE].handlers);
    if (h == NULL) {
        return NGX_ERROR;
    }

    *h = ngx_stream_limit_conn_handler;

    return NGX_OK;
}

static ngx_int_t ngx_stream_limit_conn_handler(ngx_stream_session_t *s)
{
    size_t                            n;
    uint32_t                          hash;
    ngx_str_t                         key;
    ngx_uint_t                        i;
    ngx_slab_pool_t                  *shpool;
    ngx_rbtree_node_t                *node;
    ngx_pool_cleanup_t               *cln;
    ngx_stream_limit_conn_ctx_t      *ctx;
    ngx_stream_limit_conn_node_t     *lc;
    ngx_stream_limit_conn_conf_t     *lccf;
    ngx_stream_limit_conn_limit_t    *limits;
    ngx_stream_limit_conn_cleanup_t  *lccln;

    lccf = ngx_stream_get_module_srv_conf(s, ngx_stream_limit_conn_module);
    limits = lccf->limits.elts;

    for (i = 0; i < lccf->limits.nelts; i++) {
        ctx = limits[i].shm_zone->data;

        //evaluate the session's limit key from the configuration, e.g. the client ip when limiting per client
        if (ngx_stream_complex_value(s, &ctx->key, &key) != NGX_OK) {
            return NGX_ERROR;
        }

        if (key.len == 0) {
            continue;
        }

        if (key.len > 255) {
            ngx_log_error(NGX_LOG_ERR, s->connection->log, 0,
                          "the value of the \"%V\" key "
                          "is more than 255 bytes: \"%V\"",
                          &ctx->key.value, &key);
            continue;
        }

        //hash the key
        hash = ngx_crc32_short(key.data, key.len);

        shpool = (ngx_slab_pool_t *) limits[i].shm_zone->shm.addr;

        ngx_shmtx_lock(&shpool->mutex);

        //the keys are kept in a red-black tree in shared memory; look up the node for this hash/key
        node = ngx_stream_limit_conn_lookup(ctx->rbtree, &key, hash);

        if (node == NULL) {
            //not found: allocate and insert a new node
            n = offsetof(ngx_rbtree_node_t, color)
                + offsetof(ngx_stream_limit_conn_node_t, data)
                + key.len;

            node = ngx_slab_alloc_locked(shpool, n);

            if (node == NULL) {
                ngx_shmtx_unlock(&shpool->mutex);
                ngx_stream_limit_conn_cleanup_all(s->connection->pool);
                return NGX_STREAM_SERVICE_UNAVAILABLE;
            }

            lc = (ngx_stream_limit_conn_node_t *) &node->color;

            node->key = hash;
            lc->len = (u_char) key.len;
            lc->conn = 1; //initial connection count
            ngx_memcpy(lc->data, key.data, key.len);

            ngx_rbtree_insert(ctx->rbtree, node);

        } else {

            lc = (ngx_stream_limit_conn_node_t *) &node->color;

            if ((ngx_uint_t) lc->conn >= limits[i].conn) {
                //connection limit exceeded: reject the session

                ngx_shmtx_unlock(&shpool->mutex);

                ngx_stream_limit_conn_cleanup_all(s->connection->pool);
                return NGX_STREAM_SERVICE_UNAVAILABLE;
            }

            lc->conn++; //one more connection for this key
        }

        ngx_shmtx_unlock(&shpool->mutex);

        cln = ngx_pool_cleanup_add(s->connection->pool,
                                   sizeof(ngx_stream_limit_conn_cleanup_t));
        if (cln == NULL) {
            return NGX_ERROR;
        }

        cln->handler = ngx_stream_limit_conn_cleanup;
        lccln = cln->data;

        lccln->shm_zone = limits[i].shm_zone;
        lccln->node = node;
    }

    return NGX_DECLINED;
}
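
The offsetof arithmetic used when allocating the node deserves a note: the limit_conn data overlays the rbtree node starting at its color byte, and the variable-length key is stored right after the struct, so one slab allocation holds the tree node, the counters, and the key. A simplified standalone sketch of that layout (the structs are cut down; nginx uses ngx_rbtree_node_t and ngx_stream_limit_conn_node_t):

/* simplified sketch of the variable-length node layout (not nginx code) */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {                   /* cut-down stand-in for ngx_rbtree_node_t */
    uint32_t        key;           /* crc32 of the limit key */
    unsigned char   color;         /* module data overlays from this byte on */
    unsigned char   data;
} rbtree_node_t;

typedef struct {                   /* stand-in for ngx_stream_limit_conn_node_t */
    unsigned char   color;
    unsigned char   len;           /* length of the key */
    unsigned short  conn;          /* current connection count */
    unsigned char   data[1];       /* key bytes continue past the struct */
} limit_conn_node_t;

int main(void)
{
    const char *key = "192.0.2.1";           /* e.g. the client address */
    size_t      key_len = strlen(key);

    /* same size computation as in ngx_stream_limit_conn_handler */
    size_t n = offsetof(rbtree_node_t, color)
               + offsetof(limit_conn_node_t, data)
               + key_len;

    rbtree_node_t     *node = calloc(1, n);
    limit_conn_node_t *lc = (limit_conn_node_t *) &node->color;

    node->key = 0x12345678;                  /* would be ngx_crc32_short(key) */
    lc->len = (unsigned char) key_len;
    lc->conn = 1;
    memcpy(lc->data, key, key_len);

    printf("allocated %zu bytes, conn %u, key %.*s\n",
           n, (unsigned) lc->conn, (int) lc->len, (char *) lc->data);

    free(node);
    return 0;
}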

Finally, recall the data-relay function ngx_stream_proxy_process: after the data has been forwarded, it calls ngx_stream_proxy_test_finalize:

static ngx_int_t ngx_stream_proxy_test_finalize(ngx_stream_session_t *s,
    ngx_uint_t from_upstream)
{
    ngx_connection_t             *c, *pc;
    ngx_log_handler_pt            handler;
    ngx_stream_upstream_t        *u;
    ngx_stream_proxy_srv_conf_t  *pscf;

    pscf = ngx_stream_get_module_srv_conf(s, ngx_stream_proxy_module);

    c = s->connection;
    u = s->upstream;
    pc = u->connected ? u->peer.connection : NULL;

    if (c->type == SOCK_DGRAM) {

        if (pscf->requests && u->requests < pscf->requests) {
            return NGX_DECLINED;
        }

        if (pscf->requests) {
            ngx_delete_udp_connection(c);
        }

        //pscf->responses is the proxy_responses directive: how many responses are expected per request.
        //When not configured it defaults to NGX_MAX_INT32_VALUE, i.e. unlimited;
        //in that case, or while the upstream has sent fewer responses than expected, the session is kept alive,
        //otherwise it is terminated right away. Configuring proxy_responses 0 therefore closes the session immediately, making its lifetime very short.
        if (pscf->responses == NGX_MAX_INT32_VALUE
            || u->responses < pscf->responses * u->requests)
        {
            return NGX_DECLINED;
        }

        if (pc == NULL || c->buffered || pc->buffered) {
            return NGX_DECLINED;
        }

        handler = c->log->handler;
        c->log->handler = NULL;

        ngx_log_error(NGX_LOG_INFO, c->log, 0,
                      "udp done"
                      ", packets from/to client:%ui/%ui"
                      ", bytes from/to client:%O/%O"
                      ", bytes from/to upstream:%O/%O",
                      u->requests, u->responses,
                      s->received, c->sent, u->received, pc ? pc->sent : 0);

        c->log->handler = handler;

        ngx_stream_proxy_finalize(s, NGX_STREAM_OK);

        return NGX_OK;
    }

    ……

    return NGX_OK;
}
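
The UDP branch above boils down to a small predicate. The helper below is hypothetical (it is not part of nginx) and merely restates the proxy_responses check, with NGX_MAX_INT32_VALUE represented by INT32_MAX:

/* hypothetical restatement of the UDP part of ngx_stream_proxy_test_finalize:
 * returns 1 when the session may be closed, 0 when it must be kept alive */
#include <stdint.h>
#include <stdio.h>

#define RESPONSES_UNSET  INT32_MAX       /* stands in for NGX_MAX_INT32_VALUE */

static int udp_session_done(uint32_t requests, uint32_t responses,
                            int64_t configured_responses)
{
    if (configured_responses == RESPONSES_UNSET) {
        return 0;                        /* proxy_responses not set: keep the session */
    }

    return responses >= configured_responses * (int64_t) requests;
}

int main(void)
{
    printf("%d\n", udp_session_done(1, 0, 1));                /* 0: still waiting for the reply          */
    printf("%d\n", udp_session_done(1, 1, 1));                /* 1: one request, one response, done      */
    printf("%d\n", udp_session_done(1, 0, 0));                /* 1: proxy_responses 0 closes immediately */
    printf("%d\n", udp_session_done(2, 3, RESPONSES_UNSET));  /* 0: no limit configured                  */

    return 0;
}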
