nginx源码分析(11)-进程启动分析(1)

nginx的进程启动过程是在ngx_master_process_cycle(src/os/unix/ngx_process_cycle.c)中完成的(单进程是通过ngx_single_process_cycle完成,这里只分析多进程的情况),在ngx_master_process_cycle中,会根据配置文件的worker_processes值创建多个子进程,即一个master进程和多个worker进程。进程之间、进程与外部之间保持通信,进程之间是通过socketpair进行通信的,socketpair是一对全双工的无名socket,可以当作管道使用,和管道不同的是,每条socket既可以读也可以写,而管道只能用于写或者用于读;进程与外部之间是通过信号通信的。

master进程主要进行一些全局性的初始化工作和管理worker的工作;事件处理是在worker中进行的。

进程启动的过程中,有一些重要的全局数据会被设置,最重要的是进程表ngx_processes,master每创建一个worker都会把一个设置好的ngx_process_t结构变量放入ngx_processes中,进程表长度为1024,刚创建的进程存放在ngx_process_slot位置,ngx_last_process是进程表中最后一个存量进程的下一个位置,ngx_process_t是进程在nginx中的抽象:

/*
 * Entry point executed inside a spawned child process. It receives the
 * current cycle and the opaque data pointer that was handed to
 * ngx_spawn_process().
 */
typedef void (*ngx_spawn_proc_pt) (ngx_cycle_t *cycle, void *data);

/*
 * One entry of the global process table ngx_processes. The master fills in
 * an entry for every child it spawns.
 */
typedef struct {
    ngx_pid_t           pid;        /* process id; -1 marks a free (reclaimed) entry */
    int                 status;     /* exit status of the process */
    ngx_socket_t        channel[2]; /* socket pair created with socketpair(); both -1 for detached children */

    ngx_spawn_proc_pt   proc;       /* function the child runs */
    void               *data;       /* argument passed to proc */
    char               *name;       /* human-readable name used in log messages */

    /* State flags; see the switch at the end of ngx_spawn_process(). */
    unsigned            respawn:1;    /* set for NGX_PROCESS_RESPAWN / NGX_PROCESS_JUST_RESPAWN */
    unsigned            just_spawn:1; /* set for NGX_PROCESS_JUST_SPAWN / NGX_PROCESS_JUST_RESPAWN */
    unsigned            detached:1;   /* set for NGX_PROCESS_DETACHED */
    unsigned            exiting:1;    /* process is in the middle of exiting */
    unsigned            exited:1;     /* process has exited */
} ngx_process_t;(src/os/unix/ngx_process.h)

pid是进程的id;

status是进程的退出状态;

channel[2]是socketpair创建的一对socket句柄;

proc是进程的执行函数,data为proc的参数;

最后的几个位域是进程的状态标志,respawn:进程退出后需要由master重新创建;just_spawn:刚刚创建的进程(重新加载配置时用来区分新旧worker,避免把发给旧worker的退出信号也发给新worker);detached:分离的进程;exiting:正在退出;exited:已经退出。

进程间通信是利用socketpair创建的一对socket进行的,通信中传输的是ngx_channel_t结构变量:

/*
 * Message exchanged between processes over the socketpair channel.
 * For NGX_CMD_OPEN_CHANNEL the pid/slot/fd fields describe the process
 * being announced (they are filled from its ngx_processes entry), not the
 * process that sends the message.
 */
typedef struct {
     ngx_uint_t  command;   /* one of the NGX_CMD_* values */
     ngx_pid_t   pid;       /* pid of the process the command refers to */
     ngx_int_t   slot;      /* index of that process in ngx_processes */
     ngx_fd_t    fd;        /* descriptor handed to the receiver (channel[0] of that process) */
} ngx_channel_t;(src/os/unix/ngx_channel.h)

command是要发送的命令,有5种:

/* Values for ngx_channel_t.command. */
#define NGX_CMD_OPEN_CHANNEL   1   /* announce a newly spawned process and pass its channel[0] */
#define NGX_CMD_CLOSE_CHANNEL  2   /* presumably the inverse of OPEN_CHANNEL -- TODO confirm against ngx_channel handler */
#define NGX_CMD_QUIT           3   /* same effect as SIGQUIT according to the master loop comments */
#define NGX_CMD_TERMINATE      4   /* same effect as SIGTERM/SIGINT according to the master loop comments */
#define NGX_CMD_REOPEN         5   /* presumably "reopen log files", like SIGUSR1 -- TODO confirm */

pid是命令所涉及进程的进程id(对NGX_CMD_OPEN_CHANNEL来说,是刚创建的那个worker的pid,而不是发送消息的master的pid);

slot是该进程在进程表ngx_processes中的下标;

fd是发送给对方的句柄。

进程的启动过程是比较重要的一个环节,为了把这个过程分析透彻,下面会多采用注释代码的方式分析。

首先分析ngx_master_process_cycle函数,可以分解为以下各步骤:

1、master设置一些需要处理的信号,这些信号包括SIGCHLD,SIGALRM,SIGIO,SIGINT,NGX_RECONFIGURE_SIGNAL(SIGHUP),NGX_REOPEN_SIGNAL(SIGUSR1),
NGX_NOACCEPT_SIGNAL(SIGWINCH),NGX_TERMINATE_SIGNAL(SIGTERM),NGX_SHUTDOWN_SIGNAL(SIGQUIT),
NGX_CHANGEBIN_SIGNAL(SIGUSR2);

2、调用ngx_setproctitle设置进程标题,title = "master process" + ngx_argv[0] + ... + ngx_argv[ngx_argc-1];

3、调用ngx_start_worker_processes(cycle, ccf->worker_processes, NGX_PROCESS_RESPAWN)启动worker进程;

4、调用ngx_start_cache_manager_processes(cycle, 0)启动文件cache管理进程,有些模块需要文件cache,比如fastcgi模块,这些模块会把文件cache路径添加到cycle->paths中,文件cache管理进程会定期调用这些模块的文件cache处理钩子处理一下文件cache;

5、master循环处理信号,主循环的代码如下:
    /*
     * Main loop of the master process (excerpt from ngx_master_process_cycle).
     * Each iteration sleeps in sigsuspend() and, once woken up, reacts to the
     * global flags (ngx_reap, ngx_terminate, ngx_quit, ...). Those flags are
     * presumably set by the signal handler, which is not shown here.
     */
    ngx_new_binary = 0;
    delay = 0;
    live = 1;

    for ( ;; ) {
        // delay is the time (in milliseconds) to wait for the workers to exit.
        // It is non-zero only after a terminate request: it starts at 50,
        // doubles on every iteration and arms a one-shot ITIMER_REAL timer so
        // that sigsuspend() below wakes up again even if no child has exited.
        if (delay) {
            delay *= 2;

            ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                           "temination cycle: %d", delay);

            itv.it_interval.tv_sec = 0;
            itv.it_interval.tv_usec = 0;
            itv.it_value.tv_sec = delay / 1000;
            itv.it_value.tv_usec = (delay % 1000 ) * 1000;

            // arm the timer
            if (setitimer(ITIMER_REAL, &itv, NULL) == -1) {
                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                              "setitimer() failed");
            }
        }

        ngx_log_debug0(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "sigsuspend");

        // suspend the master until a signal is delivered
        // (this includes the timer expiry armed above)
        sigsuspend(&set);

        ngx_time_update(0, 0);

        ngx_log_debug0(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "wake up");

        // ngx_reap == 1: a child has exited (SIGCHLD was received)
        if (ngx_reap) {
            ngx_reap = 0;
            ngx_log_debug0(NGX_LOG_DEBUG_EVENT, cycle->log, 0, "reap children");

            // Walk over all children. According to the article,
            // ngx_reap_children() restarts workers that exited abnormally and
            // returns 0 once no child is left alive; the result is kept in live.
            live = ngx_reap_children(cycle);
        }

        // All children are gone and a terminate request (NGX_CMD_TERMINATE,
        // SIGTERM or SIGINT -> ngx_terminate) or a quit request (NGX_CMD_QUIT
        // or SIGQUIT -> ngx_quit) is pending: the master itself exits.
        if (!live && (ngx_terminate || ngx_quit)) {
            ngx_master_process_exit(cycle);
        }

        // Terminate request: tell all workers to exit and keep waiting for them.
        if (ngx_terminate) {
            // start the back-off delay on the first pass
            if (delay == 0) {
                delay = 50;
            }

            if (delay > 1000) {
                // waited long enough: force-kill the workers with SIGKILL
                ngx_signal_worker_processes(cycle, SIGKILL);
            } else {
                // ask the workers to exit with SIGTERM
                ngx_signal_worker_processes(cycle,
                                       ngx_signal_value(NGX_TERMINATE_SIGNAL));
            }

            continue;
        }

        // Quit request (NGX_CMD_QUIT or SIGQUIT).
        if (ngx_quit) {
            // send SIGQUIT to all workers
            ngx_signal_worker_processes(cycle,
                                        ngx_signal_value(NGX_SHUTDOWN_SIGNAL));

            // close every listening socket held by the master
            ls = cycle->listening.elts;
            for (n = 0; n < cycle->listening.nelts; n++) {
                if (ngx_close_socket(ls[n].fd) == -1) {
                    ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_socket_errno,
                                  ngx_close_socket_n " %V failed",
                                  &ls[n].addr_text);
                }
            }
            cycle->listening.nelts = 0;

            continue;
        }

        // SIGHUP received: reconfigure.
        if (ngx_reconfigure) {
            ngx_reconfigure = 0;

            // The binary has already been replaced (ngx_new_binary is set):
            // just start workers again, without re-reading the configuration.
            if (ngx_new_binary) {
                ngx_start_worker_processes(cycle, ccf->worker_processes,
                                           NGX_PROCESS_RESPAWN);
                ngx_start_cache_manager_processes(cycle, 0);
                ngx_noaccepting = 0;

                continue;
            }

            ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "reconfiguring");

            // Build a new cycle from the configuration; on failure keep
            // running with the current one.
            cycle = ngx_init_cycle(cycle);
            if (cycle == NULL) {
                cycle = (ngx_cycle_t *) ngx_cycle;
                continue;
            }

            // Start a new set of workers with the new cycle, then send the
            // shutdown signal to the worker processes. (The JUST_RESPAWN type
            // sets just_spawn on the new entries; presumably that is how
            // ngx_signal_worker_processes() spares them -- TODO confirm.)
            ngx_cycle = cycle;
            ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx,
                                                   ngx_core_module);
            ngx_start_worker_processes(cycle, ccf->worker_processes,
                                       NGX_PROCESS_JUST_RESPAWN);
            ngx_start_cache_manager_processes(cycle, 1);
            live = 1;
            ngx_signal_worker_processes(cycle,
                                        ngx_signal_value(NGX_SHUTDOWN_SIGNAL));
        }

        // ngx_restart is set elsewhere (according to the article, while
        // ngx_noaccepting == 1); start the workers again.
        if (ngx_restart) {
            ngx_restart = 0;
            ngx_start_worker_processes(cycle, ccf->worker_processes,
                                       NGX_PROCESS_RESPAWN);
            ngx_start_cache_manager_processes(cycle, 0);
            live = 1;
        }

        // SIGUSR1 received: reopen the log files and tell the workers to do the same.
        if (ngx_reopen) {
            ngx_reopen = 0;
            ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "reopening logs");
            ngx_reopen_files(cycle, ccf->user);
            ngx_signal_worker_processes(cycle,
                                        ngx_signal_value(NGX_REOPEN_SIGNAL));
        }

        // SIGUSR2 received: hot binary upgrade.
        if (ngx_change_binary) {
            ngx_change_binary = 0;
            ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "changing binary");
            // start the new binary (the article notes this ends up in execve)
            // and remember the result in ngx_new_binary
            ngx_new_binary = ngx_exec_new_binary(cycle, ngx_argv);
        }

        // SIGWINCH received: stop accepting requests; the workers are told
        // to shut down while the master keeps running.
        if (ngx_noaccept) {
            ngx_noaccept = 0;
            ngx_noaccepting = 1;
            ngx_signal_worker_processes(cycle,
                                        ngx_signal_value(NGX_SHUTDOWN_SIGNAL));
        }
    }

真正创建worker子进程的函数是ngx_start_worker_processes,这个函数本身很简单:

/*
 * Spawn n worker processes of the given spawn type.
 *
 * After each spawn, the new worker's pid, process-table slot and channel[0]
 * descriptor are packed into an NGX_CMD_OPEN_CHANNEL message and handed to
 * ngx_pass_open_channel(), which forwards it to the processes already
 * present in the table.
 *
 * cycle - current cycle (used for logging and passed on to the children)
 * n     - number of workers to start
 * type  - spawn type forwarded to ngx_spawn_process()
 */
static void
ngx_start_worker_processes(ngx_cycle_t *cycle, ngx_int_t n, ngx_int_t type)
{
    ngx_int_t      worker;
    ngx_int_t      slot;
    ngx_channel_t  msg;

    ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "start worker processes");

    /* every message sent from here is an "open channel" announcement */
    msg.command = NGX_CMD_OPEN_CHANNEL;

    worker = 0;

    while (worker < n) {

        cpu_affinity = ngx_get_cpu_affinity(worker);

        /*
         * Fork the worker; the child runs ngx_worker_process_cycle.
         * ngx_spawn_process() leaves the index of the entry it used
         * in ngx_process_slot.
         */
        ngx_spawn_process(cycle, ngx_worker_process_cycle, NULL,
                          "worker process", type);

        /* describe the worker that has just been created */
        slot = ngx_process_slot;

        msg.slot = slot;
        msg.pid = ngx_processes[slot].pid;
        msg.fd = ngx_processes[slot].channel[0];

        /* tell the existing processes about it */
        ngx_pass_open_channel(cycle, &msg);

        worker++;
    }
}

把ngx_pass_open_channel展开如下:

/*
 * Forward the channel message ch to every usable process in the table.
 *
 * An entry receives the message only if it is not the process that was
 * spawned last (ngx_process_slot), its pid is valid and its channel[0]
 * descriptor is open.
 *
 * cycle - current cycle (used for logging)
 * ch    - message to send; written as-is with sizeof(ngx_channel_t)
 */
static void
ngx_pass_open_channel(ngx_cycle_t *cycle, ngx_channel_t *ch)
{
    ngx_int_t  peer;

    peer = 0;

    while (peer < ngx_last_process) {

        if (peer != ngx_process_slot
            && ngx_processes[peer].pid != -1
            && ngx_processes[peer].channel[0] != -1)
        {
            ngx_log_debug6(NGX_LOG_DEBUG_CORE, cycle->log, 0,
                          "pass channel s:%d pid:%P fd:%d to s:%i pid:%P fd:%d",
                          ch->slot, ch->pid, ch->fd,
                          peer, ngx_processes[peer].pid,
                          ngx_processes[peer].channel[0]);

            /* TODO: NGX_AGAIN */

            /* deliver the message over this process's channel[0] */
            ngx_write_channel(ngx_processes[peer].channel[0],
                              ch, sizeof(ngx_channel_t), cycle->log);
        }

        peer++;
    }
}

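第三个要剖开的函数是创建子进程的ngx_pid_t ngx_spawn_process(ngx_cycle_t *cycle,  ngx_spawn_proc_pt proc, void *data, char *name, ngx_int_t respawn),这个函数定义在src/os/unix/ngx_process.c中,proc是子进程的执行函数,data是其参数,name是子进程的名字;respawn有两种含义:为负值时是进程的创建类型(NGX_PROCESS_NORESPAWN、NGX_PROCESS_JUST_SPAWN、NGX_PROCESS_RESPAWN、NGX_PROCESS_JUST_RESPAWN、NGX_PROCESS_DETACHED之一),非负时是进程表下标,表示重用该表项重新创建进程。函数体如下: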

/*
 * Body of ngx_spawn_process(cycle, proc, data, name, respawn).
 *
 * Picks a process-table entry, creates the socketpair channel for it
 * (unless the child is detached), forks, runs proc(cycle, data) in the
 * child and records the child in ngx_processes[] in the parent.
 *
 * respawn >= 0 : index of an existing table entry to reuse; only pid and
 *                exited are updated afterwards.
 * respawn <  0 : one of the NGX_PROCESS_* spawn types; a free entry is
 *                looked up and fully initialised.
 *
 * Returns the child's pid, or NGX_INVALID_PID when the table is full or
 * when socketpair()/fcntl()/ioctl()/fork() fails.
 *
 * Side effects: sets ngx_process_slot to the chosen entry, sets
 * ngx_channel for non-detached children and may advance ngx_last_process.
 */
{
    u_long     on;
    ngx_pid_t  pid;
    ngx_int_t  s; /* index of the table entry used for the new child */

    if (respawn >= 0) {
        /* replacing ngx_processes[respawn]: reuse that entry */
        s = respawn;

    } else {
        /* look for an entry that has been reclaimed (pid == -1) */
        for (s = 0; s < ngx_last_process; s++) {
            if (ngx_processes[s].pid == -1) {
                break;
            }
        }

        /* no free entry and the table cannot grow any further */
        if (s == NGX_MAX_PROCESSES) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
                          "no more than %d processes can be spawned",
                          NGX_MAX_PROCESSES);
            return NGX_INVALID_PID;
        }
    }


    /* only non-detached children get a channel */
    if (respawn != NGX_PROCESS_DETACHED) {

        /* Solaris 9 still has no AF_LOCAL */

        /*
         * A socketpair is created for inter-process communication; the
         * master creates one pair per worker and keeps both ends,
         * channel[0] and channel[1], open in its own address space.
         *
         * A newly forked worker inherits every socketpair the master has
         * created so far. As described in the article (the code lives in
         * ngx_worker_process_init, not shown here), the worker then closes
         * channel[0] of its own pair and channel[1] of every other
         * worker's pair, keeps channel[1] of its own pair and channel[0]
         * of the others, and watches its own channel[1] for readability.
         * Each worker therefore holds channel[0] of the other workers and
         * can sendmsg(channel[0], ...) to them.
         *
         * Workers created earlier cannot inherit the channel[0] of workers
         * created later. That gap is closed in ngx_start_worker_processes:
         * after each spawn the master calls ngx_pass_open_channel to
         * broadcast the new worker's channel[0], pid and table slot to the
         * workers that already exist, so once all workers are started each
         * of them knows channel[0] of all the others.
         */

        /* create a pair of connected, unnamed sockets */
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, ngx_processes[s].channel) == -1)
        {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "socketpair() failed while spawning \"%s\"", name);
            return NGX_INVALID_PID;
        }

        ngx_log_debug2(NGX_LOG_DEBUG_CORE, cycle->log, 0,
                       "channel %d:%d",
                       ngx_processes[s].channel[0],
                       ngx_processes[s].channel[1]);

        /* switch both ends to non-blocking mode */
        if (ngx_nonblocking(ngx_processes[s].channel[0]) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          ngx_nonblocking_n " failed while spawning \"%s\"",
                          name);
            ngx_close_channel(ngx_processes[s].channel, cycle->log);
            return NGX_INVALID_PID;
        }

        if (ngx_nonblocking(ngx_processes[s].channel[1]) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          ngx_nonblocking_n " failed while spawning \"%s\"",
                          name);
            ngx_close_channel(ngx_processes[s].channel, cycle->log);
            return NGX_INVALID_PID;
        }

        /* enable signal-driven (asynchronous) I/O on channel[0] */
        on = 1;
        if (ioctl(ngx_processes[s].channel[0], FIOASYNC, &on) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "ioctl(FIOASYNC) failed while spawning \"%s\"", name);
            ngx_close_channel(ngx_processes[s].channel, cycle->log);
            return NGX_INVALID_PID;
        }

        /*
         * make the calling process (ngx_pid) the owner of channel[0] so
         * that the SIGIO for this descriptor is delivered only to it
         */
        if (fcntl(ngx_processes[s].channel[0], F_SETOWN, ngx_pid) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "fcntl(F_SETOWN) failed while spawning \"%s\"", name);
            ngx_close_channel(ngx_processes[s].channel, cycle->log);
            return NGX_INVALID_PID;
        }

        /* close both ends automatically if the process calls exec */
        if (fcntl(ngx_processes[s].channel[0], F_SETFD, FD_CLOEXEC) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "fcntl(FD_CLOEXEC) failed while spawning \"%s\"",
                           name);
            ngx_close_channel(ngx_processes[s].channel, cycle->log);
            return NGX_INVALID_PID;
        }

        if (fcntl(ngx_processes[s].channel[1], F_SETFD, FD_CLOEXEC) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "fcntl(FD_CLOEXEC) failed while spawning \"%s\"",
                           name);
            ngx_close_channel(ngx_processes[s].channel, cycle->log);
            return NGX_INVALID_PID;
        }

        /*
         * remember channel[1]; the forked child inherits this value and,
         * per the article, listens on it for readable events
         */
        ngx_channel = ngx_processes[s].channel[1];

    } else {
        ngx_processes[s].channel[0] = -1;
        ngx_processes[s].channel[1] = -1;
    }

    /* publish the entry index of the child that is about to be created */
    ngx_process_slot = s;


    /* create the child process */
    pid = fork();

    switch (pid) {

    case -1:
        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                      "fork() failed while spawning \"%s\"", name);
        ngx_close_channel(ngx_processes[s].channel, cycle->log);
        return NGX_INVALID_PID;

    case 0:
        /* child: refresh the cached pid, then run the process function */
        ngx_pid = ngx_getpid();
        /* NOTE(review): proc is presumably not expected to return -- confirm */
        proc(cycle, data);
        break;

    default:
        break;
    }

    ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "start %s %P", name, pid);

    /* fields that are refreshed on every spawn */
    ngx_processes[s].pid = pid;
    ngx_processes[s].exited = 0;

    /* reusing ngx_processes[respawn]: the remaining fields are already set */
    if (respawn >= 0) {
        return pid;
    }

    /* first use of this entry: fill in the remaining fields */
    ngx_processes[s].proc = proc;
    ngx_processes[s].data = data;
    ngx_processes[s].name = name;
    ngx_processes[s].exiting = 0;

    /* translate the spawn type into the entry's state flags */
    switch (respawn) {

    case NGX_PROCESS_NORESPAWN:
        ngx_processes[s].respawn = 0;
        ngx_processes[s].just_spawn = 0;
        ngx_processes[s].detached = 0;
        break;

    case NGX_PROCESS_JUST_SPAWN:
        ngx_processes[s].respawn = 0;
        ngx_processes[s].just_spawn = 1;
        ngx_processes[s].detached = 0;
        break;

    case NGX_PROCESS_RESPAWN:
        ngx_processes[s].respawn = 1;
        ngx_processes[s].just_spawn = 0;
        ngx_processes[s].detached = 0;
        break;

    case NGX_PROCESS_JUST_RESPAWN:
        ngx_processes[s].respawn = 1;
        ngx_processes[s].just_spawn = 1;
        ngx_processes[s].detached = 0;
        break;

    /* detached child (the article wonders whether the master controls it) */
    case NGX_PROCESS_DETACHED:
        ngx_processes[s].respawn = 0;
        ngx_processes[s].just_spawn = 0;
        ngx_processes[s].detached = 1;
        break;
    }

    /* the entry was appended at the end of the table: grow the used range */
    if (s == ngx_last_process) {
        ngx_last_process++;
    }

    return pid;
}

go on

你可能感兴趣的:(nginx,socket,cache,cmd,Signal,delay)