nginx aio机制详解

linux 2.6以上内核提供以下几个系统调用来支持aio:

1、  SYS_io_setup:建立aio 的context

2、  SYS_io_submit: 提交I/O操作请求

3、  SYS_io_getevents:获取已完成的I/O事件

4、  SYS_io_cancel:取消I/O操作请求

5、  SYS_io_destroy:销毁aio的context

 

    nginx中将aio和epoll事件模型(假设nginx使用epoll事件模型)组合起来使用,当请求的I/O操作完成时调用epoll相关函数通知应用程序来读取。组合的关键点在于使用了eventfd对象,那什么是eventfd呢?有关eventfd的详解资料参考http://linux.die.net/man/2/eventfd。

 

    nginx在worker工作进程启动后初始化epoll事件模型时初始化aio(ngx_epoll_aio_init函数负责aio的初始化)。实现原理如下:在aio初始化时调用eventfd系统调用创建一个eventfd对象,eventfd系统调用返回一个与eventfd对象关联的文件描述符,设置该描述符为非阻塞并添加到epoll中。当该描述符可读时,epoll_wait函数返回,此时调用read函数读取当前已完成的I/O操作个数,然后通过调用SYS_io_getevents系统调用就可以获取已完成的I/O事件。但eventfd描述符什么时候可读呢?这需要在提交I/O事件时将eventfd与aio关联起来(实现代码在ngx_file_aio_read函数中),提交I/O事件后如果I/O事件已完成,就将当前完成的事件个数写入到eventfd描述符相关的计数器中并标识eventfd可读。下面来看看nginx中与aio相关的源代码:


ngx_epoll_aio_init函数:

/*
 * Initialise the AIO notification path for the epoll module.
 *
 * Creates an eventfd object, switches its descriptor to non-blocking mode,
 * creates an AIO context sized by epcf->aio_requests, and registers the
 * eventfd descriptor with the epoll instance `ep` so that
 * ngx_epoll_eventfd_handler runs when the descriptor becomes readable.
 *
 * On any failure the error is logged, whatever was already created is
 * released, and ngx_file_aio is set to 0.  The function returns nothing.
 */
static void
ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf)
{
    int                 nonblock = 1;
    struct epoll_event  reg;

    /*
     * Create the eventfd object; the 0 is the initial value of its counter.
     * On success the call yields the descriptor bound to the object.
     */
    ngx_eventfd = syscall(SYS_eventfd, 0);

    if (ngx_eventfd == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                      "eventfd() failed");
        ngx_file_aio = 0;
        return;
    }

    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                   "eventfd: %d", ngx_eventfd);

    /* Put the descriptor into non-blocking mode. */
    if (ioctl(ngx_eventfd, FIONBIO, &nonblock) == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                      "ioctl(eventfd, FIONBIO) failed");
        goto close_eventfd;
    }

    /* Create the AIO context via the SYS_io_setup wrapper. */
    if (io_setup(epcf->aio_requests, &ngx_aio_ctx) == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                      "io_setup() failed");
        goto close_eventfd;
    }

    /* Wire the pseudo-connection and its read event to each other. */
    ngx_eventfd_conn.fd = ngx_eventfd;
    ngx_eventfd_conn.read = &ngx_eventfd_event;
    ngx_eventfd_conn.log = cycle->log;

    /* ngx_epoll_eventfd_handler is invoked once ngx_eventfd is readable. */
    ngx_eventfd_event.data = &ngx_eventfd_conn;
    ngx_eventfd_event.handler = ngx_epoll_eventfd_handler;
    ngx_eventfd_event.log = cycle->log;
    ngx_eventfd_event.active = 1;

    reg.data.ptr = &ngx_eventfd_conn;
    reg.events = EPOLLIN|EPOLLET;

    /* Add ngx_eventfd to the epoll set. */
    if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_eventfd, &reg) == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                      "epoll_ctl(EPOLL_CTL_ADD, eventfd) failed");

        /* The AIO context already exists at this point; tear it down. */
        if (io_destroy(ngx_aio_ctx) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "io_destroy() failed");
        }

        goto close_eventfd;
    }

    return;

close_eventfd:

    if (close(ngx_eventfd) == -1) {
        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                      "eventfd close() failed");
    }

    /* Reset the module-level AIO state. */
    ngx_eventfd = -1;
    ngx_aio_ctx = 0;
    ngx_file_aio = 0;
}

ngx_epoll_eventfd_handler函数:

/*
 * Read-event handler for the eventfd descriptor.
 *
 * Reads the 8-byte eventfd counter into `ready`, then polls the AIO context
 * with a zero timeout, fetching up to 64 completion events per call, while
 * `ready` is non-zero.  The handler returns when io_getevents() reports no
 * events or fails (the failure is logged).
 *
 * NOTE: this excerpt elides two parts of the body (marked ". . ."):
 * the code right after read() and the per-event processing.  Presumably the
 * latter is also where `ready` is decreased -- confirm against the full
 * source.
 */
static void
ngx_epoll_eventfd_handler(ngx_event_t *ev)
{
    int               n, events;
    long              i;
    uint64_t          ready;
    ngx_err_t         err;
    ngx_event_t      *e;
    ngx_event_aio_t  *aio;
    struct io_event   event[64];
    struct timespec   ts;

    ngx_log_debug0(NGX_LOG_DEBUG_EVENT, ev->log, 0, "eventfd handler");

    /* Read the number of completed I/O operations from the eventfd. */
    n = read(ngx_eventfd, &ready, 8);

    . . . 

    /* Zero timeout for io_getevents(). */
    ts.tv_sec = 0;
    ts.tv_nsec = 0;

    /* Keep calling io_getevents() to collect every completed I/O operation. */
    while (ready) {

        events = io_getevents(ngx_aio_ctx, 1, 64, event, &ts);

        /* `events` is an int, so it is logged with %d (%l expects a long). */
        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
                       "io_getevents: %d", events);

        if (events > 0) {

            . . . 

            continue;
        }

        if (events == 0) {
            return;
        }

        /* events == -1 */
        ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno,
                      "io_getevents() failed");
        return;
    }
}


ngx_file_aio_read函数:

/*
 * Submit an asynchronous read of `size` bytes at `offset` from file->fd
 * into `buf`.
 *
 * Returns NGX_AGAIN when the request was queued (io_submit() accepted one
 * iocb), the result of a synchronous ngx_read_file() when io_submit() fails
 * with NGX_EAGAIN or NGX_ENOSYS, and NGX_ERROR for any other submit failure.
 * On NGX_ENOSYS, ngx_file_aio is also set to 0.
 *
 * NOTE: the excerpt elides the start of the body (". . ."); `aio` and `ev`
 * are presumably set up there -- confirm against the full source.
 */
ssize_t
ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset,
    ngx_pool_t *pool)
{
    ngx_err_t         err;
    struct iocb      *piocb[1];
    ngx_event_t      *ev;
    ngx_event_aio_t  *aio;

    . . . 

    ngx_memzero(&aio->aiocb, sizeof(struct iocb));

    /*
     * Fill in the iocb.  Pay attention to the aio_flags and aio_resfd
     * members set at the end.
     */
    /* The event pointer travels with the request in aio_data. */
    aio->aiocb.aio_data = (uint64_t) (uintptr_t) ev;
    aio->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
    aio->aiocb.aio_fildes = file->fd;
    aio->aiocb.aio_buf = (uint64_t) (uintptr_t) buf;
    aio->aiocb.aio_nbytes = size;
    aio->aiocb.aio_offset = offset;
    /*
     * With IOCB_FLAG_RESFD set, the descriptor stored in aio_resfd is used
     * to notify user space that the I/O operation has completed.
     */
    aio->aiocb.aio_flags = IOCB_FLAG_RESFD;
    aio->aiocb.aio_resfd = ngx_eventfd;

    ev->handler = ngx_file_aio_event_handler;

    piocb[0] = &aio->aiocb;

    /* Submit the I/O request. */
    if (io_submit(ngx_aio_ctx, 1, piocb) == 1) {
        /* Queued: mark the event as in flight and not yet completed. */
        ev->active = 1;
        ev->ready = 0;
        ev->complete = 0;

        return NGX_AGAIN;
    }

    /* Save the error code before any further call can change it. */
    err = ngx_errno;

    /* NGX_EAGAIN: fall back to a synchronous read without logging. */
    if (err == NGX_EAGAIN) {
        return ngx_read_file(file, buf, size, offset);
    }

    ngx_log_error(NGX_LOG_CRIT, file->log, err,
                  "io_submit(\"%V\") failed", &file->name);

    /* NGX_ENOSYS: clear ngx_file_aio and read synchronously instead. */
    if (err == NGX_ENOSYS) {
        ngx_file_aio = 0;
        return ngx_read_file(file, buf, size, offset);
    }

    return NGX_ERROR;
}

io_setup函数:

/*
 * Thin wrapper around the SYS_io_setup system call.
 *
 * `nr_reqs` and `ctx` are passed to the kernel unchanged.  The result of
 * syscall() is returned as an int; the caller in this file treats -1 as
 * failure.
 */
static int
io_setup(u_int nr_reqs, aio_context_t *ctx)
{
    long  rc;

    rc = syscall(SYS_io_setup, nr_reqs, ctx);

    return (int) rc;
}

io_destroy函数:

/*
 * Thin wrapper around the SYS_io_destroy system call.
 *
 * `ctx` is passed to the kernel unchanged.  The result of syscall() is
 * returned as an int; the caller in this file treats -1 as failure.
 */
static int
io_destroy(aio_context_t ctx)
{
    long  rc;

    rc = syscall(SYS_io_destroy, ctx);

    return (int) rc;
}

io_getevents函数:

/*
 * Thin wrapper around the SYS_io_getevents system call.
 *
 * All five arguments are passed to the kernel unchanged: the AIO context,
 * the minimum and maximum number of events to fetch, the output array, and
 * the timeout.  The result of syscall() is returned as an int; the caller in
 * this file reads a positive value as an event count, 0 as "nothing
 * available", and -1 as failure.
 */
static int
io_getevents(aio_context_t ctx, long min_nr, long nr, struct io_event *events,
    struct timespec *tmo)
{
    long  rc;

    rc = syscall(SYS_io_getevents, ctx, min_nr, nr, events, tmo);

    return (int) rc;
}

io_submit函数:

/*
 * Thin wrapper around the SYS_io_submit system call.
 *
 * `ctx`, the request count `n`, and the iocb pointer array `paiocb` are
 * passed to the kernel unchanged.  The result of syscall() is returned as an
 * int; the caller in this file submits one request and treats a result of 1
 * as success.
 */
static int
io_submit(aio_context_t ctx, long n, struct iocb **paiocb)
{
    long  rc;

    rc = syscall(SYS_io_submit, ctx, n, paiocb);

    return (int) rc;
}



你可能感兴趣的:(nginx)