Redis network source code

To read the Redis code you must try to grasp its main flow, peeling the whole codebase like an onion, layer by layer. Redis is a client-server architecture, and underneath the server and the client sits multiplexed IO. To understand the server and client you first have to understand the IO multiplexing Redis uses, because that is where Redis's efficiency comes from.

Redis IO multiplexing

Redis is also cross-platform code, supporting both Windows and Linux; this article focuses on Linux.

The five IO models

The book "Unix Network Programming" summarizes five IO models:

  • BIO, blocking IO: accept, read, and friends all block, which keeps the interaction between a TCP server and its clients strictly ordered, but also makes it rather inefficient.
  • NIO, non-blocking IO: read, for example, does not block but returns immediately with errno set to EAGAIN/EWOULDBLOCK when no data is ready; the caller must retry until the read succeeds and then process the data (see the sketch below).
  • IO multiplexing: on Linux this means epoll, which reports every fd whose state has changed in the form of events.
  • Signal-driven IO.
  • AIO, asynchronous IO.

Redis uses NIO together with IO multiplexing. This matters so much because Redis runs as a single process with a single-threaded event loop, yet achieves very high throughput; how it pulls that off is exactly why Redis's low-level network design is required reading.
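To make the non-blocking retry pattern above concrete, here is a minimal sketch in plain POSIX C (an illustration, not Redis code):

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

ssize_t nonblocking_read(int fd, char *buf, size_t len)
{
    /* switch the fd to non-blocking mode */
    fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
    for (;;) {
        ssize_t n = read(fd, buf, len);
        if (n >= 0)
            return n;    /* data arrived, or EOF when n == 0 */
        if (errno == EAGAIN || errno == EWOULDBLOCK)
            continue;    /* nothing ready yet: retry; a real server would
                          * wait in epoll instead of spinning like this */
        return -1;       /* genuine error */
    }
}

The spinning retry is exactly the weakness that IO multiplexing removes: instead of polling each fd itself, the caller asks epoll to wake it only when some fd is ready.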

IO multiplexing: an epoll example

The first thing to understand is IO multiplexing. On Linux, Redis uses epoll, and the Redis network layer is essentially an evolution of the following epoll example:

#include <iostream>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include <cerrno>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/epoll.h>

using namespace std;

int main(int argc, char *argv[])
{
    if (argc != 3)
    {
        cout << "usage: " << argv[0] << " ip port" << endl;
        return -1;
    }

    char *szIp = argv[1];
    in_addr_t iIp = inet_addr(szIp);
    if (iIp == INADDR_NONE)
    {
        cerr << "fail to parse ip: " << szIp << endl;
        return -1;
    }
    char *pEnd = NULL;
    uint16_t usPort = strtoul(argv[2], &pEnd, 10);
    if (*pEnd != '\0')
    {
        cerr << "fail to parse port: " << argv[2] << endl;
        return -1;
    }

    int iSockFd = socket(AF_INET, SOCK_STREAM, 0);
    if (iSockFd < 0)
    {
        cerr << "fail to create socket, err: " << strerror(errno) << endl;
        return -1;
    }
    cout << "create socket fd " << iSockFd << endl;

    sockaddr_in oAddr;
    memset(&oAddr, 0, sizeof(oAddr));
    oAddr.sin_family = AF_INET;
    oAddr.sin_addr.s_addr = iIp;
    oAddr.sin_port = htons(usPort);
    if (bind(iSockFd, (sockaddr *)&oAddr, sizeof(oAddr)) < 0)
    {
        cerr << "fail to bind addr " << szIp << ":" << usPort << ", err: " << strerror(errno) << endl;
        return -1;
    }
    cout << "bind addr " << szIp << ":" << usPort << endl;

    if (listen(iSockFd, 100) < 0)
    {
        cerr << "fail to listen on " << szIp << ":" << usPort << ", err: " << strerror(errno) << endl;
        return -1;
    }
    cout << "listen on socket fd " << iSockFd << endl;
    
    int iEpollFd = epoll_create(1024);
    if (iEpollFd < 0)
    {
        cerr << "fail to create epoll, err: " << strerror(errno) << endl;
        return -1;
    }

    epoll_event oEvent;
    oEvent.events = EPOLLIN;
    oEvent.data.fd = iSockFd;
    if (epoll_ctl(iEpollFd, EPOLL_CTL_ADD, iSockFd, &oEvent) < 0)
    {
        cerr << "fail to add listen fd to epoll, err: " << strerror(errno) << endl;
        return -1;
    }

    epoll_event aoEvents[1024];
    uint8_t acRecvBuf[1024 * 1024];
    while (true)
    {
        int iFdCnt = epoll_wait(iEpollFd, aoEvents, 1024, -1);
        if (iFdCnt < 0)
        {
            cerr << "epoll wait error, err: " << strerror(errno) << endl;
            return -1;
        }

        for (int i = 0; i < iFdCnt; i++)
        {
            if (aoEvents[i].data.fd == iSockFd)
            {
                sockaddr_in oClientAddr;
                socklen_t iAddrLen = sizeof(oClientAddr);
                int iAcceptFd = accept(iSockFd, (sockaddr *)&oClientAddr, &iAddrLen);
                if (iAcceptFd < 0)
                {
                    cerr << "fail to accpet, err: " << strerror(errno) << endl;
                    continue;
                }
                cout << "recv connection from " << inet_ntoa(oClientAddr.sin_addr) << ":" << ntohs(oClientAddr.sin_port) << endl;

                oEvent.events = EPOLLIN;
                oEvent.data.fd = iAcceptFd;
                if (epoll_ctl(iEpollFd, EPOLL_CTL_ADD, iAcceptFd, &oEvent) < 0)
                {
                    close(iAcceptFd);
                    cerr << "fail to add fd to epoll, err: " << strerror(errno) << endl;
                    continue;
                }
            }
            else
            {
                int iCurFd = aoEvents[i].data.fd;
                ssize_t iRecvLen = recv(iCurFd, acRecvBuf, sizeof(acRecvBuf), 0);
                if (iRecvLen < 0)
                {
                    cerr << "fail to recv, close connection, err: " << strerror(errno) << endl;
                    if (epoll_ctl(iEpollFd, EPOLL_CTL_DEL, iCurFd, NULL) < 0)
                    {
                        cerr << "fail to del fd from epoll, err: " << strerror(errno) << endl;
                    }
                    close(iCurFd);
                    continue;
                }
                if (iRecvLen == 0)
                {
                    cout << "connection closed by client" << endl;
                    if (epoll_ctl(iEpollFd, EPOLL_CTL_DEL, iCurFd, NULL) < 0)
                    {
                        cerr << "fail to del fd from epoll, err: " << strerror(errno) << endl;
                    }
                    close(iCurFd);
                    continue;
                }
                cout << "recv data len: " << iRecvLen << endl;

                ssize_t iSendLen = send(iCurFd, acRecvBuf, iRecvLen, 0);
                if (iSendLen < 0)
                {
                    cerr << "fail to send, err: " << strerror(errno) << endl;
                    if (epoll_ctl(iEpollFd, EPOLL_CTL_DEL, iCurFd, NULL) < 0)
                    {
                        cerr << "fail to del fd from epoll, err: " << strerror(errno) << endl;
                    }
                    close(iCurFd);
                    continue;  /* this fd is closed; keep handling the other ready events */
                }
                cout << "echo to client, len: " << iSendLen << endl;
            }
        }
    }
}

This example shows the basic use of epoll. To go one level deeper, it is worth looking at the epoll implementation: internally a red-black tree stores the registered fd entries, and a ready list holds the entries whose events have fired; when epoll_wait is called it simply returns the events on the ready list, and the caller just processes them.
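A toy model of that bookkeeping (an assumption-laden simplification of the kernel's fs/eventpoll.c, with real kernel types replaced by plain pointers):

/* Each watched fd gets one entry that lives in BOTH structures: the
 * tree for fast lookup on epoll_ctl, and the ready list that
 * epoll_wait drains. */
typedef struct epitem {
    int fd;                             /* the watched descriptor */
    unsigned int events;                /* EPOLLIN / EPOLLOUT mask */
    struct epitem *rb_left, *rb_right;  /* red-black tree links (colors omitted) */
    struct epitem *ready_next;          /* ready-list link */
} epitem;

typedef struct eventpoll {
    epitem *rbr;      /* root of the tree of all registered fds */
    epitem *rdllist;  /* head of the ready list returned by epoll_wait */
} eventpoll;

When a device becomes readable or writable, a kernel callback moves its epitem onto rdllist, so the cost of epoll_wait is proportional to the number of ready fds, not the number of watched ones.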

Comparing epoll with select and poll:

  • select tracks fds in a fixed-size fd_set that must be copied into the kernel on every call and then scanned in full, which is inefficient, and the set size is capped at FD_SETSIZE (see the sketch below).
  • poll improves on select by switching to an array of pollfd entries with no hard size limit, but the whole array is still copied and scanned on every call, so the underlying problem remains.
  • epoll solves both problems: fds are registered with the kernel once, and only the events that actually fired are returned, which is far more efficient.
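For contrast, the heart of a select-based loop looks like this (a sketch in plain POSIX C, not Redis code); note how the fd_set is rebuilt and copied into the kernel on every single round:

#include <sys/select.h>

void select_loop(int listen_fd, const int *client_fds, int client_cnt)
{
    for (;;) {
        fd_set rfds;
        FD_ZERO(&rfds);                           /* rebuilt from scratch each round */
        FD_SET(listen_fd, &rfds);
        int maxfd = listen_fd;
        for (int i = 0; i < client_cnt; i++) {    /* re-add every client fd */
            FD_SET(client_fds[i], &rfds);
            if (client_fds[i] > maxfd) maxfd = client_fds[i];
        }
        if (select(maxfd + 1, &rfds, NULL, NULL, NULL) <= 0)
            continue;
        if (FD_ISSET(listen_fd, &rfds)) {
            /* accept a new connection */
        }
        for (int i = 0; i < client_cnt; i++) {    /* must test every fd */
            if (FD_ISSET(client_fds[i], &rfds)) {
                /* read from client_fds[i] */
            }
        }
    }
}

With epoll, the registration happens once via epoll_ctl and the scan disappears: epoll_wait hands back only the ready fds.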

Back to the Redis network source

One particular reason to study the epoll principle and example first: in Redis every event is registered with a callback function, and these callbacks cut the control flow into disconnected pieces. Following the onion-peeling approach, you need to identify these cut points, or they will get in the way of reading the code. For example, this registration code from server startup:

    /* Create the timer callback, this is our way to process many background
     * operations incrementally, like clients timeout, eviction of unaccessed
     * expired keys and so forth. */
    if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
        serverPanic("Can't create event loop timers.");
        exit(1);
    }

    /* Create an event handler for accepting new connections in TCP and Unix
     * domain sockets. */
    for (j = 0; j < server.ipfd_count; j++) {
        if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
            acceptTcpHandler,NULL) == AE_ERR)
            {
                serverPanic(
                    "Unrecoverable error creating server.ipfd file event.");
            }
    }
    if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
        acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");

  • The serverCron callback handles (quoting the comment in the Redis source):
 * - Active expired keys collection (it is also performed in a lazy way on
 *   lookup).
 * - Software watchdog.
 * - Update some statistic.
 * - Incremental rehashing of the DBs hash tables.
 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
 * - Clients timeout of different kinds.
 * - Replication reconnection.
 * - Many more...
  • The acceptTcpHandler callback plays the same role as the accept branch in the epoll example above: it handles new client connections. (A stripped-down model of how such callbacks are registered and dispatched follows below.)
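The following is a stripped-down model of Redis's ae event loop (an assumption-laden simplification of ae.h/ae.c; the real aeFileProc signature also carries the event loop pointer and a mask). It shows why the control flow "jumps": registration only stores a function pointer, and the call happens later from the epoll_wait loop:

typedef void aeFileProc(int fd, void *clientData);

typedef struct aeFileEvent {
    int mask;               /* AE_READABLE / AE_WRITABLE */
    aeFileProc *rfileProc;  /* read callback, e.g. acceptTcpHandler */
    void *clientData;
} aeFileEvent;

aeFileEvent events[1024];   /* indexed by fd, as in the real aeEventLoop */

/* aeCreateFileEvent, reduced to its essence: store, don't call */
void createFileEvent(int fd, aeFileProc *proc, void *clientData) {
    events[fd].rfileProc = proc;
    events[fd].clientData = clientData;
}

/* invoked from the epoll_wait loop for each ready fd */
void processReadyFd(int fd) {
    events[fd].rfileProc(fd, events[fd].clientData);
}

Once you see that every aeCreateFileEvent call is just such a deferred registration, the fragmented reading order becomes much easier to follow.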

What's worth learning in the Redis network code

  • Simultaneous support for IPv4 and IPv6:
int listenToPort(int port, int *fds, int *count) {
    int j;

    /* Force binding of 0.0.0.0 if no bind address is specified, always
     * entering the loop if j == 0. */
    if (server.bindaddr_count == 0) server.bindaddr[0] = NULL;
    for (j = 0; j < server.bindaddr_count || j == 0; j++) {
        if (server.bindaddr[j] == NULL) {
            int unsupported = 0;
            /* Bind * for both IPv6 and IPv4, we enter here only if
             * server.bindaddr_count == 0. */
            fds[*count] = anetTcp6Server(server.neterr,port,NULL,
                server.tcp_backlog);
            if (fds[*count] != ANET_ERR) {
                anetNonBlock(NULL,fds[*count]);
                (*count)++;
            } else if (errno == EAFNOSUPPORT) {
                unsupported++;
                serverLog(LL_WARNING,"Not listening to IPv6: unsupproted");
            }

            if (*count == 1 || unsupported) {
                /* Bind the IPv4 address as well. */
                fds[*count] = anetTcpServer(server.neterr,port,NULL,
                    server.tcp_backlog);
                if (fds[*count] != ANET_ERR) {
                    anetNonBlock(NULL,fds[*count]);
                    (*count)++;
                } else if (errno == EAFNOSUPPORT) {
                    unsupported++;
                    serverLog(LL_WARNING,"Not listening to IPv4: unsupproted");
                }
            }
            /* Exit the loop if we were able to bind * on IPv4 and IPv6,
             * otherwise fds[*count] will be ANET_ERR and we'll print an
             * error and return to the caller with an error. */
            if (*count + unsupported == 2) break;
        } else if (strchr(server.bindaddr[j],':')) {
            /* Bind IPv6 address. */
            fds[*count] = anetTcp6Server(server.neterr,port,server.bindaddr[j],
                server.tcp_backlog);
        } else {
            /* Bind IPv4 address. */
            fds[*count] = anetTcpServer(server.neterr,port,server.bindaddr[j],
                server.tcp_backlog);
        }
        if (fds[*count] == ANET_ERR) {
            serverLog(LL_WARNING,
                "Creating Server TCP listening socket %s:%d: %s",
                server.bindaddr[j] ? server.bindaddr[j] : "*",
                port, server.neterr);
            return C_ERR;
        }
        anetNonBlock(NULL,fds[*count]);
        (*count)++;
    }
    return C_OK;
}


static int anetV6Only(char *err, int s) {
    int yes = 1;
    if (setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,&yes,sizeof(yes)) == -1) {
        anetSetError(err, "setsockopt: %s", strerror(errno));
        close(s);
        return ANET_ERR;
    }
    return ANET_OK;
}

In listenToPort, IPv4 and IPv6 can share a single port: the IPv6 listener is created first and the IPv4 one after it, and the key to their coexistence is the IPV6_V6ONLY option set via setsockopt in anetV6Only. Setting it to 1 stops the IPv6 socket from also claiming the IPv4 address space, so a separate IPv4 socket can bind the same port. This is a key learning point, sketched standalone below.
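A minimal standalone sketch of the trick (plain POSIX C with error handling trimmed; the names here are illustrative, not Redis's):

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int listen_both(uint16_t port, int *fd6, int *fd4)
{
    int yes = 1;

    /* IPv6 listener first, restricted to IPv6 only */
    *fd6 = socket(AF_INET6, SOCK_STREAM, 0);
    setsockopt(*fd6, IPPROTO_IPV6, IPV6_V6ONLY, &yes, sizeof(yes));
    struct sockaddr_in6 a6;
    memset(&a6, 0, sizeof(a6));
    a6.sin6_family = AF_INET6;
    a6.sin6_addr = in6addr_any;
    a6.sin6_port = htons(port);
    if (bind(*fd6, (struct sockaddr *)&a6, sizeof(a6)) < 0 ||
        listen(*fd6, 100) < 0) return -1;

    /* IPv4 listener on the very same port: no conflict thanks to V6ONLY */
    *fd4 = socket(AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in a4;
    memset(&a4, 0, sizeof(a4));
    a4.sin_family = AF_INET;
    a4.sin_addr.s_addr = htonl(INADDR_ANY);
    a4.sin_port = htons(port);
    if (bind(*fd4, (struct sockaddr *)&a4, sizeof(a4)) < 0 ||
        listen(*fd4, 100) < 0) return -1;

    return 0;
}

Without IPV6_V6ONLY, many systems default to a dual-stack IPv6 socket that owns the IPv4 port too, and the second bind would fail with EADDRINUSE.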

  • Setting blocking vs. non-blocking mode:
int anetSetBlock(char *err, int fd, int non_block) {
    int flags;

    /* Set the socket blocking (if non_block is zero) or non-blocking.
     * Note that fcntl(2) for F_GETFL and F_SETFL can't be
     * interrupted by a signal. */
    if ((flags = fcntl(fd, F_GETFL)) == -1) {
        anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno));
        return ANET_ERR;
    }

    if (non_block)
        flags |= O_NONBLOCK;
    else
        flags &= ~O_NONBLOCK;

    if (fcntl(fd, F_SETFL, flags) == -1) {
        anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno));
        return ANET_ERR;
    }
    return ANET_OK;
}

As the source shows, this toggles the O_NONBLOCK flag with fcntl; Redis switches every listening and client socket to non-blocking mode this way.
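For reference, the thin wrapper that listenToPort calls above, as it appears in anet.c:

int anetNonBlock(char *err, int fd) {
    return anetSetBlock(err,fd,1);
}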

Summary

The Redis network layer is built on epoll multiplexing. Starting from the epoll example code above, it is easy to see how Redis's own epoll wrapper evolved. Understanding the network code well lets you:

  • understand why Redis is so efficient;
  • recognize the epoll callbacks, the points where the code is cut apart and which otherwise obstruct reading.

Along the way you also pick up a lot of networking fundamentals. In particular:

  • Redis's support for IPv4 and IPv6 is very thorough, and the programming technique is worth learning.
  • Network programming involves many tunable parameters. Redis reaches high performance first through a framework design built on epoll and the reactor pattern, and second through a firm grasp of these parameters; sound network design requires knowing them well (two common ones are sketched below).
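As one example, here are two socket options Redis sets on client connections via anet.c's anetEnableTcpNoDelay and anetKeepAlive helpers (sketched in simplified form; the real helpers also report errors through an err buffer):

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

int tune_client_socket(int fd)
{
    int yes = 1;
    /* disable Nagle's algorithm so small command/reply packets
     * are sent immediately instead of being batched */
    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)) == -1)
        return -1;
    /* enable keepalive probes to detect dead peers on
     * long-lived client connections */
    if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(yes)) == -1)
        return -1;
    return 0;
}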

Having learned all this, it is worth turning back and comparing with Netty. Netty also uses the reactor pattern and NIO, but it wraps the low-level code for us, so we rarely see how Netty is implemented underneath. Since Redis takes the single-process approach, it is worth revisiting Netty's underlying reactor design and its parameters with this knowledge in hand.
