UtilBox(ub)基础组件 -- epoll_server网络事件模型

今天事情比较少,突然在网上看到了一篇关于网络同步异步IO的帖子,正好想起了前几天分析过的Redis的代码。Redis的代码很精练也很轻巧,基本没有第三方依赖的库(最新版本中加入了jemalloc,但已集成在了redis的src里,所以依旧可以直接make),并实现了一套轻量级的非阻塞半异步框架-aeEvent(很少有大型互联网后台应用采用全异步框架,一是逻辑和IO都是异步的这样即时性较低,二是编程难度较高)!

完整地分析过了redis的aeEvent,也对比了一下memcached用的libevent。感觉确实比libevent轻量些,两者在linux下都是用epoll实现(定时器的实现细节略有不同,libevent是用最小堆管理,aeEvent是链表,后期redis可能改进)。大体思想基本类似,不过redis采用的是单线程模型,据作者说是考虑到线程锁的问题。两者暂时没有做过效率对比,之后有机会可以跑些数据。

看了redis之后,自己也做了一个微型的基于epoll的event_server模型,可以应用在自己的一些环境之中,同样采用单线程infinite_loop的方式,通过epoll_ctl注册和删除需要关注的file descriptor (fd) ,然后通过epoll_wait来循环等待IO事件,触发记录在该fd上的write或者read回调函数(通过附加结构体实现)。


[cpp] view plain copy print ?
  1. /*

  2. * =====================================================================================

  3. *

  4. * Filename: epoll_server.c

  5. *

  6. * Description: An example for Linux epoll

  7. *

  8. * Version: 1.0

  9. * Created: 03/28/2012 03:40:37 PM

  10. * Revision: none

  11. * Compiler: gcc

  12. *

  13. * Author: Michael LiuXin,

  14. * Organization:

  15. *

  16. * =====================================================================================

  17. */

  18. #include <sys/socket.h>

  19. #include <sys/epoll.h>

  20. #include <netinet/in.h>

  21. #include <arpa/inet.h>

  22. #include <fcntl.h>

  23. #include <unistd.h>

  24. #include <stdio.h>

  25. #include <errno.h>

  26. #include <stdlib.h>

  27. #include <assert.h>

  28. #include <netinet/tcp.h>

  29. #include <string.h>

  30. #include <strings.h>

  31. #define MAX_EVENTS 500

  32. #define EPOLL_WAIT_TIMEOUT 1000

  33. #define EVENT_READABLE 1

  34. #define EVENT_WRITEABLE 2

  35. struct event_server;

[cpp] view plain copy print ?
  /*
   * Callback type for the read and write handlers attached to an fd.
   * The event loop calls it with the owning server and the ready fd.
   */
  typedef void (*event_handler)(struct event_server*, int);

[cpp] view plain copy print ?
  1. // 每个fd一个的event结构

  2. struct event_t

  3. {

  4. unsigned char mask;<span style="white-space:pre"> </span>// 标志WRITE和READ的掩码

  5. event_handler read;

  6. event_handler write;

  7. void* data;

  8. };

[cpp] view plain copy print ?
  1. /**

  2. * Event server structure , maintain a core Events

  3. */

  4. struct event_server

  5. {

  6. int epfd;<span style="white-space:pre"> </span>// epoll_create的fd

  7. int is_blocking;<span style="white-space:pre"> </span>// 设置非阻塞

  8. struct epoll_event events[MAX_EVENTS];<span style="white-space:pre"> </span>// 数组实现某个fd的结构的索引,也可以用Hash

  9. struct event_t events_set[MAX_EVENTS];

  10. unsigned long loops;

  11. };

[cpp] view plain copy print ?
  1. // 创建一个event_server

  2. struct event_server* create_server()

  3. {

  4. struct event_server *server = (struct event_server*)malloc(sizeof(*server));

  5. memset(server,0,sizeof(*server));

  6. // just a hint for kernel

  7. server->epfd = epoll_create(1024);

  8. if (-1 != server->epfd) {

  9. printf("ok=create_server\n");

  10. return server;

  11. } else {

  12. printf("err=create_server\n");

  13. return NULL;

  14. }

  15. }

[cpp] view plain copy print ?
  1. // 反注册一个fd的event

  2. int unregister_server_event(struct event_server* server, int fd, int type)

  3. {

  4. struct epoll_event ev;

  5. ev.data.fd = fd;

  6. ev.events = server->events_set[fd].mask;

  7. if (type & EVENT_WRITEABLE)

  8. ev.events &= ~EPOLLOUT ;

  9. if (type & EVENT_READABLE)

  10. ev.events &= ~EPOLLIN;

  11. // if there is no event then delelte , otherwise modify

  12. int op = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL ;

  13. // to listen the fd

  14. if (-1 == epoll_ctl(server->epfd,op,fd,&ev)) {

  15. printf("err=epoll_ctl_del\n");

  16. return -1;

  17. } else {

  18. // record the read/write callback-function

  19. // use it at epoll_wait call

  20. server->events_set[fd].read = (type&EVENT_READABLE)?NULL:server->events_set[fd].read;

  21. server->events_set[fd].write = (type&EVENT_WRITEABLE)?NULL:server->events_set[fd].write;

  22. server->events_set[fd].mask = ev.events;

  23. //printf("ok=epoll_ctl_%s\n",op==EPOLL_CTL_MOD?"mod":"del");

  24. return 0;

  25. }

  26. }

[cpp] view plain copy print ?
  1. // 注册一个fd的event

  2. int register_server_event(struct event_server* server, int fd, int type, event_handler fun)

  3. {

  4. assert(fd);

  5. assert(type);

  6. assert(fun);

  7. struct epoll_event ev = {0};

  8. ev.data.fd = fd;

  9. ev.events |= server->events_set[fd].mask;

  10. if (type & EVENT_WRITEABLE)

  11. ev.events |= EPOLLOUT ;

  12. if (type & EVENT_READABLE)

  13. ev.events |= EPOLLIN;

  14. // to listen the fd MOD or ADD

  15. int op = server->events_set[fd].mask ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;

  16. if (-1 == epoll_ctl(server->epfd,op,fd,&ev)) {

  17. printf("err=epoll_ctl_%s\n",server->events_set[fd].mask?"mod":"add");

  18. return -1;

  19. } else {

  20. // record the read/write callback-function

  21. // use it at epoll_wait call

  22. //printf("ok=epoll_ctl_%s\n",server->events_set[fd].mask?"mod":"add");

  23. type&EVENT_READABLE ? server->events_set[fd].read=fun : 0;

  24. type&EVENT_WRITEABLE ? server->events_set[fd].write=fun : 0;

  25. server->events_set[fd].mask = ev.events;

  26. return 0;

  27. }

  28. }

[cpp] view plain copy print ?
  1. // 阻塞非阻塞,基本的网络模型为防止read阻塞都采用nonblocking

  2. void set_server_nonblocking(struct event_server* server)

  3. {

  4. server->is_blocking = 0;

  5. }

  6. void set_server_blocking(struct event_server* server)

  7. {

  8. server->is_blocking = 1;

  9. }

[cpp] view plain copy print ?
  1. // infinite loop 事件循环,线程在此循环

  2. void run(struct event_server* server)

  3. {

  4. // do an infinite loop for epoll_wait

  5. while(1) {

  6. //printf("before_epoll_wait\n");

  7. int n = epoll_wait(server->epfd,server->events,MAX_EVENTS,EPOLL_WAIT_TIMEOUT);

  8. //printf("after_epoll_wait\n");

  9. if (0 == n) {

  10. //printf("Waiting<%lu>...\n",server->loops++);

  11. continue;

  12. }

  13. //printf("for_each<%d>\n",n);

  14. for (int i=0; i!=n; i++) {

  15. assert(server->events[i].data.fd);

  16. assert(server->events_set[server->events[i].data.fd].mask);

  17. if ((server->events[i].events & EPOLLIN) && server->events_set[server->events[i].data.fd].read) {

  18. server->events_set[server->events[i].data.fd].read(server,server->events[i].data.fd);

  19. continue;

  20. }

  21. if ((server->events[i].events & EPOLLOUT) && server->events_set[server->events[i].data.fd].write) {

  22. server->events_set[server->events[i].data.fd].write(server,server->events[i].data.fd);

  23. continue;

  24. }

  25. }

  26. }

  27. }

[cpp] view plain copy print ?
  1. // write事件回调函数

  2. void tcp_write(struct event_server* server, int clientfd)

  3. {

  4. int length = strlen("received");

  5. while(1) {

  6. int n = write(clientfd,"received",length);

  7. if (-1==n && errno==EAGAIN)

  8. break;

  9. length -= n;

  10. //printf("ok=write_client<%d>\n",n);

  11. if (length <= 0)

  12. break;

  13. }

  14. unregister_server_event(server,clientfd,EVENT_WRITEABLE);

  15. }

[cpp] view plain copy print ?
  1. // read事件回调函数

  2. void tcp_read(struct event_server* server, int clientfd)

  3. {

  4. /**

  5. * this method is called by epoll_wait callback if there has

  6. * something to read in buffer

  7. */

  8. char buf[1024] = {0};

  9. int ret = -1;

  10. while(1) {

  11. ret=read(clientfd,buf,1024);

  12. if (0 == ret) {

  13. unregister_server_event(server,clientfd,EVENT_READABLE|EVENT_WRITEABLE);

  14. close(clientfd);

  15. printf("ok=client_quit\n");

  16. break;

  17. }

  18. if (-1==ret && errno==EAGAIN)

  19. break;

  20. //printf("ok=read_from_%d<%d>:\"%s\"\n",clientfd,ret,buf);

  21. //register_server_event(server,clientfd,EVENT_WRITEABLE,tcp_write);

  22. }

  23. }

[cpp] view plain copy print ?
  1. // server的socket fd的回调函数。只负责accept并注册

  2. void tcp_accept(struct event_server* server, int server_socket)

  3. {

  4. // the server must be accepted

  5. int cfd = accept(server_socket,NULL,NULL);

  6. if (cfd) {

  7. int flag = fcntl(cfd,F_GETFL,0);

  8. // nonblocking

  9. flag |= O_NONBLOCK;

  10. if (-1 == fcntl(cfd,F_SETFL,flag))

  11. printf("err=set_nonblocking\n");

  12. // no delay (without nagle)

  13. int nodelay = 1;

  14. if (-1 == setsockopt(cfd,IPPROTO_TCP,TCP_NODELAY,&nodelay,sizeof(nodelay)))

  15. printf("err=set_tcp_no_delay\n");

  16. // add the client_fd to epoll loop

  17. register_server_event(server,cfd,EVENT_READABLE,tcp_read);

  18. } else {

  19. printf("err=accept_socket\n");

  20. }

  21. }

[cpp] view plain copy print ?
  1. // drive function

  2. #define ut_main main

  3. int ut_main()

  4. {

  5. // setup a socket

  6. int server_socket = socket(AF_INET,SOCK_STREAM,0);

  7. if (-1 == server_socket) {

  8. printf("err=create_socket\n");

  9. return -1;

  10. }

  11. else

  12. printf("ok=create_socket\n");

  13. struct sockaddr_in server_addr;

  14. bzero(&server_addr,sizeof(server_addr));

  15. server_addr.sin_family = AF_INET;

  16. server_addr.sin_addr.s_addr = htons(INADDR_ANY);

  17. server_addr.sin_port = htons(9898);

  18. int flag=1,len=sizeof(flag);

  19. // we can reuse the port

  20. setsockopt(server_socket,SOL_SOCKET,SO_REUSEADDR,&flag,len);

  21. if (-1 == setsockopt(server_socket,IPPROTO_TCP,TCP_NODELAY,&flag,sizeof(flag)))

  22. printf("err=set_tcp_no_delay\n");

  23. // bind ip/port

  24. if (-1 == bind(server_socket,(struct sockaddr*)&server_addr,sizeof(server_addr))) {

  25. printf("err=bind_socket\n");return -1; }

  26. else

  27. printf("ok=bind_socket\n");

  28. if (-1 == listen(server_socket,1024)) {

  29. printf("err=listen_socket\n");return -1; }

  30. else

  31. printf("ok=listen_socket\n");

  32. // create a epoll server handle

  33. struct event_server* server = create_server();

  34. set_server_nonblocking(server);

  35. // firstly listen the server's socket with ACCEPT

  36. register_server_event(server,server_socket,EVENT_READABLE,tcp_accept);

  37. // do event loop

  38. run(server);

  39. return 0;

  40. }



有些细节没特别关注,例如Linger之类的问题(readv/writev)暂时不care,后续继续完善。单线程下压力QPS可以打到5万(blocksize很小只是一个字符串,会对数据包的利用率和并发造成一定影响)。还没试过多线程(可以一个线程包一个server,或者由server来托管线程)。只是简单做了一个epoll多路复用的引子,基本“画出”了网络框架的影子,其实不管是redis、libevent还是apache、nginx都是以此为基点进行扩展,在上面做线程、并发控制、进程池(apache的prefork)等。


你可能感兴趣的:(linux,UtilBox,基础组件)