本文翻译自:http://www.wangafu.net/~nickm/libevent-book/01_intro.html
大多数的初级编程者都是从阻塞IO调用开始网络编程的。阻塞(同步)IO调用指的是:调用会一直阻塞,不会返回,直到发生下面两种情况之一: 要么操作完成;要么经历相当长的时间,网络协议栈自己放弃。
比如,当在TCP连接上调用connect时,操作系统会发送SYN包到TCP的远端主机。connect会一直阻塞而不返回,直到它接收到了远端主机发来的SYN+ACK包,或者经历太长的时间而自己放弃。
下面是一个使用阻塞网络调用的简单客户端例子。它连接到Google,发送一个简单的HTTP请求,然后将响应输出到stdout。
Example:A simple blocking HTTP client:
/* For sockaddr_in */
#include
/* For socket functions */
#include
/* For gethostbyname */
#include
#include
#include
#include
/*
 * Simple blocking HTTP client: resolves www.google.com, connects to port 80,
 * sends a minimal HTTP/1.0 GET request and copies the response to stdout.
 * Every network call used here blocks until it completes or fails.
 *
 * The command-line arguments are not used.
 * Returns 0 on success, 1 on any lookup, socket, connect, send or recv error.
 */
int main(int c, char **v)
{
    const char query[] =
        "GET / HTTP/1.0\r\n"
        "Host: www.google.com\r\n"
        "\r\n";
    const char hostname[] = "www.google.com";
    struct sockaddr_in sin;
    struct hostent *h;
    const char *cp;
    int fd;
    ssize_t n_written, remaining;
    char buf[1024];

    (void)c;
    (void)v;

    /* Look up the IP address for the hostname.  Watch out; this isn't
       threadsafe on most platforms. */
    h = gethostbyname(hostname);
    if (!h) {
        fprintf(stderr, "Couldn't lookup %s: %s", hostname, hstrerror(h_errno));
        return 1;
    }
    if (h->h_addrtype != AF_INET) {
        fprintf(stderr, "No ipv6 support, sorry.");
        return 1;
    }

    /* Allocate a new socket */
    fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0) {
        perror("socket");
        return 1;
    }

    /* Connect to the remote host.  Zero the address first so no field is
       left holding stack garbage. */
    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_port = htons(80);
    sin.sin_addr = *(struct in_addr*)h->h_addr;
    if (connect(fd, (struct sockaddr*) &sin, sizeof(sin))) {
        perror("connect");
        close(fd);
        return 1;
    }

    /* Write the query.  send() may accept only part of the data, so keep
       going until everything has been handed to the kernel. */
    cp = query;
    remaining = strlen(query);
    while (remaining) {
        n_written = send(fd, cp, remaining, 0);
        if (n_written <= 0) {
            perror("send");
            close(fd);   /* do not leak the socket on the error path */
            return 1;
        }
        remaining -= n_written;
        cp += n_written;
    }

    /* Get an answer back: recv() returns 0 once the peer closes. */
    while (1) {
        ssize_t result = recv(fd, buf, sizeof(buf), 0);
        if (result == 0) {
            break;
        } else if (result < 0) {
            perror("recv");
            close(fd);
            return 1;
        }
        fwrite(buf, 1, result, stdout);
    }

    close(fd);
    return 0;
}
上面例子中,所有的网络调用都是阻塞的:`gethostbyname`直到成功或失败的解析了`www.google.com`才会返回;`connect`直到TCP建链成功了才会返回;`recv`直到收到数据时才会返回;`send`直到将输出flushed到内核的写缓冲区之后才会返回。
当然,阻塞IO并不总是无用的。如果应用程序在同一时刻不需要做其他事,那么阻塞IO同样会很好的工作。
但是,如果你要编写一个需要同时处理多个连接的程序,比如需要从两个连接中读取数据,而且不知道哪个连接会先收到数据,那么下面就是一个不好的例子:
BadExample
/* This won't work. */
/* Fragment: reads each socket in turn with a blocking recv(), so a socket
   with no data stalls every socket after it. */
char buf[1024];
int i, n;
while (i_still_want_to_read()) {
    for (i = 0; i < n_sockets; ++i) {
        n = recv(fd[i], buf, sizeof(buf), 0);
        if (n == 0)
            handle_close(fd[i]);
        else if (n < 0)
            handle_error(fd[i], errno);
        else
            handle_input(fd[i], buf, n);
    }
}
如果fd[2]上首先有数据到来,但是上面的代码只有在fd[0]和fd[1]上接收到数据之后,才能去处理fd[2]上的数据。
有时,可以通过多线程(进程)来处理这样的问题。一个最简单的方式就是每个链接用一个线程(进程)进行处理。这样每个链接都会有自己的线程(进程)处理,一个链接上的阻塞IO调用就不会影响到其他链接上的处理。
下面就是一个例子:在TCP的40713端口上进行监听的ROT13服务器,每次从输入中接收一行数据,经过简单的处理后进行输出。它使用`fork`产生新的进程来处理每个链接。
Example:Forking ROT13 server
/* For sockaddr_in */
#include
/* For socket functions */
#include
#include
#include
#include
#include
/* Size basis for the per-connection line buffer; child() allocates
   MAX_LINE+1 bytes for it. */
#define MAX_LINE 16384
/*
 * ROT13-transform one character.  Letters a-m / A-M move forward 13 places,
 * letters n-z / N-Z move back 13 places; every other character is returned
 * unchanged.
 */
char rot13_char(char c)
{
    const int in_first_half  = ('a' <= c && c <= 'm') || ('A' <= c && c <= 'M');
    const int in_second_half = ('n' <= c && c <= 'z') || ('N' <= c && c <= 'Z');

    if (in_first_half)
        return c + 13;
    if (in_second_half)
        return c - 13;
    return c;
}
/*
 * Serve one client connection (runs in the forked child process).
 *
 * Reads the socket one byte at a time, stores the ROT13 of each byte in a
 * line buffer, and sends the buffer back whenever a '\n' is received.
 * Returns when the peer closes the connection or when recv()/send() fails.
 * The descriptor is not closed here.
 */
void child(int fd)
{
    char outbuf[MAX_LINE+1];
    size_t outbuf_used = 0;
    ssize_t result;

    while (1) {
        char ch;
        result = recv(fd, &ch, 1, 0);
        if (result == 0) {
            break;              /* peer closed the connection */
        } else if (result == -1) {
            perror("read");
            break;
        }

        /* We do this test to keep the user from overflowing the buffer;
           bytes that do not fit are dropped. */
        if (outbuf_used < sizeof(outbuf)) {
            outbuf[outbuf_used++] = rot13_char(ch);
        }

        if (ch == '\n') {
            /* send() may accept only part of the data: loop until the whole
               line has been written instead of ignoring its result. */
            size_t sent = 0;
            while (sent < outbuf_used) {
                ssize_t n = send(fd, outbuf + sent, outbuf_used - sent, 0);
                if (n <= 0) {
                    perror("send");
                    return;
                }
                sent += (size_t)n;
            }
            outbuf_used = 0;
        }
    }
}
/*
 * Forking ROT13 server: listens on TCP port 40713 on all interfaces and
 * forks one child process per accepted connection.  Returns only if the
 * listening socket cannot be created, bound or put into listening state.
 *
 * NOTE(review): exited children are not reaped here, so they stay around as
 * zombies until this process exits.
 */
void run(void)
{
    int listener;
    struct sockaddr_in sin = {0};

    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = 0;        /* 0 == INADDR_ANY */
    sin.sin_port = htons(40713);

    listener = socket(AF_INET, SOCK_STREAM, 0);
    if (listener < 0) {
        perror("socket");
        return;
    }

#ifndef WIN32
    {
        int one = 1;
        setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    }
#endif

    if (bind(listener, (struct sockaddr*)&sin, sizeof(sin)) < 0) {
        perror("bind");
        close(listener);
        return;
    }

    if (listen(listener, 16) < 0) {
        perror("listen");
        close(listener);
        return;
    }

    while (1) {
        struct sockaddr_storage ss;
        socklen_t slen = sizeof(ss);
        pid_t pid;
        int fd = accept(listener, (struct sockaddr*)&ss, &slen);
        if (fd < 0) {
            perror("accept");
            continue;
        }

        pid = fork();
        if (pid == 0) {
            /* Child: it only needs the connection, not the listener. */
            close(listener);
            child(fd);
            exit(0);
        }
        if (pid < 0)
            perror("fork");

        /* Parent: the child owns its own copy of fd.  Closing ours avoids
           leaking one descriptor per connection and lets the peer see EOF
           once the child exits. */
        close(fd);
    }
}
/* Program entry point for the forking server: hands control to run() and
   reports success if it ever returns.  The arguments are unused. */
int main(int c, char **v)
{
    (void)c;
    (void)v;

    run();

    return 0;
}
这样,是否已经完美解决了同一时刻多连接的问题了呢?事实并非如此:
第一,某些平台上,创建新进程(甚至是线程)是十分昂贵的。当然在实际环境中,可以使用线程池,而不是每次都创建新线程。
第二,更重要的是,线程无法如你所愿的规模化使用。如果你的程序需要同时处理成千上万个链接的时候,处理成千上万个线程就不是那么高效的了。
如果线程不是处理多连接的答案,那什么才是呢?
在unix系统上,将socket设置为非阻塞:`fcntl(fd, F_SETFL, O_NONBLOCK)`。一旦将fd置为非阻塞,那么从此刻起,无论何时进行网络调用,该调用会立即返回,要么完成操作,返回成功,要么就是返回一个特定的错误码指出“当前无法完成任务,再试一次”。所以,上面2个链接的例子可以如下写:
BadExample: busy-polling all sockets
/* This will work, but the performance will be unforgivably bad. */
int i, n;
char buf[1024];

/* Switch every socket to non-blocking mode up front. */
for (i = 0; i < n_sockets; ++i) {
    fcntl(fd[i], F_SETFL, O_NONBLOCK);
}

while (i_still_want_to_read()) {
    for (i = 0; i < n_sockets; ++i) {
        n = recv(fd[i], buf, sizeof(buf), 0);
        if (n > 0) {
            handle_input(fd[i], buf, n);
        } else if (n == 0) {
            handle_close(fd[i]);
        } else if (errno != EAGAIN) {
            handle_error(fd[i], errno);
        }
        /* n < 0 with EAGAIN: the kernel didn't have any data for us to
           read, so just move on to the next socket. */
    }
}
上面就是使用非阻塞sockets的例子,它虽然可以工作,但是效率却很差,原因有两个:第一,当每个链接都没有数据可读的时候,就会无限地轮询下去,用尽所有的CPU周期。第二,如果需要处理多个链接,那么不管是否有数据可读,每个链接都会进行一次内核调用。
所以,我们需要一种方法,可以告诉内核“一直等待,直到某个socket已经准备好了,并且告诉我是哪个socket准备好了”。
古老的解决方法是使用`select`,目前仍在使用。`select`使用三个socket fd集合(位数组):可读、可写和异常。它会一直等待,直到集合中的某一个socket已经准备好了,而且,`select`返回时,会更改集合,使其只包含那些已经准备好了的socket fd。使用`select`的例子如下:
Example:Using select
/* If you only have a couple dozen fds, this version won't be awful */
fd_set readset;
int i, n;
char buf[1024];

while (i_still_want_to_read()) {
    int maxfd = -1;
    FD_ZERO(&readset);

    /* Add all of the interesting fds to readset */
    for (i = 0; i < n_sockets; ++i) {
        if (fd[i] > maxfd) maxfd = fd[i];
        FD_SET(fd[i], &readset);
    }

    /* Wait until one or more fds are ready to read */
    select(maxfd+1, &readset, NULL, NULL, NULL);

    /* Process all of the fds that are still set in readset */
    for (i = 0; i < n_sockets; ++i) {
        if (FD_ISSET(fd[i], &readset)) {
            n = recv(fd[i], buf, sizeof(buf), 0);
            if (n == 0) {
                handle_close(fd[i]);
            } else if (n < 0) {
                if (errno == EAGAIN)
                    ; /* The kernel didn't have any data for us to read. */
                else
                    handle_error(fd[i], errno);
            } else {
                handle_input(fd[i], buf, n);
            }
        }
    }
}
一个完整的使用`select`的ROT13服务器例子如下:
/* Forsockaddr_in */
#include
/* For socketfunctions */
#include
/* Forfcntl */
#include
/* forselect */
#include
#include
#include
#include
#include
#include
#include
/* Capacity in bytes of each connection's line buffer (fd_state.buffer). */
#define MAX_LINE 16384
/*
 * ROT13-transform one character: a-m / A-M are shifted forward by 13,
 * n-z / N-Z are shifted back by 13, anything else is returned unchanged.
 */
char rot13_char(char c)
{
    /* We don't want to use isalpha here; setting the locale would change
     * which characters are considered alphabetical. */
    if ((c >= 'a' && c <= 'm') || (c >= 'A' && c <= 'M'))
        return c + 13;
    else if ((c >= 'n' && c <= 'z') || (c >= 'N' && c <= 'Z'))
        return c - 13;
    else
        return c;
}
structfd_state {
char buffer[MAX_LINE];
size_t buffer_used;
int writing;
size_t n_written;
size_t write_upto;
};
/*
 * Allocate a connection state with all counters and the writing flag set to
 * zero.  Returns NULL if malloc fails.  The caller releases the result with
 * free_fd_state().
 */
struct fd_state *alloc_fd_state(void)
{
    struct fd_state *state = malloc(sizeof(struct fd_state));
    if (!state)
        return NULL;

    state->buffer_used = 0;
    state->n_written = 0;
    state->write_upto = 0;
    state->writing = 0;
    return state;
}
/* Release a state obtained from alloc_fd_state().  Does not close any
   socket; passing NULL is harmless because free(NULL) is a no-op. */
void free_fd_state(struct fd_state *state)
{
    free(state);
}
/*
 * Put fd into non-blocking mode.
 *
 * The current file status flags are read first and O_NONBLOCK is OR-ed in,
 * so flags that were already set are preserved.  Failures of fcntl() are
 * not reported to the caller.
 */
void make_nonblocking(int fd)
{
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags < 0)
        flags = 0;  /* could not read the flags; fall back to O_NONBLOCK only */
    fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
int do_read(intfd, struct fd_state *state)
{
char buf[1024];
int i;
ssize_t result;
while (1) {
result = recv(fd, buf, sizeof(buf), 0);
if (result <= 0)
break;
for (i=0; i < result; ++i) {
if (state->buffer_used state->buffer))
state->buffer[state->buffer_used++] = rot13_char(buf[i]);
if (buf[i] == '\n') {
state->writing = 1;
state->write_upto =state->buffer_used;
}
}
}
if (result == 0) {
return 1;
} else if (result < 0) {
if (errno == EAGAIN)
return 0;
return -1;
}
return 0;
}
int do_write(intfd, struct fd_state *state)
{
while (state->n_written <state->write_upto) {
ssize_t result = send(fd,state->buffer + state->n_written,
state->write_upto - state->n_written, 0);
if (result < 0) {
if (errno == EAGAIN)
return 0;
return -1;
}
assert(result != 0);
state->n_written += result;
}
if (state->n_written ==state->buffer_used)
state->n_written =state->write_upto = state->buffer_used = 0;
state->writing = 0;
return 0;
}
/*
 * select()-based ROT13 server: listens on TCP port 40713 and multiplexes
 * all connections in one process.  state[] is indexed by file descriptor;
 * a non-NULL entry means that descriptor is an active connection.
 *
 * Returns only if the listener cannot be set up or select() fails.
 */
void run(void)
{
    int listener;
    struct fd_state *state[FD_SETSIZE];
    struct sockaddr_in sin = {0};
    int i, maxfd;
    fd_set readset, writeset, exset;

    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = 0;        /* 0 == INADDR_ANY */
    sin.sin_port = htons(40713);

    for (i = 0; i < FD_SETSIZE; ++i)
        state[i] = NULL;

    listener = socket(AF_INET, SOCK_STREAM, 0);
    if (listener < 0) {
        perror("socket");
        return;
    }
    make_nonblocking(listener);

#ifndef WIN32
    {
        int one = 1;
        setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    }
#endif

    if (bind(listener, (struct sockaddr*)&sin, sizeof(sin)) < 0) {
        perror("bind");
        return;
    }

    if (listen(listener, 16) < 0) {
        perror("listen");
        return;
    }

    FD_ZERO(&readset);
    FD_ZERO(&writeset);
    FD_ZERO(&exset);

    while (1) {
        maxfd = listener;

        /* select() overwrites the sets, so rebuild them on every pass. */
        FD_ZERO(&readset);
        FD_ZERO(&writeset);
        FD_ZERO(&exset);

        FD_SET(listener, &readset);

        for (i = 0; i < FD_SETSIZE; ++i) {
            if (state[i]) {
                if (i > maxfd)
                    maxfd = i;
                FD_SET(i, &readset);
                if (state[i]->writing) {
                    FD_SET(i, &writeset);
                }
            }
        }

        if (select(maxfd+1, &readset, &writeset, &exset, NULL) < 0) {
            perror("select");
            return;
        }

        if (FD_ISSET(listener, &readset)) {
            struct sockaddr_storage ss;
            socklen_t slen = sizeof(ss);
            int fd = accept(listener, (struct sockaddr*)&ss, &slen);
            if (fd < 0) {
                perror("accept");
            } else if (fd >= FD_SETSIZE) {
                /* Valid indices for state[] and fd_set are
                   0..FD_SETSIZE-1, so fd == FD_SETSIZE must be refused
                   as well. */
                close(fd);
            } else {
                make_nonblocking(fd);
                state[fd] = alloc_fd_state();
                assert(state[fd]); /*XXX*/
            }
        }

        for (i = 0; i < maxfd+1; ++i) {
            int r = 0;
            if (i == listener)
                continue;

            if (FD_ISSET(i, &readset)) {
                r = do_read(i, state[i]);
            }
            if (r == 0 && FD_ISSET(i, &writeset)) {
                r = do_write(i, state[i]);
            }
            /* Nonzero means EOF or error: drop the connection. */
            if (r) {
                free_fd_state(state[i]);
                state[i] = NULL;
                close(i);
            }
        }
    }
}
/* Program entry point for the select() server: makes stdout unbuffered and
   runs the server loop.  The arguments are unused. */
int main(int c, char **v)
{
    (void)c;
    (void)v;

    setvbuf(stdout, NULL, _IONBF, 0);

    run();
    return 0;
}
但是问题还没有解决。因为产生和读取select的位数组耗费的时间与最大的socket fd数成正比,所以当socket fd数变得很大时,select调用的性能就会下降很多。
不同的操作系统都提供了不同的`select`替代函数,包括`poll`、`epoll`、`kqueue`、`evports`和`/dev/poll`。所有这些接口都具有比`select`更好的性能,而且除了`poll`之外,它们在增加socket、删除socket、通知哪个socket准备好这些方面,都可以达到O(1)的性能。
不幸的是,所有这些不同的接口都没有形成标准。Linux提供了`epoll`,BSDs提供了`kqueue`,Solaris提供了`evports`和`/dev/poll`,而且这些操作系统提供的接口相互独立。所以,当你需要编写一个可移植的、高性能的异步应用时,你需要一个封装了所有这些接口的抽象,并且由它提供其中最高效的那个接口。
这就是libevent API能提供的最底层的功能。它提供了一系列的`select`替代接口,并且使用当前操作系统所具有的最高效的版本。
下面是另一个ROT13服务器的例子。该实例使用libevent2替代`select`。去除了`fd_sets`,而是使用`event_base`添加和删除事件,当然这是通过`poll`、`epoll`、`kqueue`等来实现的。
Example:A low-level ROT13 server with Libevent
/* Forsockaddr_in */
#include
/* Forsocket functions */
#include
/* Forfcntl */
#include
#include
#include
#include
#include
#include
#include
#include
/* Capacity in bytes of each connection's line buffer (fd_state.buffer). */
#define MAX_LINE 16384
/* Forward declarations of the event callbacks; alloc_fd_state() refers to
   them before they are defined. */
void do_read(evutil_socket_t fd, short events, void *arg);
void do_write(evutil_socket_t fd, short events, void *arg);
/*
 * ROT13-transform one character: a-m / A-M are shifted forward by 13,
 * n-z / N-Z are shifted back by 13, anything else is returned unchanged.
 */
char
rot13_char(char c)
{
    /* We don't want to use isalpha here; setting the locale would change
     * which characters are considered alphabetical. */
    if ((c >= 'a' && c <= 'm') || (c >= 'A' && c <= 'M'))
        return c + 13;
    else if ((c >= 'n' && c <= 'z') || (c >= 'N' && c <= 'Z'))
        return c - 13;
    else
        return c;
}
structfd_state {
char buffer[MAX_LINE];
size_t buffer_used;
size_t n_written;
size_t write_upto;
struct event *read_event;
struct event *write_event;
};
/*
 * Allocate the state for one connection and create (but do not add) its
 * persistent read and write events on base for socket fd.
 *
 * Returns NULL if the allocation or either event_new() call fails; anything
 * created so far is released first.  Release the result with
 * free_fd_state().
 */
struct fd_state *alloc_fd_state(struct event_base *base, evutil_socket_t fd)
{
    struct fd_state *state = malloc(sizeof(struct fd_state));
    if (!state)
        return NULL;

    state->read_event = event_new(base, fd, EV_READ|EV_PERSIST, do_read, state);
    if (!state->read_event) {
        free(state);
        return NULL;
    }

    state->write_event = event_new(base, fd, EV_WRITE|EV_PERSIST, do_write, state);
    if (!state->write_event) {
        event_free(state->read_event);
        free(state);
        return NULL;
    }

    state->buffer_used = state->n_written = state->write_upto = 0;

    assert(state->write_event);
    return state;
}
void free_fd_state(structfd_state *state)
{
event_free(state->read_event);
event_free(state->write_event);
free(state);
}
void do_read(evutil_socket_tfd, short events, void *arg)
{
struct fd_state *state = arg;
char buf[1024];
int i;
ssize_t result;
while (1) {
assert(state->write_event);
result = recv(fd, buf, sizeof(buf), 0);
if (result <= 0)
break;
for (i=0; i < result; ++i) {
if (state->buffer_used state->buffer))
state->buffer[state->buffer_used++] = rot13_char(buf[i]);
if (buf[i] == '\n') {
assert(state->write_event);
event_add(state->write_event,NULL);
state->write_upto =state->buffer_used;
}
}
}
if (result == 0) {
free_fd_state(state);
} else if (result < 0) {
if (errno == EAGAIN) // XXXX use evutilmacro
return;
perror("recv");
free_fd_state(state);
}
}
void do_write(evutil_socket_tfd, short events, void *arg)
{
struct fd_state *state = arg;
while (state->n_written <state->write_upto) {
ssize_t result = send(fd,state->buffer + state->n_written,
state->write_upto - state->n_written, 0);
if (result < 0) {
if (errno == EAGAIN) // XXX useevutil macro
return;
free_fd_state(state);
return;
}
assert(result != 0);
state->n_written += result;
}
if (state->n_written ==state->buffer_used)
state->n_written =state->write_upto = state->buffer_used = 1;
event_del(state->write_event);
}
void do_accept(evutil_socket_tlistener, short event, void *arg)
{
struct event_base *base = arg;
struct sockaddr_storage ss;
socklen_t slen = sizeof(ss);
int fd = accept(listener, (structsockaddr*)&ss, &slen);
if (fd < 0) { // XXXX eagain??
perror("accept");
} else if (fd > FD_SETSIZE) {
close(fd); // XXX replace all closeswith EVUTIL_CLOSESOCKET */
} else {
struct fd_state *state;
evutil_make_socket_nonblocking(fd);
state = alloc_fd_state(base, fd);
assert(state); /*XXX err*/
assert(state->write_event);
event_add(state->read_event, NULL);
}
}
/*
 * libevent-based ROT13 server: creates an event_base, listens on TCP port
 * 40713, registers do_accept for the listener and runs the event loop.
 * Returns early if any setup step fails.
 */
void run(void)
{
    evutil_socket_t listener;
    struct sockaddr_in sin = {0};
    struct event_base *base;
    struct event *listener_event;

    base = event_base_new();
    if (!base)
        return; /*XXXerr*/

    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = 0;        /* 0 == INADDR_ANY */
    sin.sin_port = htons(40713);

    listener = socket(AF_INET, SOCK_STREAM, 0);
    if (listener < 0) {
        perror("socket");
        return;
    }
    evutil_make_socket_nonblocking(listener);

#ifndef WIN32
    {
        int one = 1;
        setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    }
#endif

    if (bind(listener, (struct sockaddr*)&sin, sizeof(sin)) < 0) {
        perror("bind");
        return;
    }

    if (listen(listener, 16) < 0) {
        perror("listen");
        return;
    }

    /* The base is passed as the callback argument so do_accept() can create
       events for new connections on it. */
    listener_event = event_new(base, listener, EV_READ|EV_PERSIST, do_accept, (void*)base);
    if (!listener_event)
        return;
    event_add(listener_event, NULL);

    event_base_dispatch(base);
}
/* Program entry point for the low-level libevent server: makes stdout
   unbuffered and runs the server.  The arguments are unused. */
int main(int c, char **v)
{
    (void)c;
    (void)v;

    setvbuf(stdout, NULL, _IONBF, 0);

    run();
    return 0;
}
(上面的代码需要注意的是:使用`evutil_socket_t`而不是`int`作为socket的类型;使用`evutil_make_socket_nonblocking`而不是`fcntl(O_NONBLOCK)`将socket转为非阻塞。这些改变使得我们的代码可以兼容win32平台下的网络API。)
我们的代码虽然更加高效了,但是也变得更加复杂了。回到我们使用`fork`的例子,我们没有为每个链接都管理一个缓存,我们只是在每个进程上使用了独立的栈缓存。实际上,我们无需明确地跟踪哪个socket在读或写,在代码中它是隐含的。我们也无需一个跟踪多少操作已经完成的结构体,仅仅使用循环和栈变量即可。
另外,如果你对Windows上的网络编程很熟悉,则可以看出,上面使用libevent的例子没有达到最佳的性能。在Windows上,高效的异步IO并不是类似于`select`那样的机制,而是使用IOCP(IO Completion Ports)API。与其他高效网络API不同的是,IOCP并不通知你的程序哪个socket已经准备好操作了,相反,程序告诉Windows网络栈开始一个网络操作,而IOCP告诉程序操作已经完成了。
幸运的是,libevent2的bufferevents接口可以解决上面的问题:它使得程序编写更加简单,而且可以在Windows上、Unix上都提供最高效的接口。下面是最后一个ROT13服务器的例子,它使用了bufferevents API:
Example:A simpler ROT13 server with Libevent
/* Forsockaddr_in */
#include
/* Forsocket functions */
#include
/* Forfcntl */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* Longest input readcb() lets accumulate before it flushes the data without
   waiting for a newline; also used as the read high-water mark. */
#define MAX_LINE 16384
/* Prototypes of the low-level read/write event callbacks. */
void do_read(evutil_socket_t fd, short events, void *arg);
void do_write(evutil_socket_t fd, short events, void *arg);
/*
 * Apply ROT13 to a single character.  Only the unaccented ASCII letters are
 * rotated; all other characters pass through untouched.
 */
char rot13_char(char c)
{
    /* Explicit range checks instead of isalpha(): the locale could change
     * which characters isalpha() treats as alphabetical. */
    int shift = 0;

    if ((c >= 'a' && c <= 'm') || (c >= 'A' && c <= 'M'))
        shift = 13;     /* first half of the alphabet moves forward */
    else if ((c >= 'n' && c <= 'z') || (c >= 'N' && c <= 'Z'))
        shift = -13;    /* second half moves back */

    return c + shift;
}
/*
 * bufferevent read callback.  Takes every complete LF-terminated line out
 * of the input buffer, ROT13s it and appends it plus a newline to the
 * output buffer.  If MAX_LINE or more bytes are then still waiting without
 * a newline, they are transformed and flushed as well, followed by a
 * newline.  ctx is unused.
 */
void readcb(struct bufferevent *bev, void *ctx)
{
    struct evbuffer *input = bufferevent_get_input(bev);
    struct evbuffer *output = bufferevent_get_output(bev);
    char *line;
    size_t n;
    int i;

    for (;;) {
        line = evbuffer_readln(input, &n, EVBUFFER_EOL_LF);
        if (!line)
            break;

        for (i = 0; i < n; ++i)
            line[i] = rot13_char(line[i]);
        evbuffer_add(output, line, n);
        evbuffer_add(output, "\n", 1);
        free(line);
    }

    if (evbuffer_get_length(input) < MAX_LINE)
        return;

    /* Too long; just process what there is and go on so that the buffer
     * doesn't grow infinitely long. */
    {
        char buf[1024];
        while (evbuffer_get_length(input)) {
            int chunk = evbuffer_remove(input, buf, sizeof(buf));
            for (i = 0; i < chunk; ++i)
                buf[i] = rot13_char(buf[i]);
            evbuffer_add(output, buf, chunk);
        }
        evbuffer_add(output, "\n", 1);
    }
}
void errorcb(struct bufferevent *bev, short error,void *ctx)
{
if (error & BEV_EVENT_EOF) {
/* connection has been closed, do anyclean up here */
/* ... */
} else if (error & BEV_EVENT_ERROR) {
/* check errno to see what erroroccurred */
/* ... */
} else if (error & BEV_EVENT_TIMEOUT) {
/* must be a timeout event handle,handle it */
/* ... */
}
bufferevent_free(bev);
}
/*
 * Accept callback for the listening socket.  arg is the event_base.
 *
 * Accepts one connection and wraps it in a bufferevent that calls readcb on
 * input and errorcb on EOF/error.  The bufferevent is created with
 * BEV_OPT_CLOSE_ON_FREE and its read high-water mark is MAX_LINE.
 * Descriptors at or above FD_SETSIZE are closed immediately.
 */
void do_accept(evutil_socket_t listener, short event, void *arg)
{
    struct event_base *base = arg;
    struct sockaddr_storage ss;
    socklen_t slen = sizeof(ss);
    int fd = accept(listener, (struct sockaddr*)&ss, &slen);
    if (fd < 0) {
        perror("accept");
    } else if (fd >= FD_SETSIZE) {
        /* An fd_set only holds descriptors 0..FD_SETSIZE-1, so
           fd == FD_SETSIZE must be refused as well. */
        close(fd);
    } else {
        struct bufferevent *bev;
        evutil_make_socket_nonblocking(fd);
        bev = bufferevent_socket_new(base, fd, BEV_OPT_CLOSE_ON_FREE);
        if (!bev) {
            /* No bufferevent took ownership of the socket; close it here
               instead of dereferencing NULL below. */
            close(fd);
            return;
        }
        bufferevent_setcb(bev, readcb, NULL, errorcb, NULL);
        bufferevent_setwatermark(bev, EV_READ, 0, MAX_LINE);
        bufferevent_enable(bev, EV_READ|EV_WRITE);
    }
}
/*
 * bufferevent-based ROT13 server: creates an event_base, listens on TCP
 * port 40713, registers do_accept for the listener and runs the event loop.
 * Returns early if any setup step fails.
 */
void run(void)
{
    evutil_socket_t listener;
    struct sockaddr_in sin = {0};
    struct event_base *base;
    struct event *listener_event;

    base = event_base_new();
    if (!base)
        return; /*XXXerr*/

    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = 0;        /* 0 == INADDR_ANY */
    sin.sin_port = htons(40713);

    listener = socket(AF_INET, SOCK_STREAM, 0);
    if (listener < 0) {
        perror("socket");
        return;
    }
    evutil_make_socket_nonblocking(listener);

#ifndef WIN32
    {
        int one = 1;
        setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    }
#endif

    if (bind(listener, (struct sockaddr*)&sin, sizeof(sin)) < 0) {
        perror("bind");
        return;
    }

    if (listen(listener, 16) < 0) {
        perror("listen");
        return;
    }

    /* The base is passed as the callback argument so do_accept() can create
       bufferevents for new connections on it. */
    listener_event = event_new(base, listener, EV_READ|EV_PERSIST, do_accept, (void*)base);
    if (!listener_event)
        return;
    event_add(listener_event, NULL);

    event_base_dispatch(base);
}
/* Program entry point for the bufferevent server: makes stdout unbuffered
   and runs the server.  The arguments are unused. */
int main(int c, char **v)
{
    (void)c;
    (void)v;

    setvbuf(stdout, NULL, _IONBF, 0);

    run();
    return 0;
}