tinyhttpd是一个轻量级的web服务器,最近几天终于抽出时间来研究研究了。其源码百度即可下载,500多行,确实是学习linux编程的好材料。很多网友都写了关于tinyhttpd的博文,但是我还是觉得不够深入,严格说是写得不够深入,往往就是把500多行代码一扔,分析下主要过程,画个流程图就完事了。我觉得还有很多东西可以挖一挖,也许还可以再调整一下代码,虽然目前也不清楚可调整多少,待我细细道来。
我分析的过程就按主要路线走,也就是这样一个主干道流程:服务器创建socket并监听某一端口->浏览器输入url发出请求->服务器收到请求,创建线程处理请求,主线程继续等待->新线程读取http请求,并解析相关字段,读取文件内容或者执行CGI程序并返回给浏览器->关闭客户端套接字,新线程退出
咱们先来看看main函数
/*
 * Entry point: obtain the listening socket from startup(), then accept
 * connections forever, handing each client socket to a new thread that
 * runs accept_request().
 * Returns: 0 (only reachable if the accept loop is ever left)
 */
int main(void)
{
    int server_sock = -1;
    u_short port = 0;   /* 0 makes startup() use a dynamically allocated port */
    int client_sock = -1;
    struct sockaddr_in client_name;
    /* accept() takes a socklen_t *, not an int *. */
    socklen_t client_name_len = sizeof(client_name);
    pthread_t newthread;

    server_sock = startup(&port);
    printf("httpd running on port %d\n", port);

    while (1)
    {
        /* accept() overwrites the length on return, so reset it each time. */
        client_name_len = sizeof(client_name);
        client_sock = accept(server_sock,
                             (struct sockaddr *)&client_name,
                             &client_name_len);
        if (client_sock == -1)
            error_die("accept");

        /* accept_request() is declared as void f(int), which is not the
         * pthread start-routine prototype void *f(void *).  The casts make
         * that pre-existing mismatch explicit instead of relying on implicit
         * conversions that newer compilers reject. */
        if (pthread_create(&newthread, NULL,
                           (void *(*)(void *))accept_request,
                           (void *)(long)client_sock) != 0)
        {
            perror("pthread_create");
            /* No thread will handle this client, so release its socket. */
            close(client_sock);
        }
        else
        {
            /* The thread is never joined; detach it so its resources are
             * reclaimed when it finishes. */
            pthread_detach(newthread);
        }
    }

    close(server_sock);
    return(0);
}
/*
 * Create a TCP socket, bind it to *port on INADDR_ANY and put it into the
 * listening state.
 * Parameters: port - in/out.  If *port is 0 the port is allocated
 *                    dynamically and the number actually bound is written
 *                    back into *port.
 * Returns: the listening socket descriptor
 */
int startup(u_short *port)
{
    int httpd = 0;
    struct sockaddr_in name;

    httpd = socket(PF_INET, SOCK_STREAM, 0);
    if (httpd == -1)
        error_die("socket");

    memset(&name, 0, sizeof(name));             /* bzero() would work too */
    name.sin_family = AF_INET;
    name.sin_port = htons(*port);
    name.sin_addr.s_addr = htonl(INADDR_ANY);   /* any network interface */
    if (bind(httpd, (struct sockaddr *)&name, sizeof(name)) < 0)
        error_die("bind");

    if (*port == 0)  /* if dynamically allocating a port */
    {
        /* getsockname() takes a socklen_t *, not an int *. */
        socklen_t namelen = sizeof(name);

        if (getsockname(httpd, (struct sockaddr *)&name, &namelen) == -1)
            error_die("getsockname");
        *port = ntohs(name.sin_port);           /* port chosen by the system */
    }

    if (listen(httpd, 5) < 0)
        error_die("listen");
    return(httpd);                              /* listening socket descriptor */
}
很常见的步骤,就不多说了。
此后,服务端就accept等待连接,作者其实没有关心客户端来自哪里,那accept的第二、第三参数完全可以为NULL。接着就创建线程把客户端套接字作为参数传过去了,由新线程处理请求,这是服务器编程的常用手段,提高并发性。注意这里的线程函数并不完全合法,至少在linux上就不符合线程函数的原型定义,编译时编译器也只是警告而未报错。
接下来重点就在线程函数accept_request上了
/*
 * Handle one HTTP request on an accepted connection: read the request line,
 * extract the method and URL, map the URL to a path under "htdocs", then
 * either serve the file or run it as a CGI program.  The client socket is
 * closed before returning.
 * Parameters: client - the socket connected to the client
 */
void accept_request(int client)
{
    char buf[1024];
    int numchars;
    char method[255];
    char url[255];
    char path[512];
    size_t i, j;
    size_t linelen;     /* number of valid bytes in buf for the request line */
    struct stat st;
    int cgi = 0;        /* becomes true if server decides this is a CGI
                         * program */
    char *query_string = NULL;

    numchars = get_line(client, buf, sizeof(buf));
    linelen = (numchars > 0) ? (size_t)numchars : 0;

    /* Copy the method token.  The bound on j is tested before buf[j] is
     * read so the scan never runs past the bytes get_line() stored. */
    i = 0; j = 0;
    while ((j < linelen) && !ISspace(buf[j]) && (i < sizeof(method) - 1))
    {
        method[i] = buf[j];
        i++; j++;
    }
    method[i] = '\0';

    if (strcasecmp(method, "GET") && strcasecmp(method, "POST"))
    {
        unimplemented(client);
        /* Every other path closes the socket at the end of this function;
         * do the same here so the descriptor is not leaked.
         * NOTE(review): assumes unimplemented() does not close it itself. */
        close(client);
        return;
    }

    if (strcasecmp(method, "POST") == 0)
        cgi = 1;

    /* Skip the whitespace after the method, then copy the URL token. */
    i = 0;
    while ((j < linelen) && ISspace(buf[j]))
        j++;
    while ((j < linelen) && !ISspace(buf[j]) && (i < sizeof(url) - 1))
    {
        url[i] = buf[j];
        i++; j++;
    }
    url[i] = '\0';

    if (strcasecmp(method, "GET") == 0)
    {
        /* A '?' splits the URL into path and query string; a query string
         * marks the request as CGI. */
        query_string = url;
        while ((*query_string != '?') && (*query_string != '\0'))
            query_string++;
        if (*query_string == '?')
        {
            cgi = 1;
            *query_string = '\0';
            query_string++;
        }
    }

    snprintf(path, sizeof(path), "htdocs%s", url);
    if (path[strlen(path) - 1] == '/')
        strcat(path, "index.html");
    if (stat(path, &st) == -1) {
        while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
            numchars = get_line(client, buf, sizeof(buf));
        not_found(client);
    }
    else
    {
        if ((st.st_mode & S_IFMT) == S_IFDIR)
            strcat(path, "/index.html");
        /* Any execute permission bit marks the target as a CGI program. */
        if ((st.st_mode & S_IXUSR) ||
            (st.st_mode & S_IXGRP) ||
            (st.st_mode & S_IXOTH))
            cgi = 1;
        if (!cgi)
            serve_file(client, path);
        else
            execute_cgi(client, path, method, query_string);
    }

    close(client);
}
GET / HTTP/1.1
HOST:www.abc.com
Content-type:text/html
...
get_line干的事就是读取一行,并且不管原来是以\n还是\r\n结束,均转化为以\n再加\0字符结束。其实现如下:
/*
 * Read one line from a socket into buf.  A line ends at "\n", "\r\n" or a
 * lone "\r"; whichever terminator was seen is stored as a single '\n'.
 * The result is always NUL-terminated.  Reading also stops when the buffer
 * is full or when recv() returns no data.
 * Parameters: sock - socket descriptor to read from
 *             buf  - destination buffer
 *             size - size of buf in bytes
 * Returns: the number of characters stored (excluding the terminating NUL)
 */
int get_line(int sock, char *buf, int size)
{
    int len = 0;
    char ch = '\0';

    while (len < size - 1 && ch != '\n')
    {
        /* Pull exactly one character per iteration. */
        int got = recv(sock, &ch, 1, 0);

        if (got <= 0)
            break;              /* nothing more to read: end the line here */

        if (ch == '\r')
        {
            /* A carriage return is usually followed by '\n'; peek so that
             * a character belonging to the next line is not consumed. */
            got = recv(sock, &ch, 1, MSG_PEEK);
            if (got > 0 && ch == '\n')
                recv(sock, &ch, 1, 0);  /* consume the '\n' that was peeked */
            else
                ch = '\n';              /* lone '\r' counts as a line end */
        }

        buf[len++] = ch;
    }

    buf[len] = '\0';
    return(len);
}
接着是获取要访问的url,可以是很常见的/,/index.html等等。该程序默认根目录是htdocs,且默认文件是index.html。另外还判断了给定文件是否有可执行权限,如果有,则认为是CGI程序。最后根据变量cgi的值来进行相应选择:读取静态文件或者执行CGI程序返回结果。
我们首先看看最简单的静态文件情况,调用函数serve_file
/*
 * Send a regular file to the client: discard the remaining request
 * headers, then send either the response headers followed by the file
 * contents, or a not-found response if the file cannot be opened.
 * Parameters: client   - the client socket descriptor
 *             filename - the name of the file to serve
 */
void serve_file(int client, const char *filename)
{
    FILE *resource = NULL;
    int numchars = 1;
    char buf[1024];

    /* Seed buf with something other than "\n" so the loop runs at least
     * once. */
    buf[0] = 'A'; buf[1] = '\0';
    /* The client's headers have to be read in full first; per the original
     * author's note, the response is otherwise not displayed correctly by
     * the browser. */
    while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
        numchars = get_line(client, buf, sizeof(buf));

    resource = fopen(filename, "r");
    if (resource == NULL)
    {
        /* Nothing was opened, so there is nothing to fclose(); passing
         * NULL to fclose() is undefined behaviour. */
        not_found(client);
        return;
    }

    headers(client, filename);
    cat(client, resource);
    fclose(resource);
}
将文件名作为参数,首先读完客户端的头部,然后打开文件、创建文件流。为了模拟http响应,首先向客户端发送头部,头部信息至少包含以下几点:
http/1.0 200 ok
server:
content-type:
\r\n(一个空白行,标识头部结束)
最后发送数据体部分,即文件内容,在cat方法中,fgets每读入一行,就send,直到末尾。headers和cat函数就不在这里列出了。下面,我们来看看一个具体测试例子,紧接着在gdb中调试
我在根目录下的htdocs下建立一个新文件index2.html,内容如下:
我在这里放了一个链接,href部分是关于cgi的,先不管,就只看文本部分能否显示在浏览器中。
首先编译之后直接运行./httpd,程序打印"httpd running on port 53079"
我们在浏览器中访问index2.html文件,如下图所示:
文本能正确显示了。那如何在gdb中调试观察呢?
xiaoqiang@ljq-Lenovo:~/chenshi/tinyhttpd-0.1.0$ gdb attach 7029 【通过ps查看httpd进程的PID,然后gdb attach之】
Attaching to process 7029
Reading symbols from /home/xiaoqiang/chenshi/tinyhttpd-0.1.0/httpd...done.
Reading symbols from /lib/i386-linux-gnu/libpthread.so.0...(no debugging symbols found)...done.
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/i386-linux-gnu/libthread_db.so.1".
Loaded symbols for /lib/i386-linux-gnu/libpthread.so.0
Reading symbols from /lib/i386-linux-gnu/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib/i386-linux-gnu/libc.so.6
Reading symbols from /lib/ld-linux.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib/ld-linux.so.2
0xb7750424 in __kernel_vsyscall ()
(gdb) bt
#0 0xb7750424 in __kernel_vsyscall ()
#1 0xb772dc08 in accept () from /lib/i386-linux-gnu/libpthread.so.0
#2 0x0804a8d6 in main () at httpd.c:516
(gdb) l accept_request
warning: Source file is more recent than executable.
47 /* A request has caused a call to accept() on the server port to
48 * return. Process the request appropriately.
49 * Parameters: the socket connected to the client */
50 /**********************************************************************/
51 void accept_request(int client)
52 {
53 char buf[1024];
54 int numchars;
55 char method[255];
56 char url[255];
(gdb) l
57 char path[512];
58 size_t i, j;
59 struct stat st;
60 int cgi = 0; /* becomes true if server decides this is a CGI
61 * program */
62 char *query_string = NULL;
63
64 numchars = get_line(client, buf, sizeof(buf));//从套接字中读取一行
65 i = 0; j = 0;
66 while (!ISspace(buf[j]) && (i < sizeof(method) - 1))
(gdb) b 64 【在64行设置断点,观察读到的是什么】
Breakpoint 1 at 0x8048b3f: file httpd.c, line 64.
(gdb) c
Continuing. 【直到在浏览器中发起了请求,后面的才会打印出来】
[New Thread 0xb63feb40 (LWP 7655)]
[Switching to Thread 0xb63feb40 (LWP 7655)]
Breakpoint 1, accept_request (client=4) at httpd.c:64
64 numchars = get_line(client, buf, sizeof(buf));//从套接字中读取一行
(gdb) n
65 i = 0; j = 0;
(gdb) p buf 【打印读到的一行】
$1 = "GET /index2.html HTTP/1.1\n", '\000' 【果真是HTTP GET请求的第一行】
(gdb) l
60 int cgi = 0; /* becomes true if server decides this is a CGI
61 * program */
62 char *query_string = NULL;
63
64 numchars = get_line(client, buf, sizeof(buf));//从套接字中读取一行
65 i = 0; j = 0;
66 while (!ISspace(buf[j]) && (i < sizeof(method) - 1))
67 {
68 method[i] = buf[j];
69 i++; j++;
(gdb) l
70 }
71 method[i] = '\0';//获取到了HTTP方法
72
73 if (strcasecmp(method, "GET") && strcasecmp(method, "POST"))
74 {
75 //忽略大小写比较
76 unimplemented(client);
77 return;//尚未支持的请求方法,线程返回
78 }
79
(gdb) l serve_file 【其它的细节调试就不在这里演示了,直接跳到serve_file里】
412 * Parameters: a pointer to a file structure produced from the socket
413 * file descriptor
414 * the name of the file to serve */
415 /**********************************************************************/
416 void serve_file(int client, const char *filename)
417 {
418 FILE *resource = NULL;
419 int numchars = 1;
420 char buf[1024];
421
(gdb) l
422 buf[0] = 'A'; buf[1] = '\0';
423 while ((numchars > 0) && strcmp("\n", buf)) /* read & discard headers */
424 numchars = get_line(client, buf, sizeof(buf));
425
426 resource = fopen(filename, "r");
427 if (resource == NULL)
428 not_found(client);
429 else
430 {
431 headers(client, filename);
(gdb) b 426 【在426行设置断点】
Breakpoint 2 at 0x804a247: file httpd.c, line 426.
(gdb) c
Continuing.
Breakpoint 2, serve_file (client=4, filename=0xb63fdf4e "htdocs/index2.html") at httpd.c:426
426 resource = fopen(filename, "r");
(gdb) p filename
$2 = 0xb63fdf4e "htdocs/index2.html"
(gdb) n
427 if (resource == NULL)
(gdb) n
431 headers(client, filename);
(gdb) n
432 cat(client, resource);
(gdb) s 【进入cat里面看看】
cat (client=4, resource=0xb6c00468) at httpd.c:170
170 {
(gdb) l
165 * easier just to do something like pipe, fork, and exec("cat").
166 * Parameters: the client socket descriptor
167 * FILE pointer for the file to cat */
168 /**********************************************************************/
169 void cat(int client, FILE *resource)
170 {
171 char buf[1024];
172
173 fgets(buf, sizeof(buf), resource);
174 while (!feof(resource))
(gdb) n
173 fgets(buf, sizeof(buf), resource);
(gdb) n
174 while (!feof(resource))
(gdb) p buf 【读到了index2.html的一行,然后send】
$3 = "Display Date\n", '\000' , "\"\225^\267\000\000\000\000 \312q\267\000\320t\267 \000\000\000 \312q\267\304Re\267 \000\000\000El^\267\001\000\000\000\000\320t\267 \000\000\000\364\277q\267\360\331?\266V\003_\267\364\277q\267 \000\000\000 \312q\267\000\320t\267\000\000\000\000$k^\267 \312q\267\000\320t\267 ", '\000' , "A\252\004\b\364\277q\267 \000\000\000\377\377\377\377\000\000\000\000\236\201^\267 ", '\000' , " \312q\267U\205^\267 \312q\267\000\320t\267 ", '\000' "\364, \277q\267\001\000\000\000R\252\004\b\000\000\000\000\343v^\267"...
(gdb) n
176 send(client, buf, strlen(buf), 0);
(gdb) n
177 fgets(buf, sizeof(buf), resource);
(gdb) n
174 while (!feof(resource))
(gdb) n
179 }
(gdb) n
serve_file (client=4, filename=0xb63fdf4e "htdocs/index2.html") at httpd.c:434
434 fclose(resource);
(gdb) bt
#0 serve_file (client=4, filename=0xb63fdf4e "htdocs/index2.html") at httpd.c:434
#1 0x08048f83 in accept_request (client=4) at httpd.c:130
#2 0xb7726d4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
#3 0xb7665b8e in clone () from /lib/i386-linux-gnu/libc.so.6
(gdb) n
435 }
(gdb) s
accept_request (client=4) at httpd.c:139
139 close(client); 【直到运行在这里,浏览器的请求才会真正停止,意味着标签栏那个不断旋转的标志就停了】
(gdb) s
140 }
(gdb) s
0xb7726d4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
(gdb) s
Single stepping until exit from function start_thread,
which has no line number information.
[New Thread 0xb5bfdb40 (LWP 7656)]
[Switching to Thread 0xb5bfdb40 (LWP 7656)]
Breakpoint 1, accept_request (client=4) at httpd.c:64
64 numchars = get_line(client, buf, sizeof(buf));//从套接字中读取一行
(gdb) n
[Thread 0xb63feb40 (LWP 7655) exited]
65 i = 0; j = 0;
(gdb) p buf
$4 = "GET /favicon.ico HTTP/1.1\n", '\000' 【再读一行时,竟读到favicon.ico,目前没弄明白这怎么回事(应该是浏览器为获取标签页上的网站图标而自动发起的另一个请求)】
(gdb)
test.sh脚本如下:
#!/bin/sh
#echo "Content-type:text/html"
echo
echo "<html><body>"
time=`date`
echo "<p>Server Time:$time</p>"
echo "</body></html>"
即包括服务器响应给客户的字符数据,顺便把服务器时间传过去。注意要给test.sh添加执行权限,才会被视为执行cgi程序,且href中的端口号要改为你具体的端口号,这里只是个示例。来看当在浏览器中点击“Display Date”时,服务器作出的响应:
(gdb) l execute_cgi 【为了节省空间,以下内容我删除了无关内容】
warning: Source file is more recent than executable.
214 * Parameters: client socket descriptor
215 * path to the CGI script */
216 /**********************************************************************/
217 void execute_cgi(int client, const char *path,
218 const char *method, const char *query_string)
219 {
220 char buf[1024];
229
230 buf[0] = 'A'; buf[1] = '\0';
231 if (strcasecmp(method, "GET") == 0)
(gdb) b 231 【在execute_cgi处设置断点】
Breakpoint 1 at 0x8049555: file httpd.c, line 231.
(gdb) c
Continuing. 【当在浏览器发起请求时,serve_file被调用,但此时断点在execute_cgi处,所以此处没有反应直到鼠标点击链接】
[New Thread 0xb7567b40 (LWP 7708)]
[Thread 0xb7567b40 (LWP 7708) exited]
[New Thread 0xb6bffb40 (LWP 7709)]
[Thread 0xb6bffb40 (LWP 7709) exited]
[New Thread 0xb63feb40 (LWP 7710)]
[Switching to Thread 0xb63feb40 (LWP 7710)]
Breakpoint 1, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET",
query_string=0xb63fe255 "") at httpd.c:231
231 if (strcasecmp(method, "GET") == 0)
(gdb) info args 【查看此函数调用参数值】
client = 4
path = 0xb63fdf4e "htdocs/test.sh" 【文件为test.sh脚本】
method = 0xb63fe14e "GET"
query_string = 0xb63fe255 ""
257
258 if (pipe(cgi_output) < 0) {
259 cannot_execute(client);
260 return;
261 }
262 if (pipe(cgi_input) < 0) {
263 cannot_execute(client);
264 return;
265 }
266
(gdb) b 258 【在创建管道处设置断点】
Breakpoint 2 at 0x804973e: file httpd.c, line 258.
(gdb) c
Continuing.
Breakpoint 2, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET",
query_string=0xb63fe255 "") at httpd.c:258
258 if (pipe(cgi_output) < 0) {
(gdb) n
262 if (pipe(cgi_input) < 0) {
(gdb) n
267 if ( (pid = fork()) < 0 ) {
(gdb) l
262 if (pipe(cgi_input) < 0) {
263 cannot_execute(client);
264 return;
265 }
266
267 if ( (pid = fork()) < 0 ) {
268 cannot_execute(client);
269 return;
270 }
271 if (pid == 0) /* child: CGI script */
(gdb) l
272 {
273 char meth_env[255];
274 char query_env[255];
275 char length_env[255];
276
277 dup2(cgi_output[1], 1);
278 dup2(cgi_input[0], 0);
279 close(cgi_output[0]);
280 close(cgi_input[1]);
281 sprintf(meth_env, "REQUEST_METHOD=%s", method);
(gdb) l
282 putenv(meth_env);
283 if (strcasecmp(method, "GET") == 0) { 【我的测试例子虽说是get请求,但不需要设置什么环境变量】
284 sprintf(query_env, "QUERY_STRING=%s", query_string);
285 putenv(query_env);
286 }
287 else { /* POST */
288 sprintf(length_env, "CONTENT_LENGTH=%d", content_length);
289 putenv(length_env);
290 }
291 execl(path, path, NULL); 【子进程执行test.sh】
(gdb) l
292 exit(0);
293 }
294
295 else { /* parent */
296 close(cgi_output[1]);
297 close(cgi_input[0]);
298 if (strcasecmp(method, "POST") == 0)
299 for (i = 0; i < content_length; i++) {
300 recv(client, &c, 1, 0);
301 write(cgi_input[1], &c, 1);
(gdb) b 298 【由于子进程执行test.sh,父进程发送响应给浏览器,所以先进入父进程,看发的是什么】
Breakpoint 3 at 0x80498ec: file httpd.c, line 298.
(gdb) c
Continuing.
Breakpoint 3, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET",
query_string=0xb63fe255 "") at httpd.c:298
298 if (strcasecmp(method, "POST") == 0)
(gdb) n
304 while (read(cgi_output[0], &c, 1) > 0)
(gdb) l
299 for (i = 0; i < content_length; i++) { 【如果是POST,则父进程还要继续从客户端套接字读取数据体,并写入cgi_input[1];子进程的标准输入已被重定向到cgi_input[0],所以CGI程序从标准输入就能读到数据体,而它的标准输出则经由cgi_output管道回到父进程】
300 recv(client, &c, 1, 0);
301 write(cgi_input[1], &c, 1);
302 }
303
304 while (read(cgi_output[0], &c, 1) > 0)
305 send(client, &c, 1, 0);
306
307 close(cgi_output[0]);
308 close(cgi_input[1]);
(gdb) s 【单步从cgi_output[0]中读】
305 send(client, &c, 1, 0);
(gdb) p c
$1 = 10 '\n'
(gdb) s
305 send(client, &c, 1, 0);
(gdb) p c 【以下部分刚好读到的是test脚本输出的内容,此处从略】
304 while (read(cgi_output[0], &c, 1) > 0)
305 send(client, &c, 1, 0);
306
307 close(cgi_output[0]);
308 close(cgi_input[1]);
309 waitpid(pid, &status, 0);
(gdb) b 307
Breakpoint 4 at 0x80499be: file httpd.c, line 307.
(gdb) c
Continuing.
Breakpoint 4, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET",
query_string=0xb63fe255 "") at httpd.c:307
307 close(cgi_output[0]);
(gdb) n
308 close(cgi_input[1]);
(gdb) n
309 waitpid(pid, &status, 0);
(gdb) n
311 }
(gdb) p status
$7 = 0
(gdb) n
accept_request (client=4) at httpd.c:139
139 close(client); 【直到这里,浏览器才显示了返回结果】
(gdb) n
140 }
(gdb)
结果显示:
当然我在这里只是演示了其中的一种情况,至于其他情况,如GET请求带?查询的、POST请求带数据体的,只有靠读者自己去尝试了,博主暂时抛砖引玉于此。
呃,感觉讲解至此结束了呢。貌似还有一点点细节博主还得继续研究下,总之通过这个例子确实对Linux编程了解了更多了,感谢开源,哈哈!
参考链接
1 http://blog.csdn.net/jcjc918/article/details/42129311
2 http://blog.sina.com.cn/s/blog_a5191b5c0102v9yr.html
3 CGI介绍:http://www.jdon.com/idea/cgi.htm
4 http://www.scholat.com/vpost.html?pid=7337