有限状态机实例:服务端实现HTTP请求的读取和分析
TCP/IP协议在包的头部给出了头部长度字段,但HTTP协议没有提供头部长度字段(头部长度变化很大),而是用一个空行(仅包含\r\n)来标志头部的结束。
如果一次读操作没有读入HTTP请求的整个头部(即没有遇到空行),就需要等待下一次读入。
每完成一次读操作,都要分析新读入的数据中是否有空行(空行之前是请求行和头部字段),所以在寻找\r\n的过程中可以顺便解析http头部。
仅解析GET方法的http请求
#include "me.h"
#define BUFFER_SIZE 4096
// States of the main state machine: which part of the request is being parsed.
enum CHECK_STATE {
    CHECK_STATE_REQUESTLINE = 0,  // the request line is being analysed
    CHECK_STATE_HEADER = 1        // the header fields are being analysed
};
// States of the sub state machine: the outcome of trying to read one line.
enum LINE_STATUS {
    LINE_OK = 0,    // a complete line terminated by \r\n was found
    LINE_BAD = 1,   // a stray \r or \n was found, the line is malformed
    LINE_OPEN = 2   // the line is not complete yet, more data is needed
};
// Result codes produced while processing an HTTP request.
enum HTTP_CODE {
    NO_REQUEST = 0,         // the request is not complete yet, keep reading
    GET_REQUEST = 1,        // a complete request has been parsed
    BAD_REQUEST = 2,        // the request is syntactically invalid or unsupported
    FORRBIDEN_REQUEST = 3,  // not produced by the code shown here (identifier spelling kept as-is)
    INTERNAL_ERROR = 4,     // the main state machine reached an unknown state
    CLOSED_CONNECTION = 5   // not produced by the code shown here
};
// Reply texts sent back to the client: entry 0 for a successfully parsed
// request, entry 1 for every failure.
const char *szret[] = {
    "I get a correct result\n",
    "Something wrong\n"
};
// Sub state machine: tries to cut one line out of the receive buffer.
//
// buffer        - receive buffer; a \r\n terminator that is found is overwritten
//                 in place with two '\0' bytes so the line becomes a C string.
// checked_index - in/out: index of the next byte to examine. After LINE_OK it
//                 points at the first byte following the terminator.
// read_index    - index one past the last byte of valid data in buffer.
//
// Returns LINE_OK when a complete line was found, LINE_OPEN when more data has
// to be read first, and LINE_BAD when a \r or \n appears on its own.
LINE_STATUS parse_line(char *buffer,int &checked_index,int &read_index)
{
    for (; checked_index < read_index; ++checked_index)
    {
        const char current = buffer[checked_index];

        if (current == '\r')
        {
            // \r is the last byte received so far: the matching \n may still
            // arrive, so leave checked_index on the \r and ask for more data.
            if (checked_index + 1 == read_index)
            {
                return LINE_OPEN;
            }
            // Complete \r\n terminator: blank it out and step past it.
            if (buffer[checked_index + 1] == '\n')
            {
                buffer[checked_index++] = '\0';
                buffer[checked_index++] = '\0';
                return LINE_OK;
            }
            // A \r followed by anything else is a syntax error.
            return LINE_BAD;
        }

        if (current == '\n')
        {
            // A \n is only valid directly after a \r. The guard must accept
            // index 1 as well (a \r at index 0), so compare against 0.
            if (checked_index > 0 && buffer[checked_index - 1] == '\r')
            {
                buffer[checked_index - 1] = '\0';
                buffer[checked_index++] = '\0';
                return LINE_OK;
            }
            return LINE_BAD;
        }
    }
    // Everything received so far was examined without finding a terminator.
    return LINE_OPEN;
}
// Analyses the request line "<method> <url> <version>" in place.
//
// temp       - NUL-terminated request line; the blanks separating the three
//              parts are overwritten with '\0' while splitting.
// checkstate - set to CHECK_STATE_HEADER once the line has been accepted.
//
// Only the GET method and HTTP/1.1 are accepted. Returns BAD_REQUEST for any
// violation, otherwise NO_REQUEST (the request is not finished yet).
HTTP_CODE parse_requestline(char* temp,CHECK_STATE &checkstate)
{
    const char *blanks = " \t";

    // Split off the method.
    char *target = strpbrk(temp, blanks);
    if (!target)
    {
        return BAD_REQUEST;
    }
    *target = '\0';
    ++target;

    // Only GET is supported.
    if (strcasecmp(temp, "GET") != 0)
    {
        return BAD_REQUEST;
    }
    printf("The request method is: GET\n");

    // Split off the URL and locate the version string behind it.
    target += strspn(target, blanks);
    char *proto = strpbrk(target, blanks);
    if (!proto)
    {
        return BAD_REQUEST;
    }
    *proto = '\0';
    ++proto;
    proto += strspn(proto, blanks);

    // Only HTTP/1.1 is supported.
    if (strcasecmp(proto, "HTTP/1.1") != 0)
    {
        return BAD_REQUEST;
    }

    // Validate the URL: drop an optional "http://host" prefix, then require
    // the remainder to start with '/'.
    if (strncasecmp(target, "http://", 7) == 0)
    {
        target = strchr(target + 7, '/');
    }
    if (!target || target[0] != '/')
    {
        return BAD_REQUEST;
    }
    printf("The request URL is: %s\n", target);

    // Request line done: the main state machine moves on to the header fields.
    checkstate = CHECK_STATE_HEADER;
    return NO_REQUEST;
}
// Returns the start of a header value: skips name_len bytes of field name
// (including the colon) and any blanks or tabs that follow.
static const char *header_value_of(const char *line, int name_len)
{
    const char *value = line + name_len;
    return value + strspn(value, " \t");
}

// Analyses one header line.
//
// temp - NUL-terminated header line without its \r\n terminator.
//
// Returns GET_REQUEST when the line is empty (end of the header section) and
// NO_REQUEST for every other line. Host, Accept and User-Agent are printed;
// all other fields are reported as unhandled.
HTTP_CODE parse_headers(char *temp)
{
    // An empty line ends the header section: the request is complete.
    if (temp[0] == '\0')
    {
        return GET_REQUEST;
    }

    if (strncasecmp(temp, "Host:", 5) == 0)
    {
        printf("the request host is: %s\n", header_value_of(temp, 5));
        return NO_REQUEST;
    }
    if (strncasecmp(temp, "Accept:", 7) == 0)
    {
        printf("the request Accept is: %s\n", header_value_of(temp, 7));
        return NO_REQUEST;
    }
    if (strncasecmp(temp, "User-Agent:", 11) == 0)
    {
        printf("the request User-Agent is: %s\n", header_value_of(temp, 11));
        return NO_REQUEST;
    }

    // Every other header field is left unprocessed.
    printf("I can not handle this header\n");
    // NO_REQUEST is harmless here: parse_content does not act on it and
    // simply continues with the next line.
    return NO_REQUEST;
}
// Entry point of the parser, called after every recv() that delivered data.
//
// buffer        - receive buffer holding the request read so far.
// checked_index - in/out: next byte to examine (maintained by parse_line).
// checkstate    - in/out: current state of the main state machine.
// read_index    - index one past the last byte of valid data in buffer.
// start_line    - in/out: offset in buffer where the current line begins.
//
// Returns GET_REQUEST once the empty line ending the headers was seen,
// NO_REQUEST when more data is needed, BAD_REQUEST on a syntax error and
// INTERNAL_ERROR when checkstate holds an unknown value.
HTTP_CODE parse_content(char* buffer,int &checked_index,CHECK_STATE &checkstate,int &read_index,int &start_line)
{
    LINE_STATUS line_state;

    for (;;)
    {
        line_state = parse_line(buffer, checked_index, read_index);
        if (line_state != LINE_OK)
        {
            break;
        }

        char *line = buffer + start_line;
        start_line = checked_index;  // where the next line will begin

        // Dispatch on the state of the main state machine.
        if (checkstate == CHECK_STATE_REQUESTLINE)
        {
            if (parse_requestline(line, checkstate) == BAD_REQUEST)
            {
                return BAD_REQUEST;
            }
        }
        else if (checkstate == CHECK_STATE_HEADER)
        {
            const HTTP_CODE outcome = parse_headers(line);
            if (outcome == BAD_REQUEST || outcome == GET_REQUEST)
            {
                return outcome;
            }
        }
        else
        {
            return INTERNAL_ERROR;
        }
    }

    // The loop ended because no complete line was available: either the line
    // is still open (read more on the next recv) or it was malformed.
    return (line_state == LINE_OPEN) ? NO_REQUEST : BAD_REQUEST;
}
int main(int argc,char* argv[])
{
if (argc <= 2)
{
printf("Usage: %s ip-address port\n",basename(argv[0]));
exit(1);
}
const char* ip = argv[1];
int port = atoi(argv[2]);
struct sockaddr_in addr;
memset(&addr,0,sizeof(addr));
addr.sin_family = AF_INET;
inet_pton(AF_INET,ip,&addr.sin_addr);
addr.sin_port = htons(port);
int listenfd = socket(AF_INET,SOCK_STREAM,0);
assert(listenfd >= 0);
int ret = bind(listenfd,(struct sockaddr*)&addr,sizeof(addr));
assert(ret != -1);
ret = listen(listenfd,5);
assert(ret != -1);
struct sockaddr_in client;
socklen_t client_addrlength = sizeof(client);
int fd = accept(listenfd,(struct sockaddr*)&client,&client_addrlength);
if (fd < 0)
{
printf("accept error!\n");
}
else
{
char buffer[BUFFER_SIZE];
memset(&buffer,'\0',sizeof(buffer));
int data_read = 0;
int read_index = 0;
int checked_index = 0;
int start_line = 0;
CHECK_STATE checkstate = CHECK_STATE_REQUESTLINE;//主状态机初始状态是 读取请求行
while(1)
{
data_read = recv(fd,buffer + read_index,BUFFER_SIZE - read_index,0);
if (data_read == -1)
{
printf("read failed,sockfd: %d\n");
break;
}
else if (data_read == 0)
{
printf("remote client has closed the connection\n");
break;
}
read_index += data_read;
HTTP_CODE result = parse_content(buffer,checked_index,checkstate,read_index,start_line);
if (result == NO_REQUEST)//一行没有读取完毕
{
continue;
}
else if (result == GET_REQUEST)//完整的GET方法请求
{
send(fd,szret[0],strlen(szret[0]),0);
// printf("send get_request\n");
break;
}
else
{
send(fd,szret[1],strlen(szret[1]),0);
// printf("send something wrong\n");
break;
}
}
close(fd);
}
close(listenfd);
return 0;
}
头文件 me.h,位于当前目录的子目录include/下
#ifndef ME_H
#define ME_H
// Standard and POSIX headers needed by the HTTP parsing example that includes
// this file; each line names the identifiers that example takes from it.
#include <arpa/inet.h>    // inet_pton
#include <assert.h>       // assert
#include <libgen.h>       // basename
#include <netinet/in.h>   // struct sockaddr_in, htons
#include <stdio.h>        // printf, fprintf, stderr
#include <stdlib.h>       // exit, atoi
#include <string.h>       // memset, strlen, strpbrk, strspn, strchr
#include <strings.h>      // strcasecmp, strncasecmp
#include <sys/socket.h>   // socket, bind, listen, accept, recv, send
#include <sys/types.h>    // basic system types used by the socket API
#include <unistd.h>       // close
// Prints str followed by a newline to stderr and terminates the process with
// exit status 1.
#define error_handle(str) {\
fprintf(stderr,"%s\n",str);\
exit(1);}
#endif
编译:g++ httpstate1.cpp -Iinclude/ -o httpstate1
测试机器(本地ip) 192.168.247.153 随机选择一个端口12345
./httpstate1 192.168.247.153 12345
使用curl模拟GET方法的HTTP请求
curl -v http://192.168.247.153:12345/test
(注意目标的ip和端口)
httpstate1解析结果:
我先查看了curl实际发送的头部字段,然后在代码中添加了对最后2个字段(Accept和User-Agent)的解析。
代码中使用了2个状态机,
主状态机 CHECK_STATE , 初始状态:解析请求行(CHECK_STATE_REQUESTLINE),
主状态机在内部调用了从状态机
从状态机: LINE_STATUS,初始状态:LINE_OK(也是检测到\r\n、读到完整一行时的状态),
当parse_line函数处理新读取的Buffer时:
- 不存在完整的行,则状态转移到LINE_OPEN
- 读取到\r\n,则是完整的行,状态仍是LINE_OK,由主状态机继续处理
- \r或\n单独出现在http请求中,说明http请求语法错误,则转移到LINE_BAD状态
转移到主状态机时:
如果主状态是: 初始状态(CHECK_STATE_REQUESTLINE):则调用parse_requestline()来分析请求行,
如果主状态是:CHECK_STATE_HEADER,则调用parse_headers来分析头部字段,
parse_requestline()在成功分析后将主状态转移到CHECK_STATE_HEADER
<参考Linux高性能服务器编程>