有限状态机实例:服务端实现简单的HTTP请求的读取和分析

有限状态机实例:服务端实现HTTP请求的读取和分析

TCP/IP协议在包的头部给出了头部长度字段,但HTTP协议没有提供头部长度字段(头部长度变化很大),而是用一个空行(仅包含\r\n的行)来标志头部的结束。

如果一次读操作没有读入HTTP请求的整个头部,即没有遇到空行,则需要等待客户端继续发送数据,再次读入。

每完成一次读操作,都要分析新读入的数据中是否有空行(空行前面是请求行和头部字段),所以在寻找\r\n的过程中可以顺便解析HTTP头部。

仅解析GET方法的http请求

#include "me.h"
#define BUFFER_SIZE 4096 

// Main state machine states: which part of the HTTP request is being parsed.
enum CHECK_STATE {
  CHECK_STATE_REQUESTLINE = 0,  // parsing the request line (the initial state)
  CHECK_STATE_HEADER            // parsing header fields (entered after a valid request line)
};

// Sub state machine states: outcome of scanning the buffer for one line.
enum LINE_STATUS {
  LINE_OK = 0,  // a complete line terminated by "\r\n" was found
  LINE_BAD,     // a '\r' or '\n' appeared outside of a "\r\n" pair
  LINE_OPEN     // no complete line yet; more data has to be read
};

// Result codes produced while parsing an HTTP request.
enum HTTP_CODE {
  NO_REQUEST,         // request is not complete yet; keep reading and parsing
  GET_REQUEST,        // the blank line ending the headers was reached: a full GET request was parsed
  BAD_REQUEST,        // malformed line, or an unsupported method / version / URL
  FORRBIDEN_REQUEST,  // not used in this file; presumably "forbidden" (identifier is misspelled) - TODO confirm
  INTERNAL_ERROR,     // the main state machine was in an unknown state
  CLOSED_CONNECTION   // not used in this file; presumably "client closed the connection" - TODO confirm
};

// Replies sent back to the client by main(): [0] after a complete GET request
// has been parsed, [1] for every other final parse result.
const char* szret[] = {"I get a correct result\n","Something wrong\n"};

// Sub state machine: scans buffer[checked_index, read_index) for one line
// terminated by "\r\n".
//
// Parameters:
//   buffer        - receive buffer. When a complete line is found, its "\r\n"
//                   terminator is overwritten with two '\0' bytes so the line
//                   can be used as a C string.
//   checked_index - in/out index of the next byte to examine. On LINE_OK it is
//                   moved to the first byte after the terminator.
//   read_index    - index one past the last byte received so far (only read here).
//
// Returns:
//   LINE_OK   - a complete line was found and terminated.
//   LINE_OPEN - the data ran out before a full "\r\n" was seen; call again
//               after more data has been appended.
//   LINE_BAD  - a '\r' not followed by '\n', or a '\n' not preceded by '\r'.
LINE_STATUS parse_line(char *buffer,int &checked_index,int &read_index)
{
  for (;checked_index < read_index; ++checked_index)
  {
    const char current = buffer[checked_index];

    if (current == '\r')
    {
      // '\r' is the last byte received: the matching '\n' may still arrive.
      // checked_index is left on the '\r' so the next call re-examines it.
      if ((checked_index + 1) == read_index)
      {
        return LINE_OPEN;
      }
      // "\r\n": a complete line. Replace the terminator with NULs and step past it.
      if (buffer[checked_index + 1] == '\n')
      {
        buffer[checked_index++] = '\0';
        buffer[checked_index++] = '\0';
        return LINE_OK;
      }
      // A lone '\r' inside the request is a syntax error.
      return LINE_BAD;
    }

    if (current == '\n')
    {
      // A '\n' is only valid right after a '\r'. The lower bound is "> 0"
      // (not "> 1") so that a '\n' at index 1 preceded by '\r' at index 0
      // is accepted while buffer[-1] is still never read.
      if ((checked_index > 0) && buffer[checked_index - 1] == '\r')
      {
        buffer[checked_index - 1] = '\0';
        buffer[checked_index++] = '\0';
        return LINE_OK;
      }
      return LINE_BAD;
    }
  }

  // Everything received so far was examined without finding a line end.
  return LINE_OPEN;
}

// Parses the request line "<method> <url> <version>" in place.
//
// Parameters:
//   temp       - NUL-terminated request line; the blanks separating the three
//                fields are overwritten with '\0' while parsing.
//   checkstate - set to CHECK_STATE_HEADER once the request line is accepted.
//
// Returns NO_REQUEST when the line is valid (the request as a whole is not
// finished yet), BAD_REQUEST otherwise. Only the GET method and HTTP/1.1 are
// accepted, and the URL (after an optional "http://host" prefix) must start
// with '/'.
HTTP_CODE parse_requestline(char* temp,CHECK_STATE &checkstate)
{
  static const char blanks[] = " \t";

  // Cut the method off at the first blank.
  char *target = strpbrk(temp, blanks);
  if (!target)
  {
    return BAD_REQUEST;
  }
  *target = '\0';
  ++target;

  // Only GET is supported.
  if (strcasecmp(temp, "GET") != 0)
  {
    return BAD_REQUEST;
  }
  printf("The request method is: GET\n");

  // Skip extra blanks, then cut the URL off where the version starts.
  target += strspn(target, blanks);
  char *protocol = strpbrk(target, blanks);
  if (!protocol)
  {
    return BAD_REQUEST;
  }
  *protocol = '\0';
  ++protocol;
  protocol += strspn(protocol, blanks);

  // Only HTTP/1.1 is supported.
  if (strcasecmp(protocol, "HTTP/1.1") != 0)
  {
    return BAD_REQUEST;
  }

  // For an absolute URL, drop "http://host" and keep the path part.
  if (strncasecmp(target, "http://", 7) == 0)
  {
    target = strchr(target + 7, '/');
  }
  if (!target || *target != '/')
  {
    return BAD_REQUEST;
  }

  printf("The request URL is: %s\n", target);

  // Request line done: the main state machine moves on to the header fields.
  checkstate = CHECK_STATE_HEADER;
  return NO_REQUEST;
}

// Handles one header line (already NUL-terminated by the line scanner).
//
// An empty line marks the end of the headers and yields GET_REQUEST. The
// Host, Accept and User-Agent fields are printed; any other field is only
// reported as unhandled. Every non-empty line yields NO_REQUEST, which makes
// parse_content go on with the next line.
HTTP_CODE parse_headers(char *temp)
{
  // Blank line: the whole request has been read.
  if (*temp == '\0')
  {
    return GET_REQUEST;
  }

  if (strncasecmp(temp, "Host:", 5) == 0)
  {
    const char *value = temp + 5;
    printf("the request host is: %s\n", value + strspn(value, " \t"));
    return NO_REQUEST;
  }

  if (strncasecmp(temp, "Accept:", 7) == 0)
  {
    const char *value = temp + 7;
    printf("the request Accept is: %s\n", value + strspn(value, " \t"));
    return NO_REQUEST;
  }

  if (strncasecmp(temp, "User-Agent:", 11) == 0)
  {
    const char *value = temp + 11;
    printf("the request User-Agent is: %s\n", value + strspn(value, " \t"));
    return NO_REQUEST;
  }

  // No other header field is interpreted.
  printf("I can not handle this header\n");
  return NO_REQUEST;
}

// Main state machine; called after every recv() that appended data to buffer.
//
// Parameters:
//   buffer        - receive buffer holding the request read so far.
//   checked_index - in/out scan position maintained by parse_line.
//   checkstate    - in/out main state (request line or header fields).
//   read_index    - index one past the last byte received.
//   start_line    - in/out offset in buffer where the next unparsed line begins.
//
// Returns:
//   GET_REQUEST    - the blank line ending the headers was reached.
//   BAD_REQUEST    - a malformed line or an unacceptable request line.
//   INTERNAL_ERROR - checkstate held an unknown value.
//   NO_REQUEST     - the data ran out in the middle of a line; call again
//                    after the next recv().
HTTP_CODE parse_content(char* buffer,int &checked_index,CHECK_STATE &checkstate,int &read_index,int &start_line)
{
  for (;;)
  {
    const LINE_STATUS status = parse_line(buffer,checked_index,read_index);
    if (status == LINE_OPEN)
    {
      // Incomplete line: wait for more data.
      return NO_REQUEST;
    }
    if (status != LINE_OK)
    {
      // Stray '\r' or '\n': the request is malformed.
      return BAD_REQUEST;
    }

    // A full line is available at start_line; remember where the next one begins.
    char *line = buffer + start_line;
    start_line = checked_index;

    if (checkstate == CHECK_STATE_REQUESTLINE)
    {
      if (parse_requestline(line,checkstate) == BAD_REQUEST)
      {
        return BAD_REQUEST;
      }
    }
    else if (checkstate == CHECK_STATE_HEADER)
    {
      const HTTP_CODE code = parse_headers(line);
      if (code == BAD_REQUEST || code == GET_REQUEST)
      {
        return code;
      }
    }
    else
    {
      return INTERNAL_ERROR;
    }
  }
}

// Usage: <program> ip-address port
//
// Listens on the given IPv4 address and port, accepts a single connection,
// reads from it until the HTTP request has been parsed completely or has been
// found invalid, sends a one-line reply and exits.
int main(int argc,char* argv[])
{
  if (argc <= 2)
  {
    printf("Usage: %s ip-address port\n",basename(argv[0]));
    exit(1);
  }

  const char* ip = argv[1];
  int port = atoi(argv[2]);

  struct sockaddr_in addr;
  memset(&addr,0,sizeof(addr));

  addr.sin_family = AF_INET;
  // inet_pton returns 1 only for a valid address; without this check a bad
  // address would leave sin_addr zeroed and the server would bind to 0.0.0.0.
  if (inet_pton(AF_INET,ip,&addr.sin_addr) != 1)
  {
    printf("invalid IPv4 address: %s\n",ip);
    exit(1);
  }
  addr.sin_port = htons(port);

  int listenfd = socket(AF_INET,SOCK_STREAM,0);
  assert(listenfd >= 0);
  int ret = bind(listenfd,(struct sockaddr*)&addr,sizeof(addr));
  assert(ret != -1);
  ret = listen(listenfd,5);
  assert(ret != -1);

  struct sockaddr_in client;
  socklen_t client_addrlength = sizeof(client);
  int fd = accept(listenfd,(struct sockaddr*)&client,&client_addrlength);
  if (fd < 0)
  {
    printf("accept error!\n");
  }
  else
  {
    char buffer[BUFFER_SIZE];
    memset(buffer,'\0',sizeof(buffer));

    int data_read = 0;
    int read_index = 0;     // number of bytes received so far
    int checked_index = 0;  // number of bytes already scanned by parse_line
    int start_line = 0;     // offset of the line currently being assembled

    // The main state machine starts with the request line.
    CHECK_STATE checkstate = CHECK_STATE_REQUESTLINE;

    while(1)
    {
      // With a full buffer recv() would be asked for 0 bytes and return 0,
      // which would be mistaken for the peer closing the connection.
      if (read_index >= BUFFER_SIZE)
      {
        printf("request does not fit into the %d-byte buffer\n",BUFFER_SIZE);
        send(fd,szret[1],strlen(szret[1]),0);
        break;
      }

      data_read = recv(fd,buffer + read_index,BUFFER_SIZE - read_index,0);
      if (data_read == -1)
      {
        printf("read failed,sockfd: %d\n",fd);
        break;
      }
      else if (data_read == 0)
      {
        printf("remote client has closed the connection\n");
        break;
      }
      read_index += data_read;

      HTTP_CODE result = parse_content(buffer,checked_index,checkstate,read_index,start_line);
      if (result == NO_REQUEST)
      {
        // The request is still incomplete: read more data.
        continue;
      }
      else if (result == GET_REQUEST)
      {
        // A complete GET request was received.
        send(fd,szret[0],strlen(szret[0]),0);
        break;
      }
      else
      {
        send(fd,szret[1],strlen(szret[1]),0);
        break;
      }
    }
    close(fd);
  }
  close(listenfd);
  return 0;
}

头文件 me.h,位于当前目录的子目录include/下

#ifndef ME_H
#define ME_H

// NOTE(review): the header names of the original #include lines were lost
// (only bare "#include" remained). The list below is reconstructed from the
// library calls the program makes; it may be shorter than the original list.

// C standard library
#include <assert.h>   // assert
#include <stdio.h>    // printf, fprintf
#include <stdlib.h>   // exit, atoi
#include <string.h>   // memset, strlen, strchr, strpbrk, strspn

// POSIX
#include <strings.h>  // strcasecmp, strncasecmp
#include <libgen.h>   // basename
#include <unistd.h>   // close

// Sockets
#include <sys/types.h>
#include <sys/socket.h>  // socket, bind, listen, accept, recv, send
#include <netinet/in.h>  // struct sockaddr_in, htons
#include <arpa/inet.h>   // inet_pton

// Prints str followed by a newline to stderr and exits with status 1.
// The macro expands to a brace block, so writing "error_handle(x);" directly
// before an "else" does not compile; wrap such a call in its own braces.
#define error_handle(str) {\
fprintf(stderr,"%s\n",str);\
exit(1);}
#endif 

编译:g++ httpstate1.cpp -Iinclude/ -o httpstate1

测试

测试机器(本地ip) 192.168.247.153 随机选择一个端口12345

./httpstate1 192.168.247.153 12345

使用curl模拟GET方法的HTTP请求

curl -v http://192.168.247.153:12345/test (注意目标的ip和端口)


curl模拟的http请求:
有限状态机实例:服务端实现简单的HTTP请求的读取和分析_第1张图片

httpstate1解析结果:

有限状态机实例:服务端实现简单的HTTP请求的读取和分析_第2张图片

我是在查看了curl实际发送的头部字段之后,才在代码中添加了对最后2个字段(Accept和User-Agent)的解析。


1

代码中使用了2个状态机,

主状态机 CHECK_STATE , 初始状态:解析请求行(CHECK_STATE_REQUESTLINE),

主状态机在内部调用了从状态机

从状态机: LINE_STATUS,初始状态:LINE_OK(也是检测到\r\n、即读取到完整一行时的状态),

parse_line函数处理新读入Buffer的数据时,状态转移如下:

  1. 尚未读取到完整的行,则状态转移到LINE_OPEN,需要继续读取数据
  2. 读取到\r\n,则是完整的行,状态仍是LINE_OK,交由主状态机继续处理
  3. \r或\n单独出现在http请求中,说明http请求语法错误,则转移到LINE_BAD状态

转移到主状态机时:

如果主状态是: 初始状态(CHECK_STATE_REQUESTLINE):则调用parse_requestline()来分析请求行,

如果主状态是:CHECK_STATE_HEADER,则调用parse_headers()来分析头部字段,

parse_requestline()在成功分析后将主状态转移到CHECK_STATE_HEADER

参考:《Linux高性能服务器编程》

你可能感兴趣的:(计算机网络)