【网络编程】有限状态机实例:HTTP请求的读取与分析

一、概念

  有限状态机是逻辑单元内部的一种高效编程方法。在它的一个实例——HTTP请求的读取与分析——中,一共用到了两个有限状态机,分别称为主状态机和从状态机,其中主状态机在内部调用从状态机。
  在代码中,主状态机使用checkstate变量来记录当前的状态。其中,一共有以下两种状态。

  • CHECK_STATE_REQUESTLINE,表示当前行是请求行,主状态机调用parse_requestline来分析请求行。
  • CHECK_STATE_HEADER,表示当前行是头部字段,主状态机调用parse_headers来分析头部字段。

  在从状态机(parse_line)中设置了三个状态(LINE_OK、LINE_BAD、LINE_OPEN)。主循环通过recv函数读入用户数据到缓冲区,从状态机再根据缓冲区中的用户数据实现状态的转换。

二、代码

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <libgen.h>
#include <fcntl.h>
/* Size, in bytes, of the buffer that receives client data. */
#define BUFFER_SIZE 4096

/*
 * States of the main state machine:
 *   CHECK_STATE_REQUESTLINE - the current line is the request line
 *   CHECK_STATE_HEADER      - the current line is a header field
 */
enum CHECK_STATE {
    CHECK_STATE_REQUESTLINE = 0,
    CHECK_STATE_HEADER
};

/*
 * States of the slave (per-line) state machine:
 *   LINE_OK   - a complete line was read
 *   LINE_BAD  - the line is malformed
 *   LINE_OPEN - the line is not complete yet
 */
enum LINE_STATUS {
    LINE_OK = 0,
    LINE_BAD,
    LINE_OPEN
};

/*
 * Result of processing an HTTP request:
 *   NO_REQUEST        - request incomplete, more client data must be read
 *   GET_REQUEST       - a complete client request was obtained
 *   BAD_REQUEST       - the client request has a syntax error
 *   FORBIDDEN_REQUEST - the client lacks permission for the resource
 *   INTERNAL_ERROR    - internal server error
 *   CLOSED_CONNECTION - the client has closed the connection
 */
enum HTTP_CODE {
    NO_REQUEST,
    GET_REQUEST,
    BAD_REQUEST,
    FORBIDDEN_REQUEST,
    INTERNAL_ERROR,
    CLOSED_CONNECTION
};

/*
 * To keep the example simple no full HTTP response is sent; the server only
 * replies with one of these messages: index 0 on success, index 1 on failure.
 */
static const char* szret[] = {
    "I get a correct result\n",
    "Something wrong\n"
};


/*
 * Slave state machine: scans buffer[checked_index, read_index) for the end of
 * one line terminated by "\r\n".
 *
 * buffer        - receive buffer; on LINE_OK the "\r\n" pair is overwritten
 *                 with '\0' bytes so the line becomes a C string.
 * checked_index - index of the byte currently being examined (in/out). On
 *                 LINE_OK it is advanced to the first byte after the line.
 * read_index    - index one past the last byte of data held in buffer.
 *
 * Returns LINE_OK when a complete line was found, LINE_OPEN when more data is
 * needed, and LINE_BAD when a '\r' or '\n' appears outside of a "\r\n" pair.
 */
LINE_STATUS parse_line(char* buffer, int& checked_index, int& read_index){
    for(; checked_index < read_index; checked_index++){
        char temp = buffer[checked_index];
        if(temp == '\r'){
            /* '\r' is the last byte available: the matching '\n' may still be
               in flight. checked_index stays on the '\r' so that it is
               examined again on the next call. */
            if((checked_index + 1) == read_index){
                return LINE_OPEN;
            }
            if(buffer[checked_index + 1] == '\n'){
                buffer[checked_index ++] = '\0';
                buffer[checked_index ++] = '\0';
                return LINE_OK;
            }
            return LINE_BAD;
        }
        else if(temp == '\n'){
            /* Compare the PREVIOUS byte with '\r'. The index must be
               checked_index - 1 (a subtraction, not an assignment), and the
               guard must allow checked_index == 1 so that a "\r\n" located at
               the very start of the buffer is accepted. */
            if((checked_index >= 1) && (buffer[checked_index - 1] == '\r')){
                buffer[checked_index - 1] = '\0';
                buffer[checked_index ++] = '\0';
                return LINE_OK;
            }
            return LINE_BAD;
        }
    }
    /* Ran out of data without seeing a line terminator. */
    return LINE_OPEN;
}

/*
 * Parses the request line "METHOD URL VERSION" held in temp (a
 * NUL-terminated string, modified in place: separators are overwritten
 * with '\0').
 *
 * Only the GET method and version HTTP/1.1 are accepted. The URL may be an
 * absolute "http://host/path" form, in which case the scheme and host are
 * skipped; the remaining path must start with '/'.
 *
 * On success the method and URL are printed, checkstate is switched to
 * CHECK_STATE_HEADER and NO_REQUEST is returned (the request is not complete
 * yet). Any syntax problem yields BAD_REQUEST and leaves checkstate untouched.
 */
HTTP_CODE parse_requestline(char* temp, CHECK_STATE& checkstate){
    /* Fields are separated by a space in a standard request line; a tab is
       tolerated as well. */
    char* url = strpbrk(temp, " \t");
    if(!url){
        return BAD_REQUEST;
    }
    *url ++ = '\0';
    char* method = temp;

    if(strcasecmp(method, "GET") == 0){
        printf("The request method is GET\n");
    }
    else{
        return BAD_REQUEST;
    }

    /* Skip any additional separators in front of the URL. */
    url += strspn(url, " \t");
    char* version = strpbrk(url, " \t");
    if(!version){
        return BAD_REQUEST;
    }

    *version ++ = '\0';
    version += strspn(version, " \t");

    if(strcasecmp(version, "HTTP/1.1") != 0){
        return BAD_REQUEST;
    }

    /* Absolute form: drop "http://host" and keep the path part. */
    if(strncasecmp(url, "http://", 7) == 0){
        url += 7;
        url = strchr(url, '/');
    }

    /* A usable URL exists and begins with '/'; everything else is rejected. */
    if(!url || url[0] != '/'){
        return BAD_REQUEST;
    }

    printf("The url is: %s\n", url);
    checkstate = CHECK_STATE_HEADER;
    return NO_REQUEST;
}

/*
 * Analyses one header line held in temp (a NUL-terminated string).
 *
 * An empty line marks the end of the header section, so GET_REQUEST is
 * returned. A "Host:" header (matched case-insensitively) has its value
 * printed; any other header is reported as unhandled. In both of those cases
 * NO_REQUEST is returned because more lines are expected.
 */
HTTP_CODE parse_headers(char* temp){
    /* An empty line means the request is complete and well-formed. */
    if(temp[0] == '\0'){
        return GET_REQUEST;
    }
    else if(strncasecmp(temp, "HOST:", 5) == 0){
        temp += 5;
        /* Skip the optional whitespace (spaces or tabs) before the value. */
        temp += strspn(temp, " \t");
        printf("The request host is : %s\n", temp);
    }
    else{
        printf("I can't handle this header\n");
    }
    return NO_REQUEST;
}

/*
 * Entry point of the HTTP request analysis (main state machine).
 *
 * Repeatedly asks parse_line for the next complete line in buffer and hands
 * it to the parser selected by checkstate.
 *
 * buffer        - receive buffer holding the client data
 * checked_index - cursor used by parse_line (in/out)
 * checkstate    - current main state; parse_requestline may advance it
 * read_index    - one past the last byte of data in buffer
 * start_line    - offset of the current line in buffer (in/out); updated to
 *                 the start of the following line after each complete line
 *
 * Returns BAD_REQUEST or GET_REQUEST as soon as a line parser reports it,
 * INTERNAL_ERROR for an unknown checkstate, NO_REQUEST when the data ends in
 * the middle of a line, and BAD_REQUEST when parse_line reports a bad line.
 */
HTTP_CODE parse_content(char* buffer, int& checked_index, CHECK_STATE& checkstate, int& read_index, int& start_line){
    LINE_STATUS line_state;

    for(;;){
        line_state = parse_line(buffer, checked_index, read_index);
        if(line_state != LINE_OK){
            break;
        }

        /* The line just completed begins at start_line; the next one begins
           where parse_line left its cursor. */
        char* line = buffer + start_line;
        start_line = checked_index;

        if(checkstate == CHECK_STATE_REQUESTLINE){
            /* First state: analyse the request line. */
            if(parse_requestline(line, checkstate) == BAD_REQUEST){
                return BAD_REQUEST;
            }
        }
        else if(checkstate == CHECK_STATE_HEADER){
            /* Second state: analyse a header field. */
            HTTP_CODE outcome = parse_headers(line);
            if(outcome == BAD_REQUEST || outcome == GET_REQUEST){
                return outcome;
            }
        }
        else{
            return INTERNAL_ERROR;
        }
    }

    /* No complete line left: either more data is needed or the line is bad. */
    return (line_state == LINE_OPEN) ? NO_REQUEST : BAD_REQUEST;
}

int main(int argc, char* argv[]){
    if(argc <= 2){
        printf("usage: %s ip_address port_number\n", basename(argv[0]));
        return 1;
    }
    const char* ip = argv[1];
    int port = atoi(argv[2]);

    struct sockaddr_in address;
    bzero(&address, sizeof(address));
    address.sin_family = AF_INET;
    inet_pton(AF_INET, ip, &address.sin_addr);
    address.sin_port = htons(port);

    int listenfd = socket(PF_INET, SOCK_STREAM, 0);
    assert(listenfd >= 0);
    int ret = bind(listenfd, (struct sockaddr*)&address, sizeof(address));
    assert(ret != -1);
    ret = listen(listenfd, 5);
    assert(ret != -1);
    struct sockaddr_in client_address;
    socklen_t client_addrlength = sizeof(client_address);
    int fd = accept(listenfd, (struct sockaddr*)&client_address, &client_addrlength);
    
    if(fd < 0)
        printf("errno is %d\n", errno);
    else{
        char buffer[BUFFER_SIZE];
        memset(buffer, '\0', BUFFER_SIZE);
        int data_read = 0;
        int read_index = 0;
        int checked_index = 0;
        int start_line = 0;

        /*设置主状态机初始状态*/
        CHECK_STATE checkstate = CHECK_STATE_REQUESTLINE;
        while(1){              /*读取客户数据并分析*/
            data_read = recv(fd, buffer + read_index, BUFFER_SIZE - read_index, 0);
            if(data_read == -1){
                printf("reading failed\n");
                break;
            }
            else if(data_read == 0){
                printf("remote client has closed the connection\n");
                break;
            }
            read_index += data_read;
            /*分析目前获得的所有客户数据*/
            HTTP_CODE result = parse_content(buffer, checked_index, checkstate, read_index, start_line);
            if(result == NO_REQUEST)
                continue;
            else if(result == GET_REQUEST){
                send(fd, szret[0], strlen(szret[0]), 0);
                break;
            }
            else{
                send(fd, szret[1], strlen(szret[1]), 0);
                break;
            }
        }
        close(fd);
    }
    close(listenfd);
    return 0;
}



你可能感兴趣的:(网络编程,网络,http)