larbin源码之global.h

 1 /** This represent a connection : we have a fixed number of them

 2 * fetchOpen links them with servers

 3 * fetchPipe reads those which are linked

 4 */

 5 //表达连接相关信息

 6 struct Connexion {

 7     char state;      // socket状态 : EMPTY, CONNECTING, WRITE, OPEN

 8     int pos;         //请求发送位置标记

 9     FetchError err;  // 连接如何终止,enum

10     int socket;      // socket descriptor number

11     int timeout;  // 连接超时时间设置

12     LarbinString request;  // http请求报头

13     file *parser;    // 解析连接 (a robots.txt or an html file)

14     char buffer[maxPageSize];//下载的网页数据

15     Connexion();//初始化state=emptyC,parser=NULL

16     ~Connexion();//不执行,一旦执行就出错,assert(false)

17     void recycle();//释放*parser,再次初始化request

18 };

该结构体主要有两个类类型的成员:LarbinString 和 file。

LarbinString类(string.h声明,string.cc实现),主要封装对http报头字符串的相关操作

// String buffer used for the HTTP request header
// (declared in string.h, implemented in string.cc).
class LarbinString {
private:
    char *chaine;  // HTTP header string
    uint pos;      // current position marker in the header
    uint size;     // size of the header

public:
    LarbinString(uint size = STRING_SIZE);  // initializes *chaine, pos = 0 and size
    ~LarbinString();                        // frees *chaine
    void recycle(uint size = STRING_SIZE);  // reallocates *chaine
    char *getString();                      // returns *chaine
    char *giveString();                     // returns a copy of *chaine
    void addChar(char c);                   // chaine[pos] = c
    void addString(char *s);                // appends *s to *chaine (starting at pos)
    void addBuffer(char *s, uint len);      // appends *s to *chaine (starting at pos)
    inline uint getLength() { return pos; } // returns pos
    // Overloaded [] operator. The body is not shown in this listing, so the
    // declaration is terminated here to keep the class syntactically valid.
    inline char operator [] (uint i);
    void setChar(uint i, char c);           // chaine[i] = c
};

file类:html类和robots类二者都继承自file类,用于解析连接下载到的内容(robots.txt 或 .html 文件)。

// Abstract base class of the parsers attached to a connection
// (used for a robots.txt or an html file).
class file {

protected:

    char *buffer;    // the Connexion's buffer[maxPageSize (100000)]: the downloaded page data

    char *posParse;    // current parsing position

public:

    file(Connexion *conn);    // initializes buffer and posParse to conn->buffer, and pos to 0

    virtual ~file();

    bool isRobots;    // Is it a robots.txt

    uint pos;    // current position in *buffer

    // a string arrives from the server

    virtual int inputHeaders(int size) = 0; // just parse headers

    virtual int endInput() = 0;

};



class html : public file {

private:

    url *here;    //url地址

    char *area;    //当前感兴趣区的起始位置

    char *contentStart;    //真正内容的起始位置,报头之后的内容

    url *base;    //url基地址

    /* manage a new url : verify and send it */

    void manageUrl(url *nouv, bool isRedir);

 

你可能感兴趣的:(global)