/** Queue of urls waiting to be fetched, together with the bookkeeping
 * fetch() uses to space out successive connections.
 * NOTE(review): going by the name, there is presumably one instance per
 * IP address - confirm against the code that creates these objects.
 */
class IPSite {

 private:

  /* Date of the last access. fetch() sets it to global::now each time it
   * opens a connection and refuses to open another one before
   * lastAccess + global::waitDuration (avoids rapid fire).
   */

  time_t lastAccess;

  /** Is this site in okSites (i.e. does it have something to fetch).
   * fetch() clears the flag when it finds tab empty.
   */

  bool isInFifo;

  /** Get an url from the fifo
   * resize tab if too big
   */

  url *getUrl ();

 public:

  /** Constructor */

  IPSite ();

  /** Destructor : never used */

  ~IPSite ();

  /** Urls waiting to be fetched */

  Fifo tab;

  /** Put an url in the fifo */

  void putUrl (url *u);

  /** Fetch the first page in the fifo okSites
   * expects at least one element in freeConns
   * return expected time for next call (0 means now)
   */

  int fetch ();             // starts the fetch of the next url queued in tab

};

 

 

疑问:  如果每一个NamedSite都对应一个IPSite,为什么IPSite和NamedSite中都各有一个url的队列?

 

 

int IPSite::fetch () {

  if (tab.isEmpty()) {        抓取tab中的url

         // no more url to read

         // This is possible because this function can be called recursively

         isInFifo = false;

    return 0;

  } else {

    int next_call = lastAccess + global::waitDuration;

    if (next_call > global::now) {

      global::okSites->rePut(this);

      return next_call;

    } else {

      Connexion *conn = global::freeConns->get();         //建立链接

      url *u = getUrl();          //获取IPSite中的url

      // We're allowed to fetch this one

      // open the socket and write the request

      char res = getFds(conn, &(u->addr), u->getPort());

      if (res != emptyC) {

        lastAccess = global::now;

        conn->timeout = timeoutPage;

 

// http报文的组建

 

        conn->request.addString("GET ");

        if (global::proxyAddr != NULL) {

          char *tmp = u->getUrl();

          conn->request.addString(tmp);

        } else {

          conn->request.addString(u->getFile());

        }

        conn->request.addString(" HTTP/1.0\r\nHost: ");

        conn->request.addString(u->getHost());

#ifdef COOKIES

        if (u->cookie != NULL) {

          conn->request.addString("\r\nCookie: ");

          conn->request.addString(u->cookie);

        }

#endif // COOKIES

        conn->request.addString(global::headers);

        conn->parser = new html (u, conn);

        conn->pos = 0;

        conn->err = success;

        conn->state = res;

        if (tab.isEmpty()) {

          isInFifo = false;

        } else {

          global::okSites->put(this);

        }

        return 0;

      } else {

        // Unable to connect

        fetchFail(u, noConnection);      //抓取失败则记录原因

        answers(noConnection);

        delete u;

        global::freeConns->put(conn);     //有待进一步研究

        return fetch();    //递归抓取

      }   

    }

  }

}