/**
 * Holds the urls waiting to be fetched for one site together with the time
 * of its last access, which fetch() compares against global::waitDuration
 * before opening a new connection.
 */
class IPSite {
private:
/* date of last access : avoid rapid fire */
time_t lastAccess;
/** Is this site currently queued in okSites (i.e. it has something to fetch) */
bool isInFifo;
/** Get an url from the fifo
* resize tab if too big
*/
url *getUrl ();
public:
/** Constructor */
IPSite ();
/** Destructor : never used */
~IPSite ();
/** Urls waiting for being fetched */
Fifo<url> tab;
/** Put an url in the fifo */
void putUrl (url *u);
/** fetch the first page in the fifo okSites
* expects at least one element in freeConns
* return expected time for next call (0 means now)
*/
int fetch (); // fetches the urls queued in tab
};
// Question: if every NamedSite corresponds to an IPSite, why do both IPSite and NamedSite keep their own url queue?
/** Try to start fetching the next url queued in tab.
 *
 * Expects at least one free connection in global::freeConns.
 *
 * Urls whose connection cannot be opened are reported through fetchFail()
 * and answers(), deleted, and the next queued url is tried immediately.
 *
 * @return 0 when a request has been prepared on a connection or when the
 *         queue is empty; otherwise the time (lastAccess +
 *         global::waitDuration) before which this site must not be
 *         contacted again.
 */
int IPSite::fetch () {
  // Iterate instead of recursing: a long run of unreachable urls used to
  // add one stack frame per failure. Each pass re-checks exactly what a
  // fresh call would check.
  for (;;) {
    if (tab.isEmpty()) {
      // no more url to read
      // Also reachable after previous passes dropped every queued url
      // because no connection could be opened.
      isInFifo = false;
      return 0;
    }

    int next_call = lastAccess + global::waitDuration;
    if (next_call > global::now) {
      // Too early to contact this site again: requeue it and tell the
      // caller when it may be retried.
      global::okSites->rePut(this);
      return next_call;
    }

    // Take a free connection and the next url of this site
    Connexion *conn = global::freeConns->get();
    url *u = getUrl();

    // We're allowed to fetch this one
    // open the socket and write the request
    char res = getFds(conn, &(u->addr), u->getPort());
    if (res == emptyC) {
      // Unable to connect: record the failure, drop the url, give the
      // connection back and move on to the next queued url.
      fetchFail(u, noConnection);
      answers(noConnection);
      delete u;
      global::freeConns->put(conn);
      continue;
    }

    lastAccess = global::now;
    conn->timeout = timeoutPage;

    // Build the HTTP request. When a proxy is configured the request line
    // carries u->getUrl(), otherwise only u->getFile().
    conn->request.addString("GET ");
    if (global::proxyAddr != NULL) {
      conn->request.addString(u->getUrl());
    } else {
      conn->request.addString(u->getFile());
    }
    conn->request.addString(" HTTP/1.0\r\nHost: ");
    conn->request.addString(u->getHost());
#ifdef COOKIES
    if (u->cookie != NULL) {
      conn->request.addString("\r\nCookie: ");
      conn->request.addString(u->cookie);
    }
#endif // COOKIES
    conn->request.addString(global::headers);

    // Attach a parser for this url and reset the connection bookkeeping
    conn->parser = new html (u, conn);
    conn->pos = 0;
    conn->err = success;
    conn->state = res;

    // Stay in okSites only while there is still something to fetch
    if (tab.isEmpty()) {
      isInFifo = false;
    } else {
      global::okSites->put(this);
    }
    return 0;
  }
}