// This class stores the web sites that have already been visited;
// each NamedSite corresponds to an IPSite.
class NamedSite {
private:
    /** String used when following a CNAME chain (a single jump only). */
    char* cname;

    /** A usable DNS answer has been received: go on and get the robots.txt. */
    void dnsOK();

    /** The inet address could not be obtained.
     * Precondition: dnsState has been set properly before this call. */
    void dnsErr();

    /** Drop the previous identity of this site. */
    void newId();

    /** Hand the given url over to the IPSite it belongs to. */
    void transfer(url* u);

    /** Give up on the given url, recording `reason` as the cause. */
    void forgetUrl(url* u, FetchError reason);

public:
    /** Constructor. */
    NamedSite();

    /** Destructor (documented by the original author as never used). */
    ~NamedSite();

    /** Host name of the site. */
    char name[maxSiteSize];

    /** Port of the site. */
    uint16_t port;

    /** Number of urls currently held in RAM for this site. */
    uint16_t nburls;

    /** Fifo of the urls of this site that are waiting to be fetched. */
    url* fifo[maxUrlsBySite];

    /** Fifo cursors.
     * NOTE(review): presumably inFifo is the write index and outFifo the
     * read index -- confirm against putInFifo / getInFifo. */
    uint8_t inFifo;
    uint8_t outFifo;

    /** Enqueue a url in the fifo. */
    void putInFifo(url* u);

    /** Dequeue a url from the fifo. */
    url* getInFifo();

    /** Length of the fifo. */
    short fifoLength();

    /** Tells whether this site is currently in a dnsSites.
     * NOTE(review): dnsSites is declared elsewhere; presumably the queue of
     * sites waiting for DNS resolution -- confirm. */
    bool isInFifo;

    /** State of the DNS resolution for this site (see dnsErr).
     * NOTE(review): the encoding of the values is not visible here. */
    char dnsState;

    /** Internet address of this server. */
    struct in_addr addr;

    /** Hash used to determine which IPSite corresponds to this NamedSite. */
    uint ipHash;

    /** Expiration date of the DNS answer and of the fetched robots.txt. */
    time_t dnsTimeout;

    /** Check, according to robots.txt, whether `file` may be fetched. */
    bool testRobots(char* file);

    /** Forbidden paths, as given by robots.txt. */
    Vector<char> forbidden;

    /** Put a url in the fifo.
     * If there are too many of them, the url is put back in UrlsInternal.
     * The fifo must never be filled completely, so `limit` must be at
     * least 1.
     * NOTE(review): `limit` presumably is the number of fifo slots to keep
     * free and `prio` marks a priority url -- confirm in the definition. */
    void putGenericUrl(url* u, int limit, bool prio);

    /** Forwards to putGenericUrl with limit 15, prio false. */
    void putUrl(url* u) { putGenericUrl(u, 15, false); }

    /** Forwards to putGenericUrl with limit 10, prio false. */
    void putUrlWait(url* u) { putGenericUrl(u, 10, false); }

    /** Forwards to putGenericUrl with limit 5, prio true. */
    void putPriorityUrl(url* u) { putGenericUrl(u, 5, true); }

    /** Forwards to putGenericUrl with limit 1, prio true. */
    void putPriorityUrlWait(url* u) { putGenericUrl(u, 1, true); }

    /** Start a new DNS query. */
    void newQuery();

    /** The DNS query ended with success; `ans` carries the answer. */
    void dnsAns(adns_answer* ans);

    /** The robots.txt fetch finished with result `res`: transfer whatever
     * must go into the IPSites. */
    void robotsResult(FetchError res);
};