爬虫Larbin解析(二)——sequencer()

分析的函数: void sequencer() 

//位置:larbin-2.6.3/src/fetch/sequencer.cc
void sequencer()
{
    bool testPriority = true;
    if (space == 0)                // uint space = 0
    {
        space = global::inter->putAll();
    }
    int still = space;
    if (still > maxPerCall)        // #define maxPerCall 100
        still = maxPerCall;
    while (still)
    {
        if (canGetUrl(&testPriority))
        {
            --space;
            --still;
        }
        else
        {
            still = 0;
        }
    }
}

所在的文件

larbin-2.6.3/src/fetch/sequencer.h、larbin-2.6.3/src/fetch/sequencer.cc

爬虫Larbin解析(二)——sequencer()
// Larbin

// Sebastien Ailleret

// 15-11-99 -> 15-11-99



#ifndef SEQUENCER_H

#define SEQUENCER_H



/** only for debugging, handle with care */

extern uint space;



/** Call the sequencer */

void sequencer ();



#endif
View Code
爬虫Larbin解析(二)——sequencer()
// Larbin

// Sebastien Ailleret

// 15-11-99 -> 04-01-02



#include <iostream.h>



#include "options.h"



#include "global.h"

#include "types.h"

#include "utils/url.h"

#include "utils/debug.h"

#include "fetch/site.h"



static bool canGetUrl (bool *testPriority);

uint space = 0;



#define maxPerCall 100



/** start the sequencer*/

//按优先度将URL放到代爬站点

// Move as many urls as the global budget ("space") allows — at most
// maxPerCall per invocation — from the central FIFOs into the per-site
// queues (via canGetUrl). Stops early when no queue can supply a url.
void sequencer() 

{

    // Priority queues are tried first; canGetUrl() clears this flag once
    // they are empty so later iterations skip straight to the disk FIFOs.
    bool testPriority = true;

    if (space == 0) 

    {

        // Budget exhausted: ask the global Interval for all remaining slots.
        space = global::inter->putAll();

    }

    int still = space;

    if (still > maxPerCall)  //#define maxPerCall 100

        still = maxPerCall;

    while (still) 

    {

        if (canGetUrl(&testPriority)) 

        {

            space--; 

            still--; 

        } 

        else 

        { 

            // nothing left in any queue: stop for this call
            still = 0; 

        } 

    }

}



/* Get the next url

 * here is defined how priorities are handled

 按优先级从各个URL队列

 (比如URLsDisk,URLsDiskWait或URLsPriority,URLsPriorityWait)

 获取url保存到某个NameSite(通过url的hash值)

 */

/* Fetch the next url and dispatch it to its NamedSite
 * (selected by the hash of its host name).
 * Order of precedence:
 *   1. URLsPriorityWait (while readPriorityWait credits remain)
 *   2. URLsPriority     (only while *testPriority is still true)
 *   3. URLsDiskWait     (while readWait credits remain)
 *   4. URLsDisk
 * Returns false when no queue can supply a url.
 */
static bool canGetUrl (bool *testPriority) 

{

    url *u;

    if (global::readPriorityWait)  // credit counter, refilled periodically by cron() in main.cc

    {

        global::readPriorityWait--;

        u = global::URLsPriorityWait->get();

        global::namedSiteList[u->hostHashCode()].putPriorityUrlWait(u);

        return true;

    } 

    else if (*testPriority && (u=global::URLsPriority->tryGet()) != NULL) 

    {

        // We've got one url (priority)

        global::namedSiteList[u->hostHashCode()].putPriorityUrl(u);

        return true;

    } 

    else 

    {

        // priority queue empty: remember it so the caller's loop
        // does not retry it on every iteration
        *testPriority = false;

        // Try to get an ordinary url

        if (global::readWait) 

        {

          global::readWait--;

          u = global::URLsDiskWait->get();

          global::namedSiteList[u->hostHashCode()].putUrlWait(u);

          return true;

        } 

        else 

        {

            u = global::URLsDisk->tryGet();

            if (u != NULL) 

            {

                global::namedSiteList[u->hostHashCode()].putUrl(u);

                return true;

            }

            else 

            {

                return false;

            }

        }

    }

}
View Code

 

一、 对于space = global::inter->putAll();

1. interf在global.cc(位置:/larbin-2.6.3/src/global.cc)中的定义为

inter = new Interval(ramUrls);   //#define ramUrls 100000  (位置:larbin-2.6.3/src/types.h)

批注:区别 inter = new Interval(ramUrls);  和 inter = new Interval[ramUrls];  前一个()内是参数,要传入构造函数的;后一个[]内是开辟数组的个数。

2. 类 Interval定义(位置:/larbin-2.6.3/src/fetch/site.h)

爬虫Larbin解析(二)——sequencer()
/** This class is intended to make sure the sum of the

 * sizes of the fifo included in the different sites

 * are not too big

 */

/** Budget counter bounding the total number of urls held in the
 * fifos of the different sites, so that their combined size
 * cannot grow unchecked.
 */
class Interval 
{
    public:
        /** Create a budget of @a sizes urls, none consumed yet. */
        Interval (uint sizes) : size(sizes), pos(0) {}

        ~Interval () {}

        /** Grant every remaining slot at once.
         * Answers 0 when the budget is already exhausted. */
        inline uint putAll () 
        {
            uint granted = size - pos;
            pos = size;
            return granted;
        }

        /** Warn an url has been retrieved: one slot becomes free again. */
        inline void getOne () { pos--; }

        /** only for debugging, handle with care */
        inline uint getPos () { return pos; }

    private:
        /** Total size of the interval (maximum budget). */
        uint size;

        /** How much of the interval is currently in use. */
        uint pos;
};
View Code

批注:类内的函数定义为inline。对内联函数的几点说明:

  • 内联函数避免函数调用的开销。将函数指定为内联函数,(通常)就是将它在程序的每个调用点上“内联地”展开,消除调用函数进行的额外开销(调用前先保存寄存器,并在返回时回复)。内联说明(在函数返回值前加inline)对编译器来说只是一个建议,编译器可以选择忽略。一般内敛函数适用于优化小的、只有几行、经常被调用的函数。大多数编译器不支持递归函数的内敛。
  • 把内联函数放在头文件。以便编译器能够在调用点展开同一个函数(保证编译器可见、所有的定义相同)。
  • 编译器隐式地将在类内定义的成员函数当作为内联函数.

 

二、 对于canGetUrl(&testPriority)

函数定义(位置larbin-2.6.3/src/fetch/sequencer.cc)

/* Get the next url

 * here is defined how priorities are handled

 按优先级从各个URL队列

 (比如URLsDisk,URLsDiskWait或URLsPriority,URLsPriorityWait)

 获取url保存到某个NameSite(通过url的hash值)



at "global.cc"

// FIFOs

URLsDisk         = new PersistentFifo(reload, fifoFile);

URLsDiskWait     = new PersistentFifo(reload, fifoFileWait);

URLsPriority     = new SyncFifo<url>;

URLsPriorityWait = new SyncFifo<url>;



 */

/* Fetch the next url and hand it to the NamedSite chosen by the
 * hash of its host. Wait-queues are drained first while their
 * credit counters (readPriorityWait / readWait) are non-zero;
 * returns false only when every queue is empty.
 */
static bool canGetUrl (bool *testPriority) 

{

    url *u;

    if (global::readPriorityWait != 0)  // declared in global.cc as: uint global::readPriorityWait = 0;

    {

        global::readPriorityWait--;

        u = global::URLsPriorityWait->get();

        global::namedSiteList[u->hostHashCode()].putPriorityUrlWait(u);

        return true;

    } 

    else if (*testPriority && (u=global::URLsPriority->tryGet()) != NULL) 

    {

        // We've got one url (priority)

        global::namedSiteList[u->hostHashCode()].putPriorityUrl(u);

        return true;

    } 

    else 

    {

        // priority queue exhausted: skip it on subsequent iterations
        *testPriority = false;

        // Try to get an ordinary url

        if (global::readWait) 

        {

          global::readWait--;

          u = global::URLsDiskWait->get();

          global::namedSiteList[u->hostHashCode()].putUrlWait(u);

          return true;

        } 

        else 

        {

            u = global::URLsDisk->tryGet();

            if (u != NULL) 

            {

                global::namedSiteList[u->hostHashCode()].putUrl(u);

                return true;

            }

            else 

            {

                return false;

            }

        }

    }

}

1. 为什么disk和priority的队列都是成对出现的?因为可以认为每个site在namedSiteList当中都有一个小的队列来保存它的url,这个队列中url的个数是有限制的,当超过这个限制的时候就不能再把该site下的url放入,但也不能丢弃,而是放入wait队列。Larbin会控制一段时间在disk队列中取url,一段时间在diskWait当中取url。disk和priority的区别只是优先级的区别。namedSiteList的作用是实现了DNS缓存。

          爬虫Larbin解析(二)——sequencer()

2. global::readPriorityWait 的值由main.cc的cron()函数中变化得知

// see if we should read again urls in fifowait

if ((global::now % 300) == 0) {

    global::readPriorityWait = global::URLsPriorityWait->getLength();

    global::readWait = global::URLsDiskWait->getLength();

}

if ((global::now % 300) == 150) {

    global::readPriorityWait = 0;

    global::readWait = 0;

}

这里global::now%300是判断这次是对wait队列里的url进行处理,还是对非wait队列里的url进行处理;%300等于0和150的概率都是1/300,所以大约每300次切换一次。readPriorityWait是URLsPriorityWait的长度(也就是其中url的数量);readWait是URLsDiskWait中url的个数。

3. 在canGetUrl中,在对于每个站点,将相应的url放进去。putPriorityUrlWait, putPriorityUrl, putUrlWait, putUrl在site.h的定义如下

/** Put an url in the fifo

 * If there are too much, put it back in UrlsInternal

 * Never fill totally the fifo => call at least with 1 */

void putGenericUrl(url *u, int limit, bool prio);

inline void putUrl(url *u) {

    putGenericUrl(u, 15, false);

}

inline void putUrlWait(url *u) {

    putGenericUrl(u, 10, false);

}

inline void putPriorityUrl(url *u) {

    putGenericUrl(u, 5, true);

}

inline void putPriorityUrlWait(url *u) {

    putGenericUrl(u, 1, true);

}

 可以发现,每次都是调用函数putGenericUrl,其定义如下

/* Put an url in the fifo if their are not too many */

void NamedSite::putGenericUrl(url *u, int limit, bool prio)
{
    if (nburls > maxUrlsBySite - limit)
    {
        // Already enough Urls in memory for this Site
        // first check if it can already be forgotten
        if (!strcmp(name, u->getHost()))
        {
            if (dnsState == errorDns)
            {
                nburls++;
                forgetUrl(u, noDNS);
                return;
            }
            if (dnsState == noConnDns)
            {
                nburls++;
                forgetUrl(u, noConnection);
                return;
            }
            if (u->getPort() == port && dnsState == doneDns && !testRobots(u->getFile()))
            {
                nburls++;
                forgetUrl(u, forbiddenRobots);
                return;
            }
        }
        // else put it back in URLsDisk
        refUrl();
        global::inter->getOne();
        if (prio)
        {
            global::URLsPriorityWait->put(u);
        }
        else
        {
            global::URLsDiskWait->put(u);
        }
    }

如果已经有足够多的url在内存里,执行这里if中的代码。strcmp(name, u->getHost())是判断这个主机是不是已经进行过dns方面的判断,也就是说对于一个站点,只做一次dns解析的判断,以后就按这个结果进行处理;dnsState有noDns、noConnDns,还有robots.txt不允许的情况。如果都不能直接丢弃,就把它放回wait队列(URLsPriorityWait或URLsDiskWait)中。

else {

    nburls++;

    if (dnsState == waitDns || strcmp(name, u->getHost()) || port

           != u->getPort() || global::now > dnsTimeout) {

       // dns not done or other site

       putInFifo(u);

       addNamedUrl();

       // Put Site in fifo if not yet in

       if (!isInFifo) {

           isInFifo = true;

           global::dnsSites->put(this);

       }

    } else

       switch (dnsState) {

       case doneDns:

           transfer(u);

           break;

       case errorDns:

           forgetUrl(u, noDNS);

           break;

       default: // noConnDns

           forgetUrl(u, noConnection);

       }

}

 如果需要判断dns能不能解析,就将它放到dnsSites里,这个会在fetchDns中判断。或是如果还能放到内存里,并且又是doneDns,表示可以解析,就调用transfer

/** Hand the url over to the IPSite fifo of the resolved address,
 * unless robots.txt forbids fetching this file. */
void NamedSite::transfer(url *u) {

    if (testRobots(u->getFile())) {

       if (global::proxyAddr == NULL) {

           // no proxy: cache the resolved ip inside the url itself
           memcpy(&u->addr, &addr, sizeof(struct in_addr));

       }

       global::IPSiteList[ipHash].putUrl(u);

    } else {

       forgetUrl(u, forbiddenRobots);

    }

}

这里是将url放入到IPSiteList的相应ipHash中。

 

附类的定义

类url定义(larbin-2.6.3/src/utils/url.h  larbin-2.6.3/src/utils/url.cc

爬虫Larbin解析(二)——sequencer()
// Larbin

// Sebastien Ailleret

// 15-11-99 -> 14-03-02



/* This class describes an URL */



#ifndef URL_H

#define URL_H



#include <netinet/in.h>

#include <sys/types.h>

#include <sys/socket.h>

#include <stdlib.h>



#include "types.h"



bool fileNormalize (char *file);



class url {

 private:

  char *host;

  char *file;

  uint16_t port; // the order of variables is important for physical size

  int8_t depth;

  /* parse the url */

  void parse (char *s);

  /** parse a file with base */

  void parseWithBase (char *u, url *base);

  /* normalize file name */

  bool normalize (char *file);

  /* Does this url starts with a protocol name */

  bool isProtocol (char *s);

  /* constructor used by giveBase */

  url (char *host, uint port, char *file);



 public:

  /* Constructor : Parses an url (u is deleted) */

  url (char *u, int8_t depth, url *base);



  /* constructor used by input */

  url (char *line, int8_t depth);



  /* Constructor : read the url from a file (cf serialize) */

  url (char *line);



  /* Destructor */

  ~url ();



  /* inet addr (once calculated) */

  struct in_addr addr;



  /* Is it a valid url ? */

  bool isValid ();



  /* print an URL */

  void print ();



  /* return the host */

  inline char *getHost () { return host; }



  /* return the port */

  inline uint getPort () { return port; }



  /* return the file */

  inline char *getFile () { return file; }



  /** Depth in the Site */

  inline int8_t getDepth () { return depth; }



  /* Set depth to max if we are at an entry point in the site

   * try to find the ip addr

   * answer false if forbidden by robots.txt, true otherwise */

  bool initOK (url *from);



  /** return the base of the url

   * give means that you have to delete the string yourself

   */

  url *giveBase ();



  /** return a char * representation of the url

   * give means that you have to delete the string yourself

   */

  char *giveUrl ();



  /** write the url in a buffer

   * buf must be at least of size maxUrlSize

   * returns the size of what has been written (not including '\0')

   */

  int writeUrl (char *buf);



  /* serialize the url for the Persistent Fifo */

  char *serialize ();



  /* very thread unsafe serialisation in a static buffer */

  char *getUrl();



  /* return a hashcode for the host of this url */

  uint hostHashCode ();



  /* return a hashcode for this url */

  uint hashCode ();



#ifdef URL_TAGS

  /* tag associated to this url */

  uint tag;

#endif // URL_TAGS



#ifdef COOKIES

  /* cookies associated with this page */

  char *cookie;

  void addCookie(char *header);

#else // COOKIES

  inline void addCookie(char *header) {}

#endif // COOKIES

};



#endif // URL_H
View Code
爬虫Larbin解析(二)——sequencer()
// Larbin

// Sebastien Ailleret

// 15-11-99 -> 16-03-02



/* This class describes an URL */



#include <assert.h>

#include <stdlib.h>

#include <stdio.h>

#include <string.h>

#include <ctype.h>

#include <sys/types.h>

#include <sys/socket.h>



#include "options.h"



#include "types.h"

#include "global.h"

#include "utils/url.h"

#include "utils/text.h"

#include "utils/connexion.h"

#include "utils/debug.h"



#ifdef COOKIES

#define initCookie() cookie=NULL

#else // COOKIES

#define initCookie() ((void) 0)

#endif // COOKIES



/* small functions used later */

/* Hash a host name into an index of global::namedSiteList
 * (classic polynomial rolling hash with base 37). */
static uint siteHashCode (char *host) {
  uint acc = 0;
  for (uint k = 0; host[k] != 0; k++) {
    acc = 37 * acc + host[k];
  }
  return acc % namedSiteListSize;
}



/* return the int with correspond to a char

 * -1 if not an hexa char */

/* Decode a single hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F').
 * Returns its value 0..15, or -1 when c is not a hexa char. */
static int int_of_hexa (char c) {
  if ('0' <= c && c <= '9') return c - '0';
  if ('a' <= c && c <= 'f') return c - 'a' + 10;
  if ('A' <= c && c <= 'F') return c - 'A' + 10;
  return -1;
}



/* normalize a file name : also called by robots.txt parser

 * return true if it is ok, false otherwise (cgi-bin)

 */

/* Normalize a file name in place (also called by the robots.txt parser):
 * collapses "/./" and "//", resolves "/../", handles trailing "/." and
 * "/..", decodes safe %XX escapes, and truncates at '#' (fragment).
 * Returns true if the name is ok, false otherwise (bad escape, or
 * "/../" escaping above the root).
 */
bool fileNormalize (char *file) {

  int i=0;

  while (file[i] != 0 && file[i] != '#') {

    if (file[i] == '/') {

      if (file[i+1] == '.' && file[i+2] == '/') {

        // suppress /./  (shift the tail left by 2)

        int j=i+3;

        while (file[j] != 0) {

          file[j-2] = file[j];

          j++;

        }

        file[j-2] = 0;

      } else if (file[i+1] == '/') {

        // replace // by /  (shift the tail left by 1)

        int j=i+2;

        while (file[j] != 0) {

          file[j-1] = file[j];

          j++;

        }

        file[j-1] = 0;

      } else if (file[i+1] == '.' && file[i+2] == '.' && file[i+3] == '/') {

        // suppress /../ together with the preceding path component

        if (i == 0) {

          // the file name starts with /../ : error

          return false;

        } else {

          int j = i+4, dec;

          i--;

          // back up to the start of the previous component

          while (file[i] != '/') { i--; }

          dec = i+1-j; // dec < 0

          while (file[j] != 0) {

            file[j+dec] = file[j];

            j++;

          }

          file[j+dec] = 0;

        }

      } else if (file[i+1] == '.' && file[i+2] == 0) {

        // suppress trailing /.

        file[i+1] = 0;

        return true;

      } else if (file[i+1] == '.' && file[i+2] == '.' && file[i+3] == 0) {

        // suppress trailing /.. and the preceding component

        if (i == 0) {

          // the file name starts with /.. : error

          return false;

        } else {

          i--;

          while (file[i] != '/') {

            i--;

          }

          file[i+1] = 0;

          return true;

        }

      } else { // nothing special, go forward

        i++;

      }

    } else if (file[i] == '%') {

      // try to decode a %XX escape

      int v1 = int_of_hexa(file[i+1]);

      int v2 = int_of_hexa(file[i+2]);

      if (v1 < 0 || v2 < 0) return false;

      char c = 16 * v1 + v2;

      if (isgraph(c)) {

        // printable char: decode it and shift the tail left by 2

        file[i] = c;

        int j = i+3;

        while (file[j] != 0) {

          file[j-2] = file[j];

          j++;

        }

        file[j-2] = 0;

        i++;

      } else if (c == ' ' || c == '/') { // keep it with the % notation

        i += 3;

      } else { // bad url

        return false;

      }

    } else { // nothing special, go forward

      i++;

    }

  }

  // cut the fragment part (or just re-terminate the string)

  file[i] = 0;

  return true;

}



/**************************************/

/* definition of methods of class url */

/**************************************/



/* Constructor : Parses an url */

url::url (char *u, int8_t depth, url *base) {

  newUrl();

  this->depth = depth;

  host = NULL;

  port = 80;

  file = NULL;

  initCookie();

#ifdef URL_TAGS

  tag = 0;

#endif // URL_TAGS

  if (startWith("http://", u)) {

    // absolute url

    parse (u + 7);

    // normalize file name

    if (file != NULL && !normalize(file)) {

      delete [] file;

      file = NULL;

      delete [] host;

      host = NULL;

    }

  } else if (base != NULL) {

    if (startWith("http:", u)) {

      parseWithBase(u+5, base);

    } else if (isProtocol(u)) {

      // Unknown protocol (mailto, ftp, news, file, gopher...)

    } else {

      parseWithBase(u, base);

    }

  }

}



/* constructor used by input */

url::url (char *line,  int8_t depth) {

  newUrl();

  this->depth = depth;

  host = NULL;

  port = 80;

  file = NULL;

  initCookie();

  int i=0;

#ifdef URL_TAGS

  tag = 0;

  while (line[i] >= '0' && line[i] <= '9') {

    tag = 10*tag + line[i] - '0';

    i++;

  }

  i++;

#endif // URL_TAGS

  if (startWith("http://", line+i)) {

    parse(line+i+7);

    // normalize file name

    if (file != NULL && !normalize(file)) {

      delete [] file;

      file = NULL;

      delete [] host;

      host = NULL;

    }

  }

}



/* Constructor : read the url from a file (cf serialize)

 */

url::url (char *line) {

  newUrl();

  int i=0;

  // Read depth

  depth = 0;

  while (line[i] >= '0' && line[i] <= '9') {

    depth = 10*depth + line[i] - '0';

    i++;

  }

#ifdef URL_TAGS

  // read tag

  tag = 0; i++;

  while (line[i] >= '0' && line[i] <= '9') {

    tag = 10*tag + line[i] - '0';

    i++;

  }

#endif // URL_TAGS

  int deb = ++i;

  // Read host

  while (line[i] != ':') {

    i++;

  }

  line[i] = 0;

  host = newString(line+deb);

  i++;

  // Read port

  port = 0;

  while (line[i] >= '0' && line[i] <= '9') {

    port = 10*port + line[i] - '0';

    i++;

  }

#ifndef COOKIES

  // Read file name

  file = newString(line+i);

#else // COOKIES

  char *cpos = strchr(line+i, ' ');

  if (cpos == NULL) {

    cookie = NULL;

  } else {

    *cpos = 0;

    // read cookies

    cookie = new char[maxCookieSize];

    strcpy(cookie, cpos+1);

  }

  // Read file name

  file = newString(line+i);

#endif // COOKIES

}



/* constructor used by giveBase */

url::url (char *host, uint port, char *file) {

  newUrl();

  initCookie();

  this->host = host;

  this->port = port;

  this->file = file;

}



/* Destructor */

url::~url () {

  delUrl();

  delete [] host;

  delete [] file;

#ifdef COOKIES

  delete [] cookie;

#endif // COOKIES

}



/* Is it a valid url ? */

bool url::isValid () {

  if (host == NULL) return false;

  int lh = strlen(host);

  return file!=NULL && lh < maxSiteSize

    && lh + strlen(file) + 18 < maxUrlSize;

}



/* print an URL */

void url::print () {

  printf("http://%s:%u%s\n", host, port, file);

}



/* Set depth to max if necessary

 * try to find the ip addr

 * answer false if forbidden by robots.txt, true otherwise */

bool url::initOK (url *from) {

#if defined(DEPTHBYSITE) || defined(COOKIES)

  if (strcmp(from->getHost(), host)) { // different site

#ifdef DEPTHBYSITE

    depth = global::depthInSite;

#endif // DEPTHBYSITE

  } else { // same site

#ifdef COOKIES

    if (from->cookie != NULL) {

      cookie = new char[maxCookieSize];

      strcpy(cookie, from->cookie);

    }

#endif // COOKIES

  }

#endif // defined(DEPTHBYSITE) || defined(COOKIES)

  if (depth < 0) {

    errno = tooDeep;

    return false;

  }

  NamedSite *ns = global::namedSiteList + (hostHashCode());

  if (!strcmp(ns->name, host) && ns->port == port) {

    switch (ns->dnsState) {

    case errorDns:

      errno = fastNoDns;

      return false;

    case noConnDns:

      errno = fastNoConn;

      return false;

    case doneDns:

      if (!ns->testRobots(file)) {

        errno = fastRobots;

        return false;

      }

    }

  }

  return true;

}



/* return the base of the url */

url *url::giveBase () {

  int i = strlen(file);

  assert (file[0] == '/');

  while (file[i] != '/') {

    i--;

  }

  char *newFile = new char[i+2];

  memcpy(newFile, file, i+1);

  newFile[i+1] = 0;

  return new url(newString(host), port, newFile);

}



/** return a char * representation of the url

 * give means that you have to delete the string yourself

 */

char *url::giveUrl () {

  char *tmp;

  int i = strlen(file);

  int j = strlen(host);



  tmp = new char[18+i+j];  // 7 + j + 1 + 9 + i + 1

                           // http://(host):(port)(file)\0

  strcpy(tmp, "http://");

  strcpy (tmp+7, host);

  j += 7;

  if (port != 80) {

    j += sprintf(tmp + j, ":%u", port);

  }

  // Copy file name

  while (i >= 0) {

    tmp [j+i] = file[i];

    i--;

  }

  return tmp;

}



/** write the url in a buffer

 * buf must be at least of size maxUrlSize

 * returns the size of what has been written (not including '\0')

 */

int url::writeUrl (char *buf) {

  if (port == 80)

    return sprintf(buf, "http://%s%s", host, file);

  else

    return sprintf(buf, "http://%s:%u%s", host, port, file);

}



/* serialize the url for the Persistent Fifo */

char *url::serialize () {

  // this buffer is protected by the lock of PersFifo

  static char statstr[maxUrlSize+40+maxCookieSize];

  int pos = sprintf(statstr, "%u ", depth);

#ifdef URL_TAGS

  pos += sprintf(statstr+pos, "%u ", tag);

#endif // URL_TAGS

  pos += sprintf(statstr+pos, "%s:%u%s", host, port, file);

#ifdef COOKIES

  if (cookie != NULL) {

    pos += sprintf(statstr+pos, " %s", cookie);

  }

#endif // COOKIES

  statstr[pos] = '\n';

  statstr[pos+1] = 0;

  return statstr;

}



/* very thread unsafe serialisation in a static buffer */

char *url::getUrl() {

  static char statstr[maxUrlSize+40];

  sprintf(statstr, "http://%s:%u%s", host, port, file);

  return statstr;

}



/* return a hashcode for the host of this url */

uint url::hostHashCode () {

  return siteHashCode (host);

}



/* return a hashcode for this url */

/* Hashcode for the whole url: folds the port, then every char of
 * the host and of the file into a base-31 rolling hash, reduced
 * modulo the global hash table size. */
uint url::hashCode () {
  unsigned int acc = port;
  for (unsigned int k = 0; host[k] != 0; k++) {
    acc = 31*acc + host[k];
  }
  for (unsigned int k = 0; file[k] != 0; k++) {
    acc = 31*acc + file[k];
  }
  return acc % hashSize;
}



/* parses a url : 

 * at the end, arg must have its initial state, 

 * http:// has allready been suppressed

 */

void url::parse (char *arg) {

  int deb = 0, fin = deb;

  // Find the end of host name (put it into lowerCase)

  while (arg[fin] != '/' && arg[fin] != ':' && arg[fin] != 0) {

    fin++;

  }

  if (fin == 0) return;



  // get host name

  host = new char[fin+1];

  for (int  i=0; i<fin; i++) {

    host[i] = lowerCase(arg[i]);

  }

  host[fin] = 0;



  // get port number

  if (arg[fin] == ':') {

    port = 0;

    fin++;

    while (arg[fin] >= '0' && arg[fin] <= '9') {

      port = port*10 + arg[fin]-'0';

      fin++;

    }

  }



  // get file name

  if (arg[fin] != '/') {

    // www.inria.fr => add the final /

    file = newString("/");

  } else {

    file = newString(arg + fin);

  }

}



/** parse a file with base

 */

void url::parseWithBase (char *u, url *base) {

  // cat filebase and file

  if (u[0] == '/') {

    file = newString(u);

  } else {

    uint lenb = strlen(base->file);

    char *tmp = new char[lenb + strlen(u) + 1];

    memcpy(tmp, base->file, lenb);

    strcpy(tmp + lenb, u);

    file = tmp;

  }

  if (!normalize(file)) {

    delete [] file;

    file = NULL;

    return;

  }

  host = newString(base->host);

  port = base->port;

}



/** normalize file name

 * return true if it is ok, false otherwise (cgi-bin)

 */

bool url::normalize (char *file) {

  return fileNormalize(file);

}



/* Does this url starts with a protocol name */

/* Does this url start with a protocol name, i.e. a run of
 * alphanumeric chars immediately followed by ':' ? */
bool url::isProtocol (char *s) {
  uint k = 0;
  while (isalnum(s[k])) {
    ++k;
  }
  return s[k] == ':';
}



#ifdef COOKIES

#define addToCookie(s) len = strlen(cookie); \

    strncpy(cookie+len, s, maxCookieSize-len); \

    cookie[maxCookieSize-1] = 0;



/* see if a header contain a new cookie */

void url::addCookie(char *header) {

  if (startWithIgnoreCase("set-cookie: ", header)) {

    char *pos = strchr(header+12, ';');

    if (pos != NULL) {

      int len;

      if (cookie == NULL) {

        cookie = new char[maxCookieSize];

        cookie[0] = 0;

      } else {

        addToCookie("; ");

      }

      *pos = 0;

      addToCookie(header+12);

      *pos = ';';

    }

  }

}

#endif // COOKIES
View Code

global::namedSiteList

NamedSite *global::namedSiteList;

namedSiteList = new NamedSite[namedSiteListSize];
爬虫Larbin解析(二)——sequencer()
class NamedSite 

{

    private:

        /* string used for following CNAME chains (just one jump) */

        char *cname;

        /** we've got a good dns answer

        * get the robots.txt */

        void dnsOK ();

        /** Cannot get the inet addr

        * dnsState must have been set properly before the call */

        void dnsErr ();

        /** Delete the old identity of the site */

        void newId ();

        /** put this url in its IPSite */

        void transfer (url *u);

        /** forget this url for this reason */

        void forgetUrl (url *u, FetchError reason);

    public:

        /** Constructor */

        NamedSite ();

        /** Destructor : never used */

        ~NamedSite ();

        /* name of the site */

        char name[maxSiteSize];

        /* port of the site */

        uint16_t port;

        /* numbers of urls in ram for this site */

        uint16_t nburls;

        /* fifo of urls waiting to be fetched */

        url *fifo[maxUrlsBySite];

        uint8_t inFifo;

        uint8_t outFifo;

        void putInFifo(url *u);

        url *getInFifo();

        short fifoLength();

        /** Is this Site in a dnsSites */

        bool isInFifo;

        /** internet addr of this server */

        char dnsState;

        struct in_addr addr;

        uint ipHash;

        /* Date of expiration of dns call and robots.txt fetch */

        time_t dnsTimeout;

        /** test if a file can be fetched thanks to the robots.txt */

        bool testRobots(char *file);

        /* forbidden paths : given by robots.txt */

        Vector<char> forbidden;

        /** Put an url in the fifo

        * If there are too much, put it back in UrlsInternal

        * Never fill totally the fifo => call at least with 1 */

        void putGenericUrl(url *u, int limit, bool prio);

        inline void putUrl (url *u) { putGenericUrl(u, 15, false); }

        inline void putUrlWait (url *u) { putGenericUrl(u, 10, false); }

        inline void putPriorityUrl (url *u) { putGenericUrl(u, 5, true); }

        inline void putPriorityUrlWait (url *u) { putGenericUrl(u, 1, true); }

        /** Init a new dns query */

        void newQuery ();

        /** The dns query ended with success */

        void dnsAns (adns_answer *ans);

        /** we got the robots.txt, transfer what must be in IPSites */

        void robotsResult (FetchError res);

};
View Code
爬虫Larbin解析(二)——sequencer()
///////////////////////////////////////////////////////////

// class NamedSite

///////////////////////////////////////////////////////////



/** Constructor : initiate fields used by the program

 */

NamedSite::NamedSite () 

{

  name[0] = 0;

  nburls = 0;

  inFifo = 0; outFifo = 0;

  isInFifo = false;

  dnsState = waitDns;

  cname = NULL;

}



/** Destructor : This one is never used

 */

NamedSite::~NamedSite () {

  assert(false);

}



/* Management of the Fifo */

void NamedSite::putInFifo(url *u) {

  fifo[inFifo] = u;

  inFifo = (inFifo + 1) % maxUrlsBySite;

  assert(inFifo!=outFifo);

}



url *NamedSite::getInFifo() {

  assert (inFifo != outFifo);

  url *tmp = fifo[outFifo];

  outFifo = (outFifo + 1) % maxUrlsBySite;

  return tmp;

}



short NamedSite::fifoLength() {

  return (inFifo + maxUrlsBySite - outFifo) % maxUrlsBySite;

}



/* Put an url in the fifo if their are not too many */

void NamedSite::putGenericUrl(url *u, int limit, bool prio) {

  if (nburls > maxUrlsBySite-limit) {

    // Already enough Urls in memory for this Site

    // first check if it can already be forgotten

    if (!strcmp(name, u->getHost())) {

      if (dnsState == errorDns) {

        nburls++;

        forgetUrl(u, noDNS);

        return;

      }

      if (dnsState == noConnDns) {

        nburls++;

        forgetUrl(u, noConnection);

        return;

      }

      if (u->getPort() == port

          && dnsState == doneDns && !testRobots(u->getFile())) {

        nburls++;

        forgetUrl(u, forbiddenRobots);

        return;

      }

    }

    // else put it back in URLsDisk

    refUrl();

    global::inter->getOne();

    if (prio) {

      global::URLsPriorityWait->put(u);

    } else {

      global::URLsDiskWait->put(u);

    }

  } else {

    nburls++;

    if (dnsState == waitDns

        || strcmp(name, u->getHost())

        || port != u->getPort()

        || global::now > dnsTimeout) {

      // dns not done or other site

      putInFifo(u);

      addNamedUrl();

      // Put Site in fifo if not yet in

      if (!isInFifo) {

        isInFifo = true;

        global::dnsSites->put(this);

      }

    } else switch (dnsState) {

    case doneDns:

      transfer(u);

      break;

    case errorDns:

      forgetUrl(u, noDNS);

      break;

    default: // noConnDns

      forgetUrl(u, noConnection);

    }

  }

}



/** Init a new dns query

 */

void NamedSite::newQuery () 

{

    // Update our stats

    newId();

    if (global::proxyAddr != NULL) 

    {

        // we use a proxy, no need to get the sockaddr

        // give anything for going on

        siteSeen();

        siteDNS();

        // Get the robots.txt

        dnsOK();

    } 

    else if (isdigit(name[0])) 

    {

        // the name already in numbers-and-dots notation

        siteSeen();

        if (inet_aton(name, &addr)) 

        {

              // Yes, it is in numbers-and-dots notation

              siteDNS();

              // Get the robots.txt

              dnsOK();

        } 

        else 

        {

            // No, it isn't : this site is a non sense

            dnsState = errorDns;

            dnsErr();

        }

    } 

    else 

    {

        // submit an adns query

        global::nbDnsCalls++;

        adns_query quer = NULL;

        adns_submit(global::ads, name,

                    (adns_rrtype) adns_r_addr,

                    (adns_queryflags) 0,

                    this, &quer);

    }

}



/** The dns query ended with success

 * assert there is a freeConn

 */

void NamedSite::dnsAns (adns_answer *ans) 

{

    if (ans->status == adns_s_prohibitedcname) 

    {

        if (cname == NULL) 

        {

            // try to find ip for cname of cname

            cname = newString(ans->cname);

            global::nbDnsCalls++;

            adns_query quer = NULL;

            adns_submit(global::ads, cname,

                  (adns_rrtype) adns_r_addr,

                  (adns_queryflags) 0,

                  this, &quer);

        } 

        else 

        {

            // dns chains too long => dns error

            // cf nslookup or host for more information

            siteSeen();

            delete [] cname; cname = NULL;

            dnsState = errorDns;

            dnsErr();

        }

    } 

    else 

    {

        siteSeen();

        if (cname != NULL) 

        { 

            delete [] cname; 

            cname = NULL; 

        }

        if (ans->status != adns_s_ok) 

        {

          // No addr inet

          dnsState = errorDns;

          dnsErr();

        } 

        else 

        {

          siteDNS();

          // compute the new addr

          memcpy (&addr,

                  &ans->rrs.addr->addr.inet.sin_addr,

                  sizeof (struct in_addr));

          // Get the robots.txt

          dnsOK();

        }

    }

}



/** we've got a good dns answer

 * get the robots.txt

 * assert there is a freeConn

 */

void NamedSite::dnsOK () {
  // take a free connection (the caller guarantees one is available)
  Connexion *conn = global::freeConns->get();
  char res = getFds(conn, &addr, port);
  if (res != emptyC) {
    conn->timeout = timeoutPage;
    if (global::proxyAddr != NULL) {
      // use a proxy : the request line must carry the absolute url
      conn->request.addString("GET http://");
      conn->request.addString(name);
      char tmp[15];
      sprintf(tmp, ":%u", port);
      conn->request.addString(tmp);
      conn->request.addString("/robots.txt HTTP/1.0\r\nHost: ");
    } else {
      // direct connection
      conn->request.addString("GET /robots.txt HTTP/1.0\r\nHost: ");
    }
    conn->request.addString(name);
    conn->request.addString(global::headersRobots);
    // the answer will be parsed as a robots.txt file
    conn->parser = new robots(this, conn);
    conn->pos = 0;
    conn->err = success;
    conn->state = res;
  } else {
    // Unable to get a socket : give the connection back and fail the site
    global::freeConns->put(conn);
    dnsState = noConnDns;
    dnsErr();
  }
}



/** Cannot get the inet addr

 * dnsState must have been set properly before the call

 */

void NamedSite::dnsErr () {
  // translate the dns state into the error reported to the caller
  FetchError theErr;
  if (dnsState == errorDns) {
    theErr = noDNS;
  } else {
    theErr = noConnection;
  }
  int ss = fifoLength();
  // scan the queue
  for (int i=0; i<ss; i++) {
    url *u = getInFifo();
    if (!strcmp(name, u->getHost())) {
      // same host : this url can never be fetched, report the failure
      delNamedUrl();
      forgetUrl(u, theErr);
    } else { // different name
      // the slot was reused by another host : keep its urls queued
      putInFifo(u);
    }
  }
  // where should now lie this site
  if (inFifo==outFifo) {
    isInFifo = false;
  } else {
    // urls of another host remain : submit the site for dns again
    global::dnsSites->put(this);
  }
}



/** test if a file can be fetched thanks to the robots.txt */

bool NamedSite::testRobots(char *file) {

  uint pos = forbidden.getLength();

  for (uint i=0; i<pos; i++) {

    if (robotsMatch(forbidden[i], file))

      return false;

  }

  return true;

}



/** Delete the old identity of the site */

void NamedSite::newId () {
  // ip expires or new name or just new port
  // Change the identity of this site
#ifndef NDEBUG
  if (name[0] == 0) {
    // debug statistics : this slot is used for the first time
    addsite();
  }
#endif // NDEBUG
  // adopt the host and port of the next queued url
  url *u = fifo[outFifo];
  strcpy(name, u->getHost());
  port = u->getPort();
  // the resolved address will be considered valid until this deadline
  dnsTimeout = global::now + dnsValidTime;
  dnsState = waitDns;
}



/** we got the robots.txt,

 * compute ipHashCode

 * transfer what must be in IPSites

 */

void NamedSite::robotsResult (FetchError res) {
  bool ok = res != noConnection;
  if (ok) {
    dnsState = doneDns;
    // compute ip hashcode
    if (global::proxyAddr == NULL) {
      ipHash=0;
      // fold the raw bytes of the resolved address into a hash
      char *s = (char *) &addr;
      for (uint i=0; i<sizeof(struct in_addr); i++) {
        ipHash = ipHash*31 + s[i];
      }
    } else {
      // no ip and need to avoid rapidFire => use hostHashCode
      // (pointer difference = index of this site in namedSiteList)
      ipHash = this - global::namedSiteList;
    }
    ipHash %= IPSiteListSize;
  } else {
    dnsState = noConnDns;
  }
  int ss = fifoLength();
  // scan the queue
  for (int i=0; i<ss; i++) {
    url *u = getInFifo();
    if (!strcmp(name, u->getHost())) {
      delNamedUrl();
      if (ok) {
        if (port == u->getPort()) {
          // same identity : move the url to its IPSite
          transfer(u);
        } else {
          // same host but another port : needs its own robots.txt
          putInFifo(u);
        }
      } else {
        forgetUrl(u, noConnection);
      }
    } else {
      // slot reused by another host : keep the url queued
      putInFifo(u);
    }
  }
  // where should now lie this site
  if (inFifo==outFifo) {
    isInFifo = false;
  } else {
    global::dnsSites->put(this);
  }  
}



/** Hand a url over to its IPSite once robots.txt has been checked. */
void NamedSite::transfer (url *u) {
  if (!testRobots(u->getFile())) {
    // robots.txt forbids this path : drop the url
    forgetUrl(u, forbiddenRobots);
    return;
  }
  if (global::proxyAddr == NULL) {
    // no proxy : the url carries the resolved ip address along
    memcpy (&u->addr, &addr, sizeof (struct in_addr));
  }
  global::IPSiteList[ipHash].putUrl(u);
}



/** Drop a url definitively : update the statistics, report the error,
 * free the url and give its slot back to the global sequencer.
 */
void NamedSite::forgetUrl (url *u, FetchError reason) {
  urls();
  fetchFail(u, reason);
  answers(reason);
  nburls--;
  delete u;
  // one more url may now enter the fetch pipeline
  global::inter->getOne();
}
View Code

 

其中两个类的定义

larbin-2.6.3/src/utils/PersistentFifo.h、larbin-2.6.3/src/utils/PersistentFifo.cc

爬虫Larbin解析(二)——sequencer()
// Larbin

// Sebastien Ailleret

// 06-01-00 -> 12-06-01



/* this fifo is stored on disk */



#ifndef PERSFIFO_H

#define PERSFIFO_H



#include <dirent.h>

#include <unistd.h>

#include <sys/types.h>

#include <sys/stat.h>

#include <fcntl.h>

#include <errno.h>

#include <string.h>



#include "types.h"

#include "utils/url.h"

#include "utils/text.h"

#include "utils/connexion.h"

#include "utils/mypthread.h"



/** A fifo of urls persisted on disk as a series of numbered files:
 * urls are appended to file number `fin` and consumed from file
 * number `fout`; a read file is deleted once fully consumed.
 */
class PersistentFifo 
{
    protected:
        // urls written (in) / read (out) in the current file window
        uint in, out;
        #ifdef THREAD_OUTPUT
        pthread_mutex_t lock;
        #endif
        // length of a full file name (base name + numeric suffix)
        uint fileNameLength;
        // numbers of the files used for writing (fin) and reading (fout)
        int fin, fout;
        // buffer holding the current file name (baseName + number)
        char *fileName;

    protected:
        // Make fileName fit with this number
        void makeName(uint nb);
        // Extract the number encoded at the end of this file name
        int getNumber(char *file);
        // Change the file used for reading
        void updateRead ();
        // Change the file used for writing
        void updateWrite ();

    protected:
        // buffer used for writeUrl (buffered output)
        char outbuf[BUF_SIZE];
        // number of char used in this buffer
        uint outbufPos;
        // buffer used for readLine
        char buf[BUF_SIZE];
        // number of char used in this buffer
        uint bufPos, bufEnd;
        // file descriptors for reading (rfds) and writing (wfds)
        int rfds, wfds;
    protected:
        // read a line on rfds
        char *readLine ();
        // write an url in the out file (buffered write)
        void writeUrl (char *s);
        // Flush the out Buffer in the outFile
        void flushOut ();

    public:
        PersistentFifo (bool reload, char *baseName);
        ~PersistentFifo ();

        /* get the first object (non blocking)
        * return NULL if there is none
        */
        url *tryGet ();

        /* get the first object (non totally blocking)
        * probably crash if there is none
        */
        url *get ();

        /* add an object in the fifo (the fifo takes ownership) */
        void put (url *obj);

        /* how many items are there inside ? */
        int getLength ();
};



#endif // PERSFIFO_H
View Code
爬虫Larbin解析(二)——sequencer()
// Larbin

// Sebastien Ailleret

// 27-05-01 -> 04-01-02



#include <string.h>

#include <assert.h>

#include <stdlib.h>

#include <stdio.h>

#include <string.h>

#include <ctype.h>

#include <iostream.h>



#include "types.h"

#include "global.h"

#include "utils/mypthread.h"

#include "utils/PersistentFifo.h"



/** Build the fifo over numbered files "<baseName>NNNNNN" in the cwd.
 * reload == true resumes from files left by a previous crawl;
 * otherwise old fifo files are deleted and numbering restarts at 0.
 */
PersistentFifo::PersistentFifo (bool reload, char *baseName) 
{
  // room for the base name, the numeric suffix and the terminator
  fileNameLength = strlen(baseName)+5;
  fileName = new char[fileNameLength+2];
  strcpy(fileName, baseName);
  fileName[fileNameLength+1] = 0;
  outbufPos = 0;
  bufPos = 0;
  bufEnd = 0;
  mypthread_mutex_init(&lock, NULL);
  if (reload) 
  {
    // scan the current directory for files left by a previous run
    // NOTE(review): opendir's result is not checked — presumably "."
    // is always readable; confirm before reusing this pattern
    DIR *dir = opendir(".");
    struct dirent *name;

    fin = -1;
    fout = -1;
    name = readdir(dir);
    while (name != NULL) 
    {
      if (startWith(fileName, name->d_name)) 
      {
        int tmp = getNumber(name->d_name);
        if (fin == -1) 
        {
          fin = tmp;
          fout = tmp;
        } 
        else 
        {
          // keep the highest number for writing, the lowest for reading
          if (tmp > fin)  { fin = tmp; }
          if (tmp < fout) { fout = tmp; }
        }
      }
      name = readdir(dir);
    }
    if (fin == -1) 
    {
      fin = 0;
      fout = 0;
    }
    if (fin == fout && fin != 0) 
    {
      // a single non-zero file is ambiguous : we cannot tell how many
      // of its urls were already consumed
      cerr << "previous crawl was too little, cannot reload state\n"
           << "please restart larbin with -scratch option\n";
      exit(1);
    }
    closedir(dir);
    // estimate of pending urls : full files between fout and fin
    in = (fin - fout) * urlByFile;
    out = 0;
    makeName(fin);
    wfds = creat (fileName, S_IRUSR | S_IWUSR);
    makeName(fout);
    rfds = open (fileName, O_RDONLY);
  } 
  else 
  {
    // Delete old fifos
    DIR *dir = opendir(".");
    struct dirent *name;
    name = readdir(dir);
    while (name != NULL) 
    {
      if (startWith(fileName, name->d_name)) 
      {
        unlink(name->d_name);
      }
      name = readdir(dir);
    }
    closedir(dir);

    fin = 0;
    fout = 0;
    in = 0;
    out = 0;
    makeName(0);
    wfds = creat (fileName, S_IRUSR | S_IWUSR);
    rfds = open (fileName, O_RDONLY);
  }
}



/** Release the mutex and both file descriptors.
 * Note: urls still buffered in outbuf are not flushed here.
 */
PersistentFifo::~PersistentFifo () 
{
  mypthread_mutex_destroy (&lock);
  close(rfds);
  close(wfds);
}



/** Non-blocking get : return the next url, or NULL if the fifo is empty. */
url *PersistentFifo::tryGet () 
{
  url *tmp = NULL;
  mypthread_mutex_lock(&lock);
  if (in != out) 
  {
    // The stack is not empty
    char *line = readLine();
    tmp = new url(line);
    out++;
    // possibly switch to the next file on disk
    updateRead();
  }
  mypthread_mutex_unlock(&lock);
  return tmp;
}



/** Get the next url, assuming the fifo is not empty
 * (readLine loops on the file until a complete line is available).
 */
url *PersistentFifo::get () 
{
  mypthread_mutex_lock(&lock);
  char *line = readLine();
  url *res = new url(line);
  out++;
  // possibly switch to the next file on disk
  updateRead();
  mypthread_mutex_unlock(&lock);
  return res;
}



/** Put something in the fifo

 * The objet is then deleted

 */

void PersistentFifo::put (url *obj) 
{
  mypthread_mutex_lock(&lock);
  char *s = obj->serialize(); // statically allocated string
  writeUrl(s);
  in++;
  // possibly switch to the next file on disk
  updateWrite();
  mypthread_mutex_unlock(&lock);
  // the fifo takes ownership : the in-memory url is destroyed here
  delete obj;
} 

/** Number of urls currently stored.
 * NOTE(review): reads in/out without taking the lock — under concurrent
 * use the result is only a snapshot; confirm callers tolerate that.
 */
int PersistentFifo::getLength () 
{
  return in - out;
}



/** Write the 6-digit decimal suffix of file number nb into fileName.
 * Digits occupy positions fileNameLength-5 .. fileNameLength, least
 * significant digit last; nb is effectively taken modulo 10^6.
 * Fixed: the original loop tested an unsigned index with
 * `i >= fileNameLength-5`, which never terminates when
 * fileNameLength == 5 (uint >= 0 is always true); counting the six
 * digits explicitly makes the loop bound independent of the index type.
 */
void PersistentFifo::makeName (uint nb) 
{
  uint pos = fileNameLength;
  for (int digit = 0; digit < 6; digit++) 
  {
    fileName[pos] = (nb % 10) + '0';
    nb /= 10;
    pos--;
  }
}



/** Decode the 6-digit decimal suffix at the end of a fifo file name. */
int PersistentFifo::getNumber (char *file) 
{
  uint len = strlen(file);
  int value = 0;
  // the number is stored in the last 6 characters of the name
  for (char *p = file + len - 6; p < file + len; p++) 
  {
    value = (value * 10) + (*p - '0');
  }
  return value;
}



/** Called after each get : when a whole file (urlByFile urls) has been
 * consumed, delete it and switch reading to the next numbered file.
 */
void PersistentFifo::updateRead () 
{
  if ((out % urlByFile) == 0) 
  {
    close(rfds);
    makeName(fout);
    // the file has been fully consumed
    unlink(fileName);
    makeName(++fout);
    rfds = open(fileName, O_RDONLY);
    // rebase the counters so `out` restarts at 0 for the new file
    in -= out;
    out = 0;
    // no partial line may span two files
    assert(bufPos == bufEnd);
  }
}



/** Called after each put : when the current file holds urlByFile urls,
 * flush it and start writing to the next numbered file.
 */
void PersistentFifo::updateWrite () 
{
  if ((in % urlByFile) == 0) 
  {
    flushOut();
    close(wfds);
    makeName(++fin);
    wfds = creat(fileName, S_IRUSR | S_IWUSR);
#ifdef RELOAD
    // checkpoint the hashtables so a crash can resume from this point
    global::seen->save();
#ifdef NO_DUP
    global::hDuplicate->save();
#endif
#endif
  }
}



/* read a line from the file

 * uses a buffer

 */

char *PersistentFifo::readLine () {
  // Returns a pointer into buf to the next '\n'-terminated line
  // (newline replaced by NUL); valid until the next call.
  if (bufPos == bufEnd) {
    // buffer fully consumed : restart from the beginning
    bufPos = 0; bufEnd = 0; buf[0] = 0;
  }
  char *posn = strchr(buf + bufPos, '\n');
  while (posn == NULL) {
    if (!(bufEnd - bufPos < maxUrlSize + 40 + maxCookieSize)) {
      // A pending line longer than any legal url : dump diagnostics.
      // Fixed: use an explicit "%s" format — the original passed the
      // data itself as the printf format string, so a '%' inside a
      // file name or url was interpreted as a conversion directive.
      printf("%s", fileName);
      printf("%s", buf+bufPos);
    }
    if (bufPos*2 > BUF_SIZE) {
      // more than half the buffer is dead space : compact it
      bufEnd -= bufPos;
      memmove(buf, buf+bufPos, bufEnd);
      bufPos = 0;
    }
    int postmp = bufEnd;
    bool noRead = true;
    while (noRead) {
      int rd = read(rfds, buf+bufEnd, BUF_SIZE-1-bufEnd);
      switch (rd) {
      case 0 :
        // end of file : the data we want is still in the write buffer;
        // we need to flush the output in order to read it
        flushOut();
        break;
      case -1 :
        // We have a trouble here
        if (errno != EINTR) {
          cerr << "Big Problem while reading (persistentFifo.h)\n";
          perror("reason");
          assert(false);
        } else {
          // interrupted by a signal : simply retry the read
          perror("Warning in PersistentFifo: ");
        }
        break;
      default:
        noRead = false;
        bufEnd += rd;
        // keep buf NUL-terminated so strchr stops at the data end
        buf[bufEnd] = 0;
        break;
      }
    }
    // only rescan the freshly read part for a newline
    posn = strchr(buf + postmp, '\n');
  }
  *posn = 0;                    // cut the line at the newline
  char *res = buf + bufPos;
  bufPos = posn + 1 - buf;      // next call starts after this line
  return res;
}



// write an url in the out file (buffered write)

void PersistentFifo::writeUrl (char *s) {
  size_t len = strlen(s);
  assert(len < maxUrlSize + 40 + maxCookieSize);
  if (outbufPos + len >= BUF_SIZE) {
    // not enough room left : empty the buffer first
    // (flushOut resets outbufPos to 0)
    flushOut ();
  }
  // append the serialized url to the output buffer
  memcpy(outbuf + outbufPos, s, len);
  outbufPos += len;
}



// Flush the out Buffer in the outFile

void PersistentFifo::flushOut () {
  // ecrireBuff writes outbufPos bytes of outbuf to wfds (project helper)
  ecrireBuff (wfds, outbuf, outbufPos);
  outbufPos = 0;
}
View Code

Larbin-2.6.3/src/utils/syncFifo.h

爬虫Larbin解析(二)——sequencer()
// Larbin

// Sebastien Ailleret

// 09-11-99 -> 07-12-01



/* fifo in RAM with synchronisations */



#ifndef SYNCFIFO_H

#define SYNCFIFO_H



#define std_size 100



#include "utils/mypthread.h"



/** A growable circular fifo of pointers kept in RAM, protected by a
 * mutex and a condition variable (when THREAD_OUTPUT is enabled).
 */
template <class T>
class SyncFifo {
 protected:
  // in : next write slot, out : next read slot (empty when in == out)
  uint in, out;
  // current capacity of tab
  uint size;
  T **tab;
#ifdef THREAD_OUTPUT
  pthread_mutex_t lock;
  pthread_cond_t nonEmpty;
#endif

 public:
  /* Specific constructor */
  SyncFifo (uint size = std_size);

  /* Destructor */
  ~SyncFifo ();

  /* get the first object (blocks while the fifo is empty) */
  T *get ();

  /* get the first object (non totally blocking)
   * return NULL if there is none
   */
  T *tryGet ();

  /* add an object in the Fifo */
  void put (T *obj);

  /* how many items are there inside ? */
  int getLength ();
};



/** Allocate the circular buffer and init the synchronisation objects. */
template <class T>
SyncFifo<T>::SyncFifo (uint size) {
  tab = new T*[size];
  this->size = size;
  in = 0;
  out = 0;
  mypthread_mutex_init (&lock, NULL);
  mypthread_cond_init (&nonEmpty, NULL);
}



/** Free the slot array (the stored objects themselves are NOT deleted). */
template <class T>
SyncFifo<T>::~SyncFifo () {
  delete [] tab;
  mypthread_mutex_destroy (&lock);
  mypthread_cond_destroy (&nonEmpty);
}



/** Pop the first object, blocking while the fifo is empty.
 * NOTE(review): mypthread_cond_wait presumably waits on nonEmpty as
 * long as the predicate (in == out) holds — confirm in mypthread.h.
 */
template <class T>
T *SyncFifo<T>::get () {
  T *tmp;
  mypthread_mutex_lock(&lock);
  mypthread_cond_wait(in == out, &nonEmpty, &lock);
  tmp = tab[out];
  out = (out + 1) % size;
  mypthread_mutex_unlock(&lock);
  return tmp;
}



/** Pop the first object without blocking; NULL when the fifo is empty. */
template <class T>
T *SyncFifo<T>::tryGet () {
  T *result = NULL;
  mypthread_mutex_lock(&lock);
  bool isEmpty = (in == out);
  if (!isEmpty) {
    // at least one element : take it and advance the read index
    result = tab[out];
    out = (out + 1) % size;
  }
  mypthread_mutex_unlock(&lock);
  return result;
}



/** Push an object : wakes up waiters when the fifo becomes non-empty
 * and doubles the circular buffer when it becomes full.
 */
template <class T>
void SyncFifo<T>::put (T *obj) {
  mypthread_mutex_lock(&lock);
  tab[in] = obj;
  if (in == out) {
    // the fifo was empty : wake up every blocked get()
    mypthread_cond_broadcast(&nonEmpty);
  }
  in = (in + 1) % size;
  if (in == out) {
    // full : grow to 2*size, unrolling the circular layout —
    // out..size-1 keeps its place, 0..in-1 moves just after it
    T **tmp;
    tmp = new T*[2*size];
    for (uint i=out; i<size; i++) {
      tmp[i] = tab[i];
    }
    for (uint i=0; i<in; i++) {
      tmp[i+size] = tab[i];
    }
    in += size;
    size *= 2;
    delete [] tab;
    tab = tmp;
  }
  mypthread_mutex_unlock(&lock);
}



/** Number of objects currently stored (taken under the lock). */
template <class T>
int SyncFifo<T>::getLength () {
  mypthread_mutex_lock(&lock);
  // distance from out to in on the circular buffer
  int count = (in + size - out) % size;
  mypthread_mutex_unlock(&lock);
  return count;
}



#endif // SYNCFIFO_H
View Code

 

你可能感兴趣的:(sequence)