// larbin中的TCP连接 (TCP connections in larbin)

// Larbin
// Sebastien Ailleret
// 07-03-00 -> 07-03-00

#include <string.h>
#include <unistd.h>
#include <iostream.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <sys/time.h>
#include <sys/resource.h>

#include "global.h"
#include "xfetcher/fetchOpen.h"
#include "xutils/text.h"
#include "xutils/connexion.h"

#include "xutils/debug.h"

// Reads and discards the request sent on an accepted connection (defined below).
static void readAll (int fds);
// Writes the HTML statistics page to the connection, then closes it (defined below).
static void writeRes (int fds);

// Textual start time; assigned once in webserver() and sent by writeRes().
static char *startDate;

///////////////////////////////////////////////////////////////////////////////////////
//
// Purpose: serve the crawler's statistics page to remote clients over HTTP.
//          Binds a listening TCP socket on global::httpPort and answers
//          requests one at a time, forever.
// Parameters: none
// Returns: never returns normally; calls exit(1) if the listening socket
//          cannot be set up.
//
///////////////////////////////////////////////////////////////////////////////////////
void webserver () 
{
  int fds;
  int nAllowReuse = 1;
  struct sockaddr_in addr;
  time_t now = time(NULL);

  // ctime() returns a pointer to a static buffer, so keep a private copy
  // of the start time for later requests.
  startDate = newString(ctime(&now));

  bzero (&addr, sizeof(addr));

  addr.sin_addr.s_addr = INADDR_ANY;
  addr.sin_family = AF_INET;
  addr.sin_port = htons(global::httpPort);  // port of the statistics web interface

  // create, configure, bind and listen; any failure is fatal
  if ((fds = socket(AF_INET, SOCK_STREAM, 0)) == -1
      || setsockopt(fds, SOL_SOCKET, SO_REUSEADDR, (char*)&nAllowReuse, sizeof(nAllowReuse))
      || bind(fds, (struct sockaddr *) &addr, sizeof(addr)) != 0
      || listen(fds, 4) != 0) 
  {
    cerr << "Unable to get the socket\n";
    exit(1);
  }

  // answer requests: one client is served completely before the next accept()
  for (;;)
  {
    struct sockaddr_in addrc;
    // accept() expects a socklen_t holding the size of the buffer it fills
    socklen_t len = sizeof(addrc);
    int fdc = accept(fds, (struct sockaddr *) &addrc, &len);
    if (fdc == -1) 
    {
      // accept failed: report it and keep serving
      cerr << "Trouble with web server...\n";
    } 
    else 
    {
      readAll(fdc);
      writeRes(fdc);   // writeRes closes fdc
    }
  }
}

///////////////////////////////////////////////////////////////
//
// Purpose: consume the incoming request up to and including the blank
//          line that ends it (two consecutive line feeds, carriage
//          returns ignored), so the reply can then be written.
// Parameter: int fds - descriptor of the accepted connection
// Returns: void
// Note: the bytes are discarded; the request content is never examined.
//       Reading also stops on end-of-file or on a read error.
//
////////////////////////////////////////////////////////////////////
static void readAll (int fds) 
{
  char c;
  int cont = 2;   // line feeds still needed in a row to finish the request
  while (cont) 
  {
    if (read(fds, &c, 1) == 1) 
    {
      switch (c) 
      {
        case '\r' :
          // ignored, so both CRLF and bare LF line endings are accepted
          break;
        case '\n' :
          cont--;
          break;
        default :
          // any other byte means the current line is not empty
          cont = 2;
          break;
      }
    } 
    else 
    {
      // nothing more to read (EOF or error): stop and let the caller reply
      cont = 0;
    }
  }
}

///////////////////////////////////////////////////////////////////////////
//
// Purpose: send the HTTP response (status line, headers and an HTML page
//          of crawler statistics) to the client, then shut down and close
//          the connection.
// Parameter: int fds - descriptor of the accepted connection
// Returns: void
// Note: the statistics sections are compiled in unless NOSTATS is defined;
//       the resource/thread/process sections unless NDEBUG is defined.
//
///////////////////////////////////////////////////////////////////////////
static void writeRes (int fds) 
{
  crash("Answer to a web query");

  // headers and html tags (HTTP header lines end with CRLF)
  ecrire(fds, "HTTP/1.0 200 OK\r\nServer: Larbin\r\nContent-type: text/html\r\n\r\n<html>\n<head>\n<title>Larbin real time statistic</title>\n</head>\n<body bgcolor=\"#FFFFFF\">\n<center><h1>Larbin is up and running !</h1></center>\n");

  ecrire(fds, "\nStart date : ");
  ecrire(fds, startDate);
  ecrire(fds, "\n<br>Current date : ");
  time_t now = time(NULL);
  // ctime()'s buffer is written out immediately, so no copy is made here;
  // the previous newString() copy was never released.
  // NOTE(review): assumes newString heap-allocates and ecrire does not
  // retain its argument - confirm.
  ecrire(fds, ctime(&now));

#ifndef NOSTATS
  if (global::isSpecific) {
    ecrire(fds, "\n<h2>Interesting pages (");
    ecrire(fds, global::contentType);
    ecrire(fds, ") :</h2>\ntotal Fetched (success) : ");
    ecrireInt(fds, interestingPage);
    ecrire(fds, "\n<br>total Fetched (error or success) : ");
    ecrireInt(fds, interestingSeen);
    if (global::privilegedExt != NULL) {
      ecrire(fds, "\n<br>privileged links seen (");
      ecrire(fds, global::privilegedExt);
      ecrire(fds, ") : ");
      ecrireInt(fds, interestingExtension);
      ecrire(fds, "\n<br>privileged links fetched : ");
      ecrireInt(fds, extensionTreated);
    }
  }

  ecrire(fds, "\n<h2>Pages :</h2>\nurls treated : ");
  ecrireInt(fds, urls);
  ecrire(fds, "\n<br>forbiddenRobots : ");
  ecrireInt(fds, answers[forbiddenRobots]);
  ecrire(fds, "\n<br>noDNS : ");
  ecrireInt(fds, answers[noDNS]);
  ecrire(fds, "\n<br>\n<br>Pages : ");
  ecrireInt(fds, pages);
  ecrire(fds, "\n<br>Success : ");
  ecrireInt(fds, answers[success]);
  ecrire(fds, "\n<br>no Connection : ");
  ecrireInt(fds, answers[noConnection]);
  ecrire(fds, "\n<br>early stop : ");
  ecrireInt(fds, answers[earlyStop]);
  ecrire(fds, "\n<br>timeout : ");
  ecrireInt(fds, answers[timeout]);
  ecrire(fds, "\n<br>badType : ");
  ecrireInt(fds, answers[badType]);
  ecrire(fds, "\n<br>tooBig : ");
  ecrireInt(fds, answers[tooBig]);
  ecrire(fds, "\n<br>err40X : ");
  ecrireInt(fds, answers[err40X]);
  ecrire(fds, "\n<br>\n<br>urls accepted : ");
  ecrireInt(fds, hashUrls);
  ecrire(fds, " / ");
  ecrireInt(fds, hashSize);

  ecrire(fds, "\n<h2>Sites seen (dns call done) :</h2>\ntotal number : ");
  ecrireInt(fds, siteSeen);
  ecrire(fds, " (+");
  ecrireInt(fds, nbCalls);
  ecrire(fds, ")\n<br>with dns : ");
  ecrireInt(fds, siteDNS);
  ecrire(fds, "\n<br>with robots.txt : ");
  ecrireInt(fds, siteRobots);
  ecrire(fds, "\n<br>with good robots.txt : ");
  ecrireInt(fds, robotsOK);

  ecrire(fds, "\n<h2>Fifos :</h2>\nurls on disk : ");
  ecrireInt(fds, global::URLsInternal->getLength());
  ecrire(fds, "\n<br>sites with ip addr and something to fetch : ");
  ecrireInt(fds,
            global::okSites->getLength()
            + global::nb_conn
            - global::freeConns->getLength());
  ecrire(fds, "\n<br>sites without ip addr yet : ");
  ecrireInt(fds, global::dnsSites->getLength());
#endif // NOSTATS

#ifndef NDEBUG
  ecrire(fds, "\n<h2>Ressources Sharing :</h2>\nconnexions in use : ");
  ecrireInt(fds, global::nb_conn
                 - global::freeConns->getLength()
                 - global::userConns->getLength());
  ecrire(fds, "\n<br>connexions waiting user processing : ");
  ecrireInt(fds, global::userConns->getLength());
  ecrire(fds, "\n<br>free connexions : ");
  ecrireInt(fds, global::freeConns->getLength());
  ecrire(fds, "\n<br>parsers : ");
  ecrireInt(fds, debPars);
  ecrire(fds, "\n<br>sites in ram : ");
  ecrireInt(fds, sites);
  ecrire(fds, "\n<br>urls in ram : ");
  ecrireInt(fds, debUrl);

  ecrire(fds, "\n<h2>State of threads :</h2>\nstateBlock : ");
  ecrireInt(fds, stateBlock);
  ecrire(fds, "\n<br>stateNonBlock : ");
  ecrireInt(fds, stateNonBlock);
  ecrire(fds, "\n<br>statePipe : ");
  ecrireInt(fds, statePipe);

  ecrire(fds, "\n<h2>/proc/self/status :</h2>\n<pre>\n");
  int status = open("/proc/self/status", O_RDONLY);
  // the file may be unavailable (e.g. no /proc); then the section stays empty
  if (status != -1) {
    char *file = readfile(status);
    ecrire(fds, file);
    delete [] file;
    close(status);
  }
  ecrire(fds, "</pre>");

#endif // NDEBUG

  // end of page and kill the connexion
  // NOTE(review): the e-mail address below is the obfuscated placeholder found
  // in this copy of the file; restore the real address from the upstream source.
  ecrire(fds, "\n<hr>\n<A HREF=\"http://pauillac.inria.fr/~ailleret/\"><img SRC=\"http://pauillac.inria.fr/~ailleret/seb.gif\" ALT=\"ma photo\"></A>\n<A HREF=\"mailto:[email protected]\">[email protected]</A>\n</body>\n</html>");
  shutdown(fds, 2);
  close(fds);
}

// 你可能感兴趣的:(Web,struct,socket,tcp,null,processing) -- "You may also be interested in" (page footer tags)