[转] 贴Snoopy.class.php代码学习参考


Snoopy.class.php 是一个用于 HTTP 协议访问操作的类库,主要在 MagpieRSS 中用于抓取远程文件。我之前转载的一篇文章对它有过简单的介绍;今天无聊,把代码贴出来,供大家参考学习。

<?php

/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <[email protected]>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.01

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

You may contact the author of Snoopy by e-mail at:
[email protected]

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/

*************************************************/

class Snoopy
{
/**** Public variables ****/

/* user definable vars */

var $host = "www.php.net"; // host name we are connecting to
var $port = 80; // port we are connecting to
var $proxy_host = ""; // proxy host to use
var $proxy_port = ""; // proxy port to use
var $proxy_user = ""; // proxy user to use
var $proxy_pass = ""; // proxy password to use

var $agent = "Snoopy v1.2.3"; // agent we masquerade as
var $referer = ""; // referer info to pass
var $cookies = array(); // array of cookies to pass
// $cookies["username"]="joe";

var $rawheaders = array(); // array of raw headers to send
// $rawheaders["Content-type"]="text/html";


var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
var $lastredirectaddr = ""; // contains address of last redirected address
var $offsiteok = true; // allows redirection off-site
var $maxframes = 0; // frame content depth maximum. 0 = disallow
var $expandlinks = true; // expand links to fully qualified URLs.
// this only applies to fetchlinks(),
// submitlinks(), and submittext()

var $passcookies = true; // pass set cookies back through redirects
// NOTE: this currently does not respect
// dates, domains or paths.


var $user = ""; // user for http authentication
var $pass = ""; // password for http authentication

// http accept types

var $accept = "image/gif,image/x-xbitmap,image/jpeg,image/pjpeg,*/*";

var $results = ""; // where the content is put

var $error = ""; // error messages sent here
var $response_code = ""; // response code returned from server
var $headers = array(); // headers returned from server sent here
var $maxlength = 500000; // max return data length (body)
var $read_timeout = 0; // timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disallow timeouts

var $timed_out = false; // if a read operation timed out
var $status = 0; // http request status

var $temp_dir = "/tmp"; // temporary directory that the webserver
// has permission to write to.
// under Windows, this should be C:\temp


var $curl_path = "/usr/local/bin/curl";
// Snoopy will use cURL for fetching
// SSL content if a full system path to
// the cURL binary is supplied here.
// set to false if you do not have
// cURL installed. See http://curl.haxx.se
// for details on installing cURL.
// Snoopy does *not* use the cURL
// library functions built into php,
// as these functions are not stable
// as of this Snoopy release.


/**** Private variables ****/

var $_maxlinelen = 4096; // max line length (headers)

var $_httpmethod = "GET"; // default http request method
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST"; // default submit method
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
var $_redirectaddr = false; // will be set if page fetched is a redirect
var $_redirectdepth = 0; // increments on an http redirect
var $_frameurls = array(); // frame src urls
var $_framedepth = 0; // increments on frame depth

var $_isproxy = false; // set if using a proxy server
var $_fp_timeout = 30; // timeout for socket connection

/*======================================================================*\
	Function:	fetch
	Purpose:	fetch the contents of a web page over http or https
				(and possibly other protocols in the future like
				ftp, nntp, gopher, etc.)
	Input:		$URI	the location of the page to fetch
	Output:		true when the request was issued; false when the
				socket connection failed, when https is requested
				but $this->curl_path is unset or not executable, or
				when the scheme is unsupported (in that last case
				$this->error is set).
				The fetched text is stored by the request helpers
				(_httprequest / _httpsrequest, not shown in this
				block); per the original header it ends up in
				$this->results.
	Notes:		- user/password embedded in $URI overwrite
				  $this->user / $this->pass.
				- a pending redirect ($this->_redirectaddr) is followed
				  recursively while $this->maxredirs has not been
				  reached, provided it is on-site or
				  $this->offsiteok is true.
				- collected frame URLs ($this->_frameurls) are fetched
				  recursively up to $this->maxframes.
				- the return value of those recursive fetches is
				  intentionally ignored, as in the original code.
\*======================================================================*/

function fetch( $URI )
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; normalise so
    // the lookups below fall through to the "invalid protocol" branch.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

    // request target used when talking to the origin server directly
    $path = $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");

    switch (strtolower($scheme))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            // _isproxy is read only after _connect(), exactly as the
            // original did: through a proxy send the entire URI,
            // otherwise only the path.
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            // https is delegated to an external cURL binary
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;

            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $scheme . '"';
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // On-site means: http or https URL whose host is exactly
        // $this->host. The trailing boundary stops "example.com" from
        // matching "example.com.evil.org".
        $onsite = preg_match(
            '~^https?://' . preg_quote($this->host, '~') . '(?:[:/?#]|$)~i',
            $this->_redirectaddr
        );

        // only follow redirect if it's on this site, or offsiteok is true
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // Work on a copy and clear the member first: the nested fetch()
        // calls may collect frame URLs of their own.
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach replaces list()/each(); each() was removed in PHP 8
        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;

            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}

/* ======================================================================*
	Function:	submit
	Purpose:	submit an http form
	Input:		$URI		the location to post the data
				$formvars	the formvars to use.
							format: $formvars["var"] = "val";
				$formfiles	an array of files to submit
							format: $formfiles["var"] = "/dir/filename.ext";
	Output:		$this->results	the text output from the post
*======================================================================
*/

function submit( $URI , $formvars = "" , $formfiles = "" )
{
unset ( $postdata );

$postdata = $this -> _prepare_post_body( $formvars , $formfiles );

$URI_PARTS = parse_url ( $URI );
if ( ! empty ( $URI_PARTS [ " user " ]))
$this -> user = $URI_PARTS [ " user " ];
if ( ! empty ( $URI_PARTS [ " pass " ]))
$this -> pass = $URI_PARTS [ " pass " ];
if ( empty ( $URI_PARTS [ " query " ]))
$URI_PARTS [ " query " ] = '' ;
if ( empty ( $URI_PARTS [ " path " ]))
$URI_PARTS [ " path " ] = '' ;

switch ( strtolower ( $URI_PARTS [ " scheme " ]))
{
case " http " :
$this -> host = $URI_PARTS [ " host " ];
if ( ! empty ( $URI_PARTS [ " port " ]))
$this -> port = $URI_PARTS [ " port " ];
if ( $this -> _connect( $fp ))
{
if ( $this -> _isproxy)
{
// usingproxy,sendentireURI
$this -> _httprequest( $URI , $fp , $URI , $this -> _submit_method , $this -> _submit_type , $postdata );
}
else
{
$path = $URI_PARTS [ " path " ] . ( $URI_PARTS [ " query " ] ? " ? " . $URI_PARTS [ " query " ] : "" );
// noproxy,sendonlythepath <b
分享到:
评论

你可能感兴趣的:(PHP,.net,Scheme,ext,FP)