[转] 贴Snoopy.class.php代码学习参考


Snoopy.class.php 是一个用于 HTTP 协议访问操作的类库,主要在 MagpieRSS 中用于远程文件的抓取。我之前转载的一篇文章对它做过简单的介绍,今天无聊,把代码贴出来,供大家参考学习。

<?php

/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <[email protected]>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.01

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

You may contact the author of Snoopy by e-mail at:
[email protected]

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/

*************************************************/

class Snoopy
{
    /**** Public variables ****/

    /* user definable vars */

    var $host = "www.php.net";      // host name we are connecting to
    var $port = 80;                 // port we are connecting to
    var $proxy_host = "";           // proxy host to use
    var $proxy_port = "";           // proxy port to use
    var $proxy_user = "";           // proxy user to use
    var $proxy_pass = "";           // proxy password to use

    var $agent = "Snoopy v1.2.3";   // agent we masquerade as
    var $referer = "";              // referer info to pass
    var $cookies = array();         // array of cookies to pass
                                    // $cookies["username"]="joe";

    var $rawheaders = array();      // array of raw headers to send
                                    // $rawheaders["Content-type"]="text/html";

    var $maxredirs = 5;             // http redirection depth maximum. 0 = disallow
    var $lastredirectaddr = "";     // contains address of last redirected address
    var $offsiteok = true;          // allows redirection off-site
    var $maxframes = 0;             // frame content depth maximum. 0 = disallow
    var $expandlinks = true;        // expand links to fully qualified URLs.
                                    // this only applies to fetchlinks(),
                                    // submitlinks(), and submittext()

    var $passcookies = true;        // pass set cookies back through redirects
                                    // NOTE: this currently does not respect
                                    // dates, domains or paths.

    var $user = "";                 // user for http authentication
    var $pass = "";                 // password for http authentication

    // http accept types
    var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

    var $results = "";              // where the content is put

    var $error = "";                // error messages sent here
    var $response_code = "";        // response code returned from server
    var $headers = array();         // headers returned from server sent here
    var $maxlength = 500000;        // max return data length (body)
    var $read_timeout = 0;          // timeout on read operations, in seconds
                                    // set to 0 to disallow timeouts

    var $timed_out = false;         // if a read operation timed out
    var $status = 0;                // http request status

    var $temp_dir = "/tmp";         // temporary directory that the webserver
                                    // has permission to write to.
                                    // under Windows, this should be C:\temp

    var $curl_path = "/usr/local/bin/curl";
                                    // Snoopy will use cURL for fetching
                                    // SSL content if a full system path to
                                    // the cURL binary is supplied here.
                                    // set to false if you do not have
                                    // cURL installed. See http://curl.haxx.se
                                    // for details on installing cURL.
                                    // Snoopy does *not* use the cURL
                                    // library functions built into php.

    /**** Private variables ****/

    var $_maxlinelen = 4096;        // max line length (headers)

    var $_httpmethod = "GET";       // default http request method
    var $_httpversion = "HTTP/1.0"; // default http request version
    var $_submit_method = "POST";   // default submit method
    var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
    var $_mime_boundary = "";       // MIME boundary for multipart/form-data submit type
    var $_redirectaddr = false;     // will be set if page fetched is a redirect
    var $_redirectdepth = 0;        // increments on an http redirect
    var $_frameurls = array();      // frame src urls
    var $_framedepth = 0;           // increments on frame depth

    var $_isproxy = false;          // set if using a proxy server
    var $_fp_timeout = 30;          // timeout for socket connection

    /*======================================================================*\
        Function:   fetch
        Purpose:    fetch the contents of a web page, following redirects
                    and frames up to the configured depths
        Input:      $URI    the location of the page to fetch
        Output:     $this->results  the output text from the fetch
                    returns false if the scheme is unsupported, the socket
                    connection fails or cURL is unavailable, true otherwise
    \*======================================================================*/

    function fetch($URI)
    {
        $URI_PARTS = $this->_split_uri($URI);

        switch (strtolower($URI_PARTS["scheme"])) {
            case "http":
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if (!$this->_connect($fp))
                    return false;

                // using proxy: send entire URI; no proxy: send only the path
                $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
                $this->_disconnect($fp);
                break;

            case "https":
                if (!$this->_curl_available())
                    return false;
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                $this->_httpsrequest($target, $URI, $this->_httpmethod);
                break;

            default:
                // not a valid protocol
                $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . '"';
                return false;
        }

        // url was redirected: follow it unless we've hit the max depth
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
            if ($this->_redirect_permitted()) {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                $this->fetch($this->_redirectaddr);
            }
        }

        $this->_fetch_frames();

        return true;
    }

    /*======================================================================*\
        Function:   submit
        Purpose:    submit an http form
        Input:      $URI        the location to post the data
                    $formvars   the formvars to use.
                                format: $formvars["var"] = "val";
                    $formfiles  an array of files to submit
                                format: $formfiles["var"] = "/dir/filename.ext";
        Output:     $this->results  the text output from the post
                    returns false on unsupported scheme / connection failure
    \*======================================================================*/

    function submit($URI, $formvars = "", $formfiles = "")
    {
        $postdata = $this->_prepare_post_body($formvars, $formfiles);

        $URI_PARTS = $this->_split_uri($URI);

        switch (strtolower($URI_PARTS["scheme"])) {
            case "http":
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if (!$this->_connect($fp))
                    return false;

                // using proxy: send entire URI; no proxy: send only the path
                $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                $this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                $this->_disconnect($fp);
                break;

            case "https":
                if (!$this->_curl_available())
                    return false;
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                $this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                break;

            default:
                // not a valid protocol
                $this->error = 'Invalid protocol "' . $URI_PARTS["scheme"] . '"';
                return false;
        }

        // url was redirected: follow it unless we've hit the max depth
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
            // qualify a redirect target that does not carry the request's scheme
            if (!preg_match("|^" . preg_quote($URI_PARTS["scheme"], "|") . "://|", $this->_redirectaddr))
                $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"] . "://" . $URI_PARTS["host"]);

            if ($this->_redirect_permitted()) {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                if (strpos($this->_redirectaddr, "?") > 0)
                    $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                else
                    $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }

        $this->_fetch_frames();

        return true;
    }

    /*======================================================================*\
        Function:   fetchlinks
        Purpose:    fetch the links from a web page
        Input:      $URI    where you are fetching from
        Output:     $this->results  an array of the URLs (one array per
                    frame when framed content was fetched)
    \*======================================================================*/

    function fetchlinks($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        // links are expanded per page so framed results (an array of link
        // arrays) are handled the same way submitlinks() handles them
        $this->_filter_results("_striplinks", $this->expandlinks ? $URI : false);
        return true;
    }

    /*======================================================================*\
        Function:   fetchform
        Purpose:    fetch the form elements from a web page
        Input:      $URI    where you are fetching from
        Output:     $this->results  the resulting html form
    \*======================================================================*/

    function fetchform($URI)
    {
        if (!$this->fetch($URI))
            return false;

        $this->_filter_results("_stripform");
        return true;
    }

    /*======================================================================*\
        Function:   fetchtext
        Purpose:    fetch the text from a web page, stripping the links
        Input:      $URI    where you are fetching from
        Output:     $this->results  the text from the web page
    \*======================================================================*/

    function fetchtext($URI)
    {
        if (!$this->fetch($URI))
            return false;

        $this->_filter_results("_striptext");
        return true;
    }

    /*======================================================================*\
        Function:   submitlinks
        Purpose:    grab links from a form submission
        Input:      $URI        where you are submitting from
                    $formvars   form variables (see submit())
                    $formfiles  files to upload (see submit())
        Output:     $this->results  an array of the links from the post
    \*======================================================================*/

    function submitlinks($URI, $formvars = "", $formfiles = "")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        $this->_filter_results("_striplinks", $this->expandlinks ? $URI : false);
        return true;
    }

    /*======================================================================*\
        Function:   submittext
        Purpose:    grab text from a form submission
        Input:      $URI        where you are submitting from
                    $formvars   form variables (see submit())
                    $formfiles  files to upload (see submit())
        Output:     $this->results  the text from the web page
    \*======================================================================*/

    function submittext($URI, $formvars = "", $formfiles = "")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        $this->_filter_results("_striptext", $this->expandlinks ? $URI : false);
        return true;
    }

    /*======================================================================*\
        Function:   set_submit_multipart
        Purpose:    Set the form submission content type to
                    multipart/form-data
    \*======================================================================*/

    function set_submit_multipart()
    {
        $this->_submit_type = "multipart/form-data";
    }

    /*======================================================================*\
        Function:   set_submit_normal
        Purpose:    Set the form submission content type to
                    application/x-www-form-urlencoded
    \*======================================================================*/

    function set_submit_normal()
    {
        $this->_submit_type = "application/x-www-form-urlencoded";
    }

    /*======================================================================*\
        Private functions
    \*======================================================================*/

    /*======================================================================*\
        Function:   _split_uri
        Purpose:    parse a URI, guarantee the keys fetch()/submit() read
                    ("scheme", "host", "path", "query") exist, and pick up
                    any user/password embedded in the URI
        Input:      $URI    the URI to parse
        Output:     array of URI parts (missing parts are empty strings)
    \*======================================================================*/

    function _split_uri($URI)
    {
        $parts = parse_url($URI);
        if (!is_array($parts))
            $parts = array();   // parse_url() returns false on badly malformed input

        foreach (array("scheme", "host", "path", "query") as $key) {
            if (empty($parts[$key]))
                $parts[$key] = '';
        }

        if (!empty($parts["user"]))
            $this->user = $parts["user"];
        if (!empty($parts["pass"]))
            $this->pass = $parts["pass"];

        return $parts;
    }

    /*======================================================================*\
        Function:   _request_path
        Purpose:    build the request target (path plus query string) that
                    is sent when no proxy is in use
        Input:      $URI_PARTS  parts as returned by _split_uri()
        Output:     the path, with "?query" appended when a query exists
    \*======================================================================*/

    function _request_path($URI_PARTS)
    {
        return $URI_PARTS["path"] . ($URI_PARTS["query"] ? "?" . $URI_PARTS["query"] : "");
    }

    /*======================================================================*\
        Function:   _curl_available
        Purpose:    check that a usable cURL binary is configured
        Output:     true if $this->curl_path is set and executable
    \*======================================================================*/

    function _curl_available()
    {
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
        return true;
    }

    /*======================================================================*\
        Function:   _redirect_permitted
        Purpose:    only follow a redirect if it's on this site, or
                    offsiteok is true
        Output:     true if $this->_redirectaddr may be followed
    \*======================================================================*/

    function _redirect_permitted()
    {
        if ($this->offsiteok)
            return true;

        // accept both http and https targets on the current host
        return (bool) preg_match("#^https?://" . preg_quote($this->host, "#") . "#i", $this->_redirectaddr);
    }

    /*======================================================================*\
        Function:   _fetch_frames
        Purpose:    fetch the frame sources collected by the last request,
                    as long as the frame depth limit has not been reached
    \*======================================================================*/

    function _fetch_frames()
    {
        if ($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
            return;

        // take a private copy: the nested fetch() calls refill _frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    /*======================================================================*\
        Function:   _filter_results
        Purpose:    run one of the _strip* filters over $this->results
                    (a single page, or an array of pages when frames were
                    fetched) and optionally expand the links of each page
        Input:      $filter name of the method to apply to each page
                    $base   URI to expand links against, or false
    \*======================================================================*/

    function _filter_results($filter, $base = false)
    {
        $framed = is_array($this->results);
        $pages = $framed ? $this->results : array($this->results);

        foreach ($pages as $x => $page) {
            $page = $this->$filter($page);
            if ($base !== false)
                $page = $this->_expandlinks($page, $base);
            $pages[$x] = $page;
        }

        $this->results = $framed ? $pages : $pages[0];
    }

    /*======================================================================*\
        Function:   _striplinks
        Purpose:    strip the hyperlinks from an html document
        Input:      $document   document to strip.
        Output:     $match      an array of the links (quoted hrefs first,
                                then unquoted ones); empty if none found
    \*======================================================================*/

    function _striplinks($document)
    {
        // group 1: optional opening quote
        // group 2: href value when quoted (up to the matching quote)
        // group 3: href value when unquoted (up to next space or ">")
        preg_match_all('~<\s*a\s.*?href\s*=\s*(["\'])?(?(1)(.*?)\1|([^\s>]+))~is', $document, $links);

        // catenate the non-empty matches from the conditional subpattern
        $match = array();
        foreach (array(2, 3) as $group) {
            foreach ($links[$group] as $val) {
                if (!empty($val))
                    $match[] = $val;
            }
        }

        // return the links
        return $match;
    }

    /*======================================================================*\
        Function:   _stripform
        Purpose:    strip the form elements from an html document
        Input:      $document   document to strip.
        Output:     $match      the form-related tags, one per CRLF-separated line
    \*======================================================================*/

    function _stripform($document)
    {
        preg_match_all('~</?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=</?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))~Usi', $document, $elements);

        // catenate the matches
        $match = implode("\r\n", $elements[0]);

        // return the form elements
        return $match;
    }

    /*======================================================================*\
        Function:   _striptext
        Purpose:    strip the text from an html document
        Input:      $document   document to strip.
        Output:     $text       the resulting text
    \*======================================================================*/

    function _striptext($document)
    {
        // entities are listed one by one; only the more common ones are
        // included. Replacements above 127 are single Latin-1 bytes.
        $search = array(
            '~<script[^>]*?>.*?</script>~si',   // strip out javascript
            '~<[/!]*?[^<>]*?>~si',              // strip out html tags
            '~([\r\n])[\s]+~',                  // strip out white space
            '~&(quot|#34|#034|#x22);~i',        // replace html entities
            '~&(amp|#38|#038|#x26);~i',         // added hexadecimal values
            '~&(lt|#60|#060|#x3c);~i',
            '~&(gt|#62|#062|#x3e);~i',
            '~&(nbsp|#160|#xa0);~i',
            '~&(iexcl|#161);~i',
            '~&(cent|#162);~i',
            '~&(pound|#163);~i',
            '~&(copy|#169);~i',
            '~&(reg|#174);~i',
            '~&(deg|#176);~i',
            '~&(#39|#039|#x27);~',
            '~&(euro|#8364);~i',                // europe
            '~&a(uml|UML);~',                   // german
            '~&o(uml|UML);~',
            '~&u(uml|UML);~',
            '~&A(uml|UML);~',
            '~&O(uml|UML);~',
            '~&U(uml|UML);~',
            '~&szlig;~i',
        );
        $replace = array(
            "",
            "",
            '\1',
            '"',
            "&",
            "<",
            ">",
            " ",
            chr(161),
            chr(162),
            chr(163),
            chr(169),
            chr(174),
            chr(176),
            chr(39),
            chr(128),
            chr(228),   // a-umlaut
            chr(246),   // o-umlaut
            chr(252),   // u-umlaut
            chr(196),   // A-umlaut
            chr(214),   // O-umlaut
            chr(220),   // U-umlaut
            chr(223),   // sharp s
        );

        $text = preg_replace($search, $replace, $document);

        return $text;
    }

    /*======================================================================*\
        Function:   _expandlinks
        Purpose:    expand each link into a fully qualified URL
        Input:      $links  the links to qualify (string or array of strings)
                    $URI    the full URI to get the base from
        Output:     $expandedLinks  the expanded links
    \*======================================================================*/

    function _expandlinks($links, $URI)
    {
        // ignore the query string of the base URI
        preg_match("/^[^\?]+/", $URI, $match);
        $base = isset($match[0]) ? $match[0] : '';

        $parts = parse_url($base);
        if (!is_array($parts))
            $parts = array();

        // site root: scheme://host[:port]
        $match_root = '';
        if (isset($parts["scheme"], $parts["host"])) {
            $match_root = $parts["scheme"] . "://" . $parts["host"];
            if (isset($parts["port"]))
                $match_root .= ":" . $parts["port"];
        }

        // base directory: drop a trailing "/name.ext" and a trailing slash.
        // This is done on the path only, so a host such as "example.com"
        // is never mistaken for a file name.
        $dir = isset($parts["path"]) ? $parts["path"] : '';
        $dir = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $dir);
        $dir = preg_replace("|/$|", "", $dir);
        $match_base = $match_root . $dir;

        $search = array(
            // same-host absolute link: reduce to its path (re-rooted below)
            "#^http://" . preg_quote($this->host, "#") . "(?=/|$)#i",
            // root-relative link
            "|^(\/)|i",
            // relative link (anything that is not already http(s) or mailto)
            "|^(?!https?://)(?!mailto:)|i",
            // collapse "/./" and "/dir/../"
            "|/\./|",
            "|/[^\/]+/\.\./|"
        );

        $replace = array(
            "",
            $match_root . "/",
            $match_base . "/",
            "/",
            "/"
        );

        $expandedLinks = preg_replace($search, $replace, $links);

        return $expandedLinks;
    }

    /*======================================================================*\
        Function:   _cookie_header
        Purpose:    build the Cookie request header from $this->cookies
        Output:     "Cookie: k=v; k2=v2" or an empty string if no cookies
    \*======================================================================*/

    function _cookie_header()
    {
        if (empty($this->cookies))
            return '';
        if (!is_array($this->cookies))
            $this->cookies = (array) $this->cookies;

        $pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal)
            $pairs[] = $cookieKey . "=" . urlencode($cookieVal);

        return "Cookie: " . implode("; ", $pairs);
    }

    /*======================================================================*\
        Function:   _record_header
        Purpose:    process one response header line: remember redirects
                    (Location:/URI:), the status line, and store the line
                    in $this->headers
        Input:      $header the raw header line
                    $scheme scheme of the request, used to qualify
                            host-less redirect targets
    \*======================================================================*/

    function _record_header($header, $scheme)
    {
        // if a header begins with Location: or URI:, set the redirect
        if (preg_match('/^(Location:|URI:)\s*(.*)/i', chop($header), $matches)) {
            $target = $matches[2];
            // look for :// in the value to see if hostname is included
            if (!preg_match("|://|", $target)) {
                // no host in the path, so prepend (avoiding a double slash)
                $prefix = $scheme . "://" . $this->host . ":" . $this->port;
                if (!preg_match("|^/|", $target))
                    $prefix .= "/";
                $target = $prefix . $target;
            }
            $this->_redirectaddr = $target;
        }

        if (preg_match("|^HTTP/|", $header)) {
            if (preg_match('|^HTTP/[^\s]*\s(.*?)\s|', $header, $status))
                $this->status = $status[1];
            $this->response_code = $header;
        }

        $this->headers[] = $header;
    }

    /*======================================================================*\
        Function:   _store_body
        Purpose:    post-process a response body: detect meta-refresh
                    redirects, collect frame sources, and store the body
                    in $this->results
        Input:      $results    the response body
                    $URI        the full URI that was requested
                    $scheme     scheme of the request
    \*======================================================================*/

    function _store_body($results, $URI, $scheme)
    {
        // check if there is a redirect meta tag
        if (preg_match('~<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*["\']?\d+;[\s]*URL[\s]*=[\s]*([^"\']*?)["\']?>~i', $results, $match))
            $this->_redirectaddr = $this->_expandlinks($match[1], $URI);

        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all('~<frame\s+.*src[\s]*=[\'"]?([^\'">]+)~i', $results, $match)) {
            // results becomes a list of pages; "[]" on a string is an error
            if (!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            foreach ($match[1] as $src)
                $this->_frameurls[] = $this->_expandlinks($src, $scheme . "://" . $this->host);
        }
        // have we already fetched framed content?
        elseif (is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;
    }

    /*======================================================================*\
        Function:   _httprequest
        Purpose:    go get the http data from the server
        Input:      $url            the url to fetch
                    $fp             the current open file pointer
                    $URI            the full URI
                    $http_method    request method (GET, POST, ...)
                    $content_type   Content-type of the body, if any
                    $body           body contents to send if any (POST)
        Output:     true on success, false if a read timed out
                    ($this->status is then -100)
    \*======================================================================*/

    function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
    {
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";
        if (empty($url))
            $url = "/";

        $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
        if (!empty($this->agent))
            $headers .= "User-Agent: " . $this->agent . "\r\n";
        if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
            $headers .= "Host: " . $this->host;
            if (!empty($this->port))
                $headers .= ":" . $this->port;
            $headers .= "\r\n";
        }
        if (!empty($this->accept))
            $headers .= "Accept: " . $this->accept . "\r\n";
        if (!empty($this->referer))
            $headers .= "Referer: " . $this->referer . "\r\n";

        $cookie_header = $this->_cookie_header();
        if ($cookie_header !== '')
            $headers .= $cookie_header . "\r\n";

        if (!empty($this->rawheaders)) {
            if (!is_array($this->rawheaders))
                $this->rawheaders = (array) $this->rawheaders;
            // foreach always starts at the first element, so the raw
            // headers are also sent on redirected requests
            foreach ($this->rawheaders as $headerKey => $headerVal)
                $headers .= $headerKey . ": " . $headerVal . "\r\n";
        }
        if (!empty($content_type)) {
            $headers .= "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $headers .= "; boundary=" . $this->_mime_boundary;
            $headers .= "\r\n";
        }
        if (!empty($body))
            $headers .= "Content-length: " . strlen($body) . "\r\n";
        if (!empty($this->user) || !empty($this->pass))
            $headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n";

        // add proxy auth headers
        if (!empty($this->proxy_user))
            $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass) . "\r\n";

        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            stream_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp, $headers . $body);

        $this->_redirectaddr = false;
        $this->headers = array();

        while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
                $this->status = -100;
                return false;
            }

            // a blank line ends the header section
            if (rtrim($currentHeader, "\r\n") === '')
                break;

            $this->_record_header($currentHeader, $scheme);
        }

        // read the body until the server stops sending
        $results = '';
        while (true) {
            $_data = fread($fp, $this->maxlength);
            if ($_data === false || strlen($_data) == 0)
                break;
            $results .= $_data;
        }

        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        $this->_store_body($results, $URI, $scheme);

        return true;
    }

    /*======================================================================*\
        Function:   _httpsrequest
        Purpose:    go get the https data from the server using curl
        Input:      $url            the url to fetch (unused: curl is given
                                    the full URI)
                    $URI            the full URI
                    $http_method    request method (unused: curl derives it)
                    $content_type   Content-type of the body, if any
                    $body           body contents to send if any (POST)
        Output:     true on success, false if curl failed
                    ($this->error is then set)
    \*======================================================================*/

    function _httpsrequest($url, $URI, $http_method, $content_type = "", $body = "")
    {
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $headers = array();

        $URI_PARTS = parse_url($URI);
        $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "https";

        // the request line (GET ... HTTP/x) is not needed for curl

        if (!empty($this->agent))
            $headers[] = "User-Agent: " . $this->agent;
        if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
            if (!empty($this->port))
                $headers[] = "Host: " . $this->host . ":" . $this->port;
            else
                $headers[] = "Host: " . $this->host;
        }
        if (!empty($this->accept))
            $headers[] = "Accept: " . $this->accept;
        if (!empty($this->referer))
            $headers[] = "Referer: " . $this->referer;

        $cookie_header = $this->_cookie_header();
        if ($cookie_header !== '')
            $headers[] = $cookie_header;

        if (!empty($this->rawheaders)) {
            if (!is_array($this->rawheaders))
                $this->rawheaders = (array) $this->rawheaders;
            foreach ($this->rawheaders as $headerKey => $headerVal)
                $headers[] = $headerKey . ": " . $headerVal;
        }
        if (!empty($content_type)) {
            if ($content_type == "multipart/form-data")
                $headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
            else
                $headers[] = "Content-type: $content_type";
        }
        if (!empty($body))
            $headers[] = "Content-length: " . strlen($body);
        if (!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass);

        // Every value that reaches the shell is passed through
        // escapeshellarg(): URIs, headers and bodies may be attacker
        // controlled (redirect targets, cookies).
        $cmdline_params = "";
        foreach ($headers as $header)
            $cmdline_params .= " -H " . escapeshellarg($header);

        if (!empty($body))
            $cmdline_params .= " -d " . escapeshellarg($body);

        if ($this->read_timeout > 0)
            $cmdline_params .= " -m " . (int) $this->read_timeout;

        $headerfile = tempnam($this->temp_dir, "sno");
        if ($headerfile === false) {
            $this->error = "Error: could not create a temporary file for the cURL headers.";
            return false;
        }

        $results = array();
        $return = 0;
        exec(escapeshellarg($this->curl_path) . " -D " . escapeshellarg($headerfile) . $cmdline_params . " " . escapeshellarg($URI), $results, $return);

        if ($return) {
            // do not leave the header dump behind on failure
            if (is_file($headerfile))
                unlink($headerfile);
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            return false;
        }

        $results = implode("\r\n", $results);

        $result_headers = file($headerfile);
        if (!is_array($result_headers))
            $result_headers = array();
        unlink($headerfile);

        $this->_redirectaddr = false;
        $this->headers = array();

        foreach ($result_headers as $result_header)
            $this->_record_header($result_header, $scheme);

        $this->_store_body($results, $URI, $scheme);

        return true;
    }

    /*======================================================================*\
        Function:   setcookies()
        Purpose:    set cookies for a redirection: copy every Set-Cookie
                    response header into $this->cookies
    \*======================================================================*/

    function setcookies()
    {
        if (!is_array($this->headers))
            return;

        foreach ($this->headers as $header) {
            if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }

    /*======================================================================*\
        Function:   _check_timeout
        Purpose:    checks whether timeout has occurred
        Input:      $fp file pointer
        Output:     true (and $this->timed_out set) if the stream timed out
    \*======================================================================*/

    function _check_timeout($fp)
    {
        if ($this->read_timeout > 0) {
            $fp_status = stream_get_meta_data($fp);
            if ($fp_status["timed_out"]) {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

    /*======================================================================*\
        Function:   _connect
        Purpose:    make a socket connection (to the proxy when both
                    proxy_host and proxy_port are set, else to the host)
        Input:      $fp file pointer (set by reference)
        Output:     true on success; false on failure, with $this->status
                    and $this->error describing the problem
    \*======================================================================*/

    function _connect(&$fp)
    {
        if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
            $this->_isproxy = true;

            $host = $this->proxy_host;
            $port = $this->proxy_port;
        } else {
            $this->_isproxy = false;

            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
        if ($fp) {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        switch ($errno) {
            case -3:
                $this->error = "socket creation failed (-3)";
                break;
            case -4:
                $this->error = "dns lookup failure (-4)";
                break;
            case -5:
                $this->error = "connection refused or timed out (-5)";
                break;
            default:
                $this->error = "connection failed (" . $errno . ")";
        }
        return false;
    }

    /*======================================================================*\
        Function:   _disconnect
        Purpose:    disconnect a socket connection
        Input:      $fp file pointer
        Output:     result of fclose()
    \*======================================================================*/

    function _disconnect($fp)
    {
        return (fclose($fp));
    }

    /*======================================================================*\
        Function:   _prepare_post_body
        Purpose:    Prepare post body according to encoding type
        Input:      $formvars  - form variables
                    $formfiles - form upload files
        Output:     post body (empty string when there is nothing to send)
    \*======================================================================*/

    function _prepare_post_body($formvars, $formfiles)
    {
        // An empty value (the "" default) means "nothing"; a plain cast
        // would turn "" into array("") and post a bogus "0=" field.
        $formvars = empty($formvars) ? array() : (array) $formvars;
        $formfiles = empty($formfiles) ? array() : (array) $formfiles;
        $postdata = '';

        if (count($formvars) == 0 && count($formfiles) == 0)
            return $postdata;

        switch ($this->_submit_type) {
            case "application/x-www-form-urlencoded":
                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val)
                            $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                    } else
                        $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
                }
                break;

            case "multipart/form-data":
                $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= "--" . $this->_mime_boundary . "\r\n";
                            $postdata .= "Content-Disposition: form-data; name=\"" . $key . "[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    } else {
                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach ($formfiles as $field_name => $file_names) {
                    settype($file_names, "array");
                    foreach ($file_names as $file_name) {
                        if (!is_readable($file_name))
                            continue;

                        // reads empty files too and is binary safe
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false)
                            continue;
                        $base_name = basename($file_name);

                        $postdata .= "--" . $this->_mime_boundary . "\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                $postdata .= "--" . $this->_mime_boundary . "--\r\n";
                break;
        }

        return $postdata;
    }
}

?>

你可能感兴趣的:(Class)