获取远程文件内容之浏览器模拟器(BrowserEmulator)

出于安全的考虑,服务器常常会把 allow_url_fopen 设置为 Off,此时 fopen、file_get_contents 等函数就无法再打开远程 URL(读取本地文件不受影响)。如果仍想用类似的方式获取远程文件内容,就可以用到这个基于 fsockopen 的类。

 

 

<?php


/* used for the transmission RPC connection 
 * and the SABnzbd+ file submit 
 */


/***************************************************************************


Browser Emulating file functions v2.0.1-torrentwatch
(c) Kai Blankenhorn
www.bitfolge.de/browseremulator
[email protected]




This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.


This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.


You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.


****************************************************************************


Changelog:


v2.0.1-torrentwatch2 by Erik Bernhardson
  multi-part post with file submit


v2.0.1-torrentwatch by Erik Bernhardson
  converted file() to file_get_contents()
  converted lastResponse to string from array to mimic file_get_contents
  added gzip compression support


v2.0.1
  fixed authentication bug
  added global debug switch


v2.0   03-09-03
  added a wrapper class; this has the advantage that you no longer need
    to specify a lot of parameters, just call the methods to set
    each option
  added option to use a special port number, may be given by setPort or
    as part of the URL (e.g. server.com:80)
  added getLastResponseHeaders()


v1.5
  added Basic HTTP user authorization
  minor optimizations


v1.0
  initial release






***************************************************************************/


/**
 * BrowserEmulator class. Provides methods for opening URLs over a raw socket
 * (fsockopen) while sending browser-like request headers.
 *
 * Typical use: configure headers / post data / auth through the setter
 * methods, then call fopen(), file_get_contents() or file() with a URL.
 * The response header of the most recent request is available through
 * getLastResponseHeaders().
 *
 * NOTE(review): responses using "Transfer-Encoding: chunked" are not decoded;
 * the body is returned exactly as read from the socket.
 */
class BrowserEmulator {
  /** Persistent request headers as name => value. */
  public $headerLines = array();
  /** Post parameters as name => value (value may be an array for file uploads). */
  public $postData = array();
  /** When true, post data is sent as multipart/form-data. */
  public $multiPartPost = false;
  public $authUser = "";
  public $authPass = "";
  /** Port used by the next request; updated by setPort() and by fopen(). */
  public $port;
  /** Raw response header block of the last request (string). */
  public $lastResponse = '';
  /** Raw request text of the last request (string). */
  public $lastRequest = '';
  /** When true, request and response headers are echoed. */
  public $debug = false;
  /** Optional replacement for the method part of the request line. */
  public $customHttp = false;

  /**
   * Initialises the default header lines and the default port.
   */
  public function __construct() {
    $this->resetHeaderLines();
    $this->resetPort();
  }

  /**
   * Legacy PHP 4 style constructor name, kept so existing explicit calls keep
   * working. On PHP 8 a method named after the class is no longer invoked by
   * "new", which is why __construct() exists above.
   */
  public function BrowserEmulator() {
    $this->__construct();
  }

  /**
   * Adds a single header field to the HTTP request header. The resulting
   * header line will have the format
   * $name: $value\r\n
   */
  public function addHeaderLine($name, $value) {
    $this->headerLines[$name] = $value;
  }

  /**
   * Deletes all custom header lines and restores the two defaults
   * (User-Agent and Accept-Encoding).
   */
  public function resetHeaderLines() {
    $this->headerLines = array();

    /*******************************************************************************/
    /**************   YOU MAY SET THE USER AGENT STRING HERE   *******************/
    /* The default below identifies as Firefox 3.0.10 on Linux.                    */

    $this->headerLines["User-Agent"] = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10';

    /*******************************************************************************/
    // NOTE(review): the original comment says this makes the client accept gzip,
    // but "*/*" is a media-range, not a content-coding; "gzip" is probably what
    // was intended. Value left unchanged to avoid altering server behaviour.
    $this->headerLines["Accept-Encoding"] = "*/*";
  }

  /**
   * Add a post parameter. Post parameters are sent in the body of an HTTP
   * POST request. An empty $value sends the bare $name.
   */
  public function addPostData($name, $value = '') {
    $this->postData[$name] = $value;
  }

  /**
   * Deletes all custom post parameters.
   */
  public function resetPostData() {
    $this->postData = array();
  }

  /**
   * Builds a multipart/form-data body from the current post data and sets
   * the Accept, Connection and Content-Type header lines accordingly.
   *
   * Array values are sent as file parts and must provide the keys
   * 'filename' and 'contents'; every other value is sent as a plain field.
   *
   * @return string the encoded request body
   */
  public function handleMultiPart() {
    $boundary = '----------------------------795088511166260704540879626';

    $this->headerLines["Accept"] = ' text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    $this->headerLines["Connection"] = 'Close';
    $this->headerLines["Content-Type"] = "multipart/form-data; boundary=$boundary";

    $out = '';
    foreach ($this->postData as $item => $data) {
      $out .= "--$boundary\r\n";
      if (is_array($data)) {
        $out .= "Content-Disposition: form-data; name=\"$item\"; filename=\"{$data['filename']}\"\r\n"
               ."Content-Type: application/octet-stream\r\n"
               ."\r\n"
               .$data['contents']."\r\n";
      } else {
        $out .= "Content-Disposition: form-data; name=\"$item\"\r\n"
               ."\r\n"
               .$data."\r\n";
      }
    }
    $out .= "--{$boundary}--\r\n";
    return $out;
  }

  /**
   * Sets an auth user and password to use for the request.
   * Set both as empty strings to disable authentication.
   */
  public function setAuth($user, $pass) {
    $this->authUser = $user;
    $this->authPass = $pass;
  }

  /**
   * Selects a custom port to use for the request.
   */
  public function setPort($portNumber) {
    $this->port = $portNumber;
  }

  /**
   * Resets the port used for request to the HTTP default (80).
   */
  public function resetPort() {
    $this->port = 80;
  }

  /**
   * Parse any cookies embedded in the URL after a ":COOKIE:" marker, move
   * them into a Cookie header line, and return the URL without that suffix.
   */
  public function preparseURL($url) {
    if ($cookies = stristr($url, ':COOKIE:')) {
      $url = rtrim(substr($url, 0, -strlen($cookies)), '&');
      // 8 == strlen(':COOKIE:'); "&" separated pairs become ";" separated
      $this->addHeaderLine("Cookie", '$Version=1; '.strtr(substr($cookies, 8), '&', ';'));
    }
    return $url;
  }

  /**
   * Joins the post data into a "k=v&k2=v2" string. Parameters whose value is
   * an empty string, null or false are sent as the bare key.
   * Values are NOT url-encoded here; callers are expected to pass them
   * ready for transmission.
   */
  protected function buildPostString() {
    $pairs = array();
    foreach ($this->postData as $key => $value) {
      if ($value === '' || $value === null || $value === false) {
        $pairs[] = $key;
      } else {
        // unlike empty(), this keeps legitimate values such as "0"
        $pairs[] = "$key=$value";
      }
    }
    return join("&", $pairs);
  }

  /**
   * Reports an error through Yii when it is loaded, otherwise through
   * PHP's error_log(), so the class also works outside a Yii application.
   */
  protected function logError($message) {
    if (class_exists('Yii', false)) {
      Yii::log($message, CLogger::LEVEL_ERROR);
    } else {
      error_log($message);
    }
  }

  /**
   * Open a socket to $url and send a request with the parameters set by
   * previous member method calls (headers, post data, authentication).
   * The response header is consumed and stored in $lastResponse, so the
   * returned handle is positioned at the start of the response body.
   *
   * Side effect kept from the original implementation: a port given in the
   * URL (or 443 for https) is stored via setPort() and therefore also
   * applies to later requests until changed again.
   *
   * @param string $url absolute http/https URL
   * @return resource|false socket handle on success; false when the URL has
   *         no host, the connection or write fails, or the server answers
   *         401 with a Basic auth challenge
   */
  public function fopen($url) {
    $url = $this->preparseURL($url);
    $this->lastResponse = '';
    $this->lastRequest = '';

    $parts = parse_url($url);
    if (!is_array($parts) || empty($parts['host'])) {
      return false;
    }
    $protocol = isset($parts['scheme']) ? strtolower($parts['scheme']) : 'http';
    $server = $parts['host'];
    $path = (isset($parts['path']) && $parts['path'] != '') ? $parts['path'] : '/';
    if (isset($parts['query'])) {
      $path .= '?'.$parts['query'];
    }

    if ($protocol == 'https') {
      // TODO: https is locked to port 443, why?
      $server = 'ssl://'.$server;
      $this->setPort(443);
    } elseif (isset($parts['port'])) {
      $this->setPort($parts['port']);
    }

    $socket = fsockopen($server, $this->port);
    if (!$socket) {
      return false;
    }

    $hasPost = count($this->postData) > 0;
    $body = '';
    if ($hasPost) {
      // handleMultiPart() also sets header lines, so it must run before
      // the headers are copied below
      $body = $this->multiPartPost ? $this->handleMultiPart() : $this->buildPostString();
    }

    // Per-request headers live in a copy so that Authorization and
    // Content-Length do not leak into later requests.
    $headers = $this->headerLines;
    if ($this->authUser != "" && $this->authPass != "") {
      $headers["Authorization"] = "Basic ".base64_encode($this->authUser.":".$this->authPass);
    }
    if ($hasPost) {
      $headers["Content-Length"] = strlen($body);
    }
    if (!isset($headers["Connection"])) {
      // the body is read until EOF, so the server must close the connection
      $headers["Connection"] = 'Close';
    }

    if ($this->customHttp) {
      $request = $this->customHttp." $path\r\n";
    } elseif (!$hasPost) {
      $request = "GET $path HTTP/1.0\r\n";
    } else {
      $request = "POST $path HTTP/1.1\r\n";
    }
    $request .= "Host: {$parts['host']}\r\n";

    if ($this->debug) echo $request;
    foreach ($headers as $key => $value) {
      if ($this->debug) echo "$key: $value\n";
      $request .= "$key: $value\r\n";
    }
    if ($this->debug) echo "\n";
    $request .= "\r\n";
    if ($hasPost) {
      $request .= $body;
    }
    $this->lastRequest = $request;

    // fwrite() may write fewer bytes than requested; loop until done
    $length = strlen($request);
    for ($written = 0; $written < $length; $written += $bytes) {
      $bytes = fwrite($socket, substr($request, $written));
      if (!$bytes) {
        break;
      }
    }
    if ($written < $length) {
      fclose($socket);
      return false;
    }
    if ($this->debug) echo "\n";

    // Status line, e.g. "HTTP/1.1 200 OK": the code starts at offset 9
    $line = (string) fgets($socket);
    if ($this->debug) echo $line;
    $this->lastResponse .= $line;
    $status = substr($line, 9, 3);

    // Remaining header lines up to the first blank line
    while (($line = fgets($socket)) !== false && trim($line) !== '') {
      if ($this->debug) echo "$line";
      $this->lastResponse .= $line;
      if ($status == "401" && stripos($line, "WWW-Authenticate: Basic realm=\"") === 0) {
        fclose($socket);
        return false;
      }
    }
    return $socket;
  }

  /**
   * Fetch $url with the parameters set by previous member method calls and
   * return the response body as a string. A path that exists on the local
   * filesystem is read directly instead.
   *
   * @param string $url URL or local file path
   * @return string|false body on success, false on failure
   */
  public function file_get_contents($url) {
    if (file_exists($url)) { // local file
      return file_get_contents($url);
    }

    $socket = $this->fopen($url);
    if (!$socket) {
      $this->logError('Browser Emulator: file_get_contents bad socket');
      return false;
    }

    $file = '';
    while (!feof($socket)) {
      $chunk = fgets($socket);
      if ($chunk === false) {
        // read error or timeout: stop instead of spinning on feof()
        break;
      }
      $file .= $chunk;
    }
    fclose($socket);

    // header names are case-insensitive, hence stripos
    if (stripos($this->lastResponse, 'Content-Encoding: gzip') !== false) {
      if (function_exists('gzdecode')) {
        $file = gzdecode($file);
        if ($this->debug) echo "Result file: ".$file;
      } elseif (function_exists('gzinflate')) {
        // skip the fixed 10-byte gzip header and inflate the raw stream
        $file = gzinflate(substr($file, 10));
        if ($this->debug) echo "Result file: ".$file;
      }
    }

    return $file;
  }

  /**
   * Simulate a file() call by splitting the result of file_get_contents()
   * on newline characters.
   *
   * @return array|false list of lines, or false when nothing was fetched
   */
  public function file($url) {
    $data = $this->file_get_contents($url);
    if ($data === false || $data === '') {
      return false;
    }
    // double quotes: a real newline, not the two characters backslash + n
    return explode("\n", $data);
  }

  /**
   * Returns the raw response header block of the last request.
   */
  public function getLastResponseHeaders() {
    return $this->lastResponse;
  }
}

 

 

实例:

 

// Create an emulator with the default User-Agent and port 80.
$be = new BrowserEmulator();

// Fetch the response body; file_get_contents() returns false on failure.
$output = $be->file_get_contents("http://tvbinz.net/rss.php");
// Raw response header block of the request made above.
$response = $be->getLastResponseHeaders();

if ($output === false) {
  // echoing false would print nothing and hide the failure
  echo "Request failed";
} else {
  echo $output;
}

 

 

 

来源: http://code.google.com/p/torrentwatch/source/browse/branches/yii/protected/components/downloadClients/browserEmulator.php?spec=svn780&r=780

 

 

关联:

PHP获取远程文件内容

 

 

 

/**
 * Fetch the contents of a URL with cURL.
 *
 * A Referer header pointing at the directory of the requested resource
 * (e.g. "http://host/a/" for "http://host/a/b.html") is sent when the URL
 * has a scheme and host.
 *
 * @param string $url absolute URL to fetch
 * @return string|false response body, or false when the handle could not
 *         be created or the transfer failed
 */
function curl_get_contents($url)
{
	$ch = curl_init($url);
	if ($ch === false) {
		return false;
	}

	// Build the referer from the parsed URL; pathinfo() on the whole URL
	// yields "http:/" for root URLs and is confused by "/" in the query.
	$parts = parse_url($url);
	if (is_array($parts) && isset($parts['scheme'], $parts['host'])) {
		$path = isset($parts['path']) ? $parts['path'] : '/';
		$dir = substr($path, 0, strrpos($path, '/') + 1);
		$port = isset($parts['port']) ? ':'.$parts['port'] : '';
		curl_setopt($ch, CURLOPT_REFERER, $parts['scheme'].'://'.$parts['host'].$port.$dir);
	}

	// Return the body from curl_exec() instead of printing it.
	// (CURLOPT_BINARYTRANSFER was dropped: it has had no effect for a long time.)
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

	$data = curl_exec($ch);
	curl_close($ch);

	return $data;
}

 

 

 

你可能感兴趣的:(emulator)