---
layout: post
title: "PHP获取远程文件的几种方式"
date: 2016-05-18 14:18:15 +0800
comments: true
categories: [php]
---
接上次的内容,今天结合代码来分析下PHP获取远程文件的几种方式。
- fopen
- 获取本地文件
- 获取到的资源绑定到一个流上面
- file_get_contents
- 可以用GET方式获取数据,也可以用POST方式提交数据
- 可以做断点续传
- socket
- curl
- 可以保存cookie 并且在下一次发送出去
- 做模拟登陆
fopen
/* fopen demo: read a local file and then a remote URL through the same stream API. */
// NOTE(review): the first lines of this script were lost when the post was extracted.
// The include and the heading echo are reconstructed from the surviving text and from
// the remark below that these scripts use common.php - confirm against the original.
require_once 'common.php'; // provides dumpout() and dumpArr()
echo "fopen获取文件测试";

/* Open a local file with fopen */
$filename = "test.html";
// "r" = read-only, pointer at the start of the file. Neither "r" nor "r+" creates a
// missing file; "r+" would additionally require write permission we do not need.
$fp = fopen($filename, "r");
if ($fp === false) {
    echo "无法打开本地文件:" . $filename . "\n";
} else {
    $fc = "";
    while (!feof($fp)) {
        // The opened resource is a stream; pull it in chunks of at most 1023 bytes.
        $line = fgets($fp, 1024);
        if ($line === false) {
            break; // read error or EOF hit mid-loop: stop instead of spinning on feof()
        }
        $fc .= $line;
    }
    fclose($fp);
    echo "输出http_response_header
";
    // $http_response_header is only populated by the http:// wrapper, so it is normally
    // undefined after a local read; fall back to an empty list instead of an undefined variable.
    dumpout(isset($http_response_header) ? $http_response_header : array());
    echo "本地文件:".$fc."
";
}

/* Open a remote URL with fopen */
$filename = "http://localhost/Login/02/test.html";
$fp = fopen($filename, "r");
if ($fp === false) {
    echo "无法打开远程url:" . $filename . "\n";
} else {
    echo "输出stream_get_meta_data
";
    dumpArr(stream_get_meta_data($fp));
    $fc = "";
    while (!feof($fp)) {
        $line = fgets($fp, 1024);
        if ($line === false) {
            break;
        }
        // Append (.=) so the whole body is kept, not just the last line read.
        $fc .= $line;
    }
    fclose($fp);
    echo "输出http_response_header
";
    dumpout($http_response_header);
    echo "远程url:".$fc."
";
}
?>
file_get_contents
/* file_get_contents demo: local file, remote GET, remote POST via a stream context. */
// NOTE(review): the first lines of this script were lost when the post was extracted.
// The include and the heading echo are reconstructed - confirm against the original.
require_once 'common.php'; // provides dumpout()
echo "file_get_contents获取文件测试";

/* Read a local file with file_get_contents */
$filename = "test.html";
$fc = file_get_contents($filename);
if ($fc === false) {
    echo "无法读取本地文件:" . $filename . "\n";
} else {
    echo "本地文件:".$fc."
";
}

/* Fetch a remote URL with file_get_contents (GET) */
$filename = "http://localhost/Login/02/test.php";
$fc = file_get_contents($filename);
if ($fc === false) {
    echo "get方法请求失败:" . $filename . "\n";
} else {
    echo "输出http_response_header
";
    dumpout($http_response_header);
    echo "get方法获取远程url:".$fc."
";
}

/* Fetch a remote URL with file_get_contents (POST) */
$filename = "http://localhost/Login/02/test.php";
/* Build the request headers and the url-encoded body */
$post = array('type' => '1');
$content = http_build_query($post);
$content_length = strlen($content);
$options = array(
    'http' => array(
        'method' => 'POST',
        'header' =>
            "Content-type: application/x-www-form-urlencoded\r\n" .
            "Content-length: $content_length\r\n",
        'content' => $content
    )
);
$fc = file_get_contents($filename, false, stream_context_create($options));
if ($fc === false) {
    echo "post方法请求失败:" . $filename . "\n";
} else {
    echo "输出http_response_header
";
    dumpout($http_response_header);
    echo "post方法获取远程url:".$fc."
";
}
?>
socket
/* fsockopen demo: hand-written HTTP/1.0 GET and POST requests over a raw socket. */
// NOTE(review): the first lines of this script were lost when the post was extracted.
// The heading echo is reconstructed from the surviving text - confirm against the original.
echo "fsocket模拟get提交";
$url = "http://localhost/Login/02/test.php?type=1";
$info = parse_url($url);
// Honour an explicit port in the URL; plain http defaults to 80.
$port = isset($info["port"]) ? $info["port"] : 80;

$fp = fsockopen($info["host"], $port, $errno, $errstr, 3);
if ($fp === false) {
    echo "连接失败:" . $errstr . " (" . $errno . ")\n";
} else {
    $head = "GET ".$info['path']."?".$info["query"]." HTTP/1.0\r\n";
    $head .= "Host: ".$info['host']."\r\n";
    $head .= "\r\n";
    fputs($fp, $head);
    while (!feof($fp)) {
        $line = fgets($fp);
        if ($line === false) {
            break; // timeout or read error: feof() may never become true
        }
        echo $line."
";
    }
    fclose($fp); // release the socket before opening the next one
}

/* Simulate a POST submission over the socket */
echo "fsocket模拟post提交
";
$query = "type=1";
$info = parse_url($url);
$port = isset($info["port"]) ? $info["port"] : 80;
$fp = fsockopen($info["host"], $port, $errno, $errstr, 3);
if ($fp === false) {
    echo "连接失败:" . $errstr . " (" . $errno . ")\n";
} else {
    $body = trim($query);
    $head = "POST ".$info['path']." HTTP/1.0\r\n";
    $head .= "Host: ".$info['host']."\r\n";
    $head .= "Referer: http://".$info['host'].$info['path']."\r\n";
    $head .= "Content-type: application/x-www-form-urlencoded\r\n";
    $head .= "Content-Length: ".strlen($body)."\r\n";
    $head .= "\r\n";
    $head .= $body;
    fputs($fp, $head);
    while (!feof($fp)) {
        $line = fgets($fp);
        if ($line === false) {
            break;
        }
        echo $line."
";
    }
    fclose($fp);
}
?>
cURL
/* cURL demo: a GET request followed by a POST request. */
// NOTE(review): the first lines of this script were lost when the post was extracted.
// The include and the heading echo are reconstructed - confirm against the original.
require_once 'common.php'; // provides dumpArr()
echo "curl模拟get提交";
$url = "http://localhost/Login/02/test.php?type=1";
// Initialise the handle
$ch = curl_init();
// Set the options, including the URL
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
curl_setopt($ch, CURLOPT_HEADER, 0);         // do not include response headers in the body
// Execute the request and fetch the HTML document
$output = curl_exec($ch);
if ($output === false) {
    // curl_exec() returns false on failure; report why instead of printing nothing.
    echo "curl请求失败:" . curl_error($ch) . "\n";
} else {
    echo "输出curl_getinfo相关信息
";
    $info = curl_getinfo($ch);
    dumpArr($info);
}
// Release the curl handle
curl_close($ch);
// Print the data that was fetched
if ($output !== false) {
    print_r($output);
}

echo "curl模拟post提交
";
$url = "http://localhost/Login/02/test.php";
$post_data = array("type" => "1");
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Send the request as a POST
curl_setopt($ch, CURLOPT_POST, 1);
// The POST fields; passing an array makes cURL encode the body as multipart/form-data
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
$output = curl_exec($ch);
if ($output === false) {
    echo "curl请求失败:" . curl_error($ch) . "\n";
} else {
    echo "输出curl_getinfo相关信息
";
    $info = curl_getinfo($ch);
    dumpArr($info);
}
curl_close($ch);
// Print the data that was fetched
if ($output !== false) {
    print_r($output);
}
?>
上面的文件引用的common.php
/**
 * Convert a list of raw HTTP header lines (the shape of $http_response_header)
 * into an array.
 *
 * "Name: value" lines become $head['Name'] = 'value' (split on the first colon,
 * both sides trimmed). Lines without a colon are appended under numeric keys; when
 * such a line looks like an HTTP status line, its status code is also stored as an
 * int under the key 'reponse_code' (spelling kept as-is because callers may read it).
 *
 * NOTE(review): the function header, the $head initialisation and the start of the
 * foreach were lost when the post was extracted. They are reconstructed from the
 * call in dumpout() and from the surviving body; the parameter name is a guess.
 *
 * @param array $headers raw header lines; anything that is not an array yields array()
 * @return array parsed headers
 */
function parseHeaders($headers) {
    $head = array();
    // Callers may pass an undefined $http_response_header (e.g. after a local read).
    if (!is_array($headers)) {
        return $head;
    }
    foreach ($headers as $k => $v) {
        $t = explode(':', $v, 2);
        if (isset($t[1])) {
            $head[trim($t[0])] = trim($t[1]);
        } else {
            $head[] = $v;
            if (preg_match("#HTTP/[0-9\.]+\s+([0-9]+)#", $v, $out)) {
                $head['reponse_code'] = intval($out[1]);
            }
        }
    }
    return $head;
}
/**
 * Parse raw HTTP header lines with parseHeaders() and print (or return) the result
 * in the same format dumpArr() uses.
 *
 * @param mixed  $vars   raw header lines, passed straight to parseHeaders()
 * @param string $label  optional caption placed before the dump
 * @param bool   $return when true the text is returned instead of echoed
 * @return string|null the formatted text when $return is true, otherwise null
 */
function dumpout($vars, $label = '', $return = false) {
    // The formatting logic was a line-for-line copy of dumpArr(); delegate so the
    // two helpers cannot drift apart.
    return dumpArr(parseHeaders($vars), $label, $return);
}
/**
 * Print (or return) a print_r() dump of $vars with an optional caption.
 *
 * When the html_errors ini setting is on, the dump is HTML-escaped and wrapped in
 * blank lines, and the caption line is emitted only for a non-empty label. When it
 * is off, the output is always "<label> :" followed by the raw dump.
 *
 * @param mixed  $vars   value to dump
 * @param string $label  optional caption
 * @param bool   $return when true the text is returned instead of echoed
 * @return string|null the formatted text when $return is true, otherwise null
 */
function dumpArr($vars, $label = '', $return = false) {
    $dump = print_r($vars, true);
    if (!ini_get('html_errors')) {
        $content = $label . " :\n" . $dump;
    } else {
        $caption = ($label != '') ? "{$label} :\n" : '';
        $content = "\n" . $caption . htmlspecialchars($dump) . "\n
\n";
    }
    if (!$return) {
        echo $content;
        return null;
    }
    return $content;
}
?>
以上几种方式中用得较多的是cURL,因为它具有可以保存COOKIE的特性,PHP的网络爬虫基本都离不开它。