文章地址: http://blog.csdn.net/aerchi/article/details/51644423
// Demo script: fetch a Taobao item detail page with cURL while rotating
// spoofed User-Agent strings, then dump the transfer info and the page body.
set_time_limit(0);

// GET test: the Taobao item detail page to fetch.
// (The original assigned id=37530539791 first and immediately overwrote it;
// only the effective URL is kept.)
$url = "http://item.taobao.com/item.htm?id=14861616067";

// Transfer timeout in seconds. The original passed an undefined $timeout to
// CURLOPT_TIMEOUT, which raised an "undefined variable" notice/warning.
$timeout = 30;

// Number of User-Agent strings to try before giving up (the original loop
// tried indexes 0..5).
$maxAttempts = 6;

// Pool of User-Agent strings used to disguise the cURL client.
$useragent = array(
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1',
    'Opera/9.27 (Windows NT 5.2; U; zh-cn)',
    'Opera/8.0 (Macintosh; PPC Mac OS X; U; en)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13 ',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Version/3.1 Safari/525.13'
);

header("Content-type: text/html; charset=utf-8");

// Temporary file used to store cookie information between requests.
$cookiefile = realpath("./") . "/Application/Runtime/Temp/cookie.txt";

// Make sure the cookie file exists. The original suppressed every failure
// with "@" and could never succeed when the directory was missing; create the
// directory first and let PHP report real failures.
$cookiedir = dirname($cookiefile);
if (!is_dir($cookiedir)) {
    mkdir($cookiedir, 0777, true);
}
if (!file_exists($cookiefile)) {
    file_put_contents($cookiefile, "");
}

$ch = curl_init();

// Basic transfer options, including the URL.
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_NOBODY, 0);
// Upper bound on how many redirects are followed (see CURLOPT_FOLLOWLOCATION).
curl_setopt($ch, CURLOPT_MAXREDIRS, 300);
// Return the response as a string instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Automatically set the Referer header when following a redirect.
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
// Follow "Location:" headers returned by the server.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

// Optional spoofing of IP / referer / User-Agent (disabled in the original).
// $r is not defined in this script and must be supplied before enabling.
// Note that array_rand() returns a key, so the value has to be looked up.
// curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)');
// curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR:28.58.88.'.$r, 'CLIENT-IP:225.28.58.'.$r)); // forged IP
// curl_setopt($ch, CURLOPT_REFERER, "http://www.baidu.com"); // forged referer
// curl_setopt($ch, CURLOPT_USERAGENT, $useragent[array_rand($useragent)]);

// Optional proxy (disabled in the original).
// curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1);
// curl_setopt($ch, CURLOPT_PROXY, '218.213.168.131:80');
// curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'user:password');

// Cookie handling: read cookies from, and on close write cookies returned by
// the server to, the same file.
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiefile);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiefile);

// Execute the request, switching to the next User-Agent until a non-empty
// HTML document comes back.
$output = false;
$attempts = min($maxAttempts, count($useragent));
for ($i = 0; $i < $attempts; $i++) {
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent[$i]);
    $output = curl_exec($ch);
    // Compare explicitly: empty() would also reject a body of "0".
    if ($output !== false && $output !== '') {
        break;
    }
}

// Collect diagnostics before the handle is released.
$info = curl_getinfo($ch);
$error = curl_error($ch);
curl_close($ch);

echo "\n";
print_r($info);

// Print the page body, or the cURL error when every attempt failed (the
// original called die(false) here and printed nothing).
die(is_string($output) ? $output : "cURL request failed: " . $error);
1
2
|
// Usage example: fetch the item page through curl_get() and print the result.
$url = 'http://item.taobao.com/item.htm?id=14861616067';
print curl_get($url);
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Sends a GET request by default; when $data is non-empty the request is sent
 * as a POST with $data as the POST fields. Redirects are followed, the Referer
 * is set to the root of the target host, and cookies are read from and saved
 * to "cookie.txt" in the current working directory.
 *
 * @param string       $durl Target URL.
 * @param array|string $data Optional POST fields; an empty value means GET.
 *
 * @return string|false Response body, or false when the transfer fails.
 */
function curl_get($durl, $data = array())
{
    // realpath() returns false for a file that does not exist yet, which
    // meant the cookie jar was never created on the first run. Fall back to
    // the same location expressed through the working directory.
    $cookiejar = realpath('cookie.txt');
    if ($cookiejar === false) {
        $cookiejar = getcwd() . DIRECTORY_SEPARATOR . 'cookie.txt';
    }

    $t = parse_url($durl);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $durl);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    // NOTE(review): peer verification is disabled, so HTTPS responses are not
    // authenticated. Kept for backward compatibility; enable it when the
    // targets have valid certificates.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

    // Forward the caller's User-Agent when there is one; it is not defined
    // when the script runs from the command line.
    if (isset($_SERVER['HTTP_USER_AGENT'])) {
        curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
    }

    // Pretend the request comes from the target site's own root page. Skipped
    // when the URL has no parsable host instead of reading a missing key.
    if (is_array($t) && isset($t['host'])) {
        curl_setopt($ch, CURLOPT_REFERER, "http://{$t['host']}/");
    }

    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    if ($data) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $r = curl_exec($ch);
    curl_close($ch);

    return $r;
}
|
file_get_contents方法
代码如下 | |
$opt=array('http'=>array('header'=>"Referer: $refer")); |
分析:
file_get_contents中,stream_context_create是伪造来源的重要参数,这个没什么好说的,非常简单。
CURL方式
代码如下 | |
$ch = curl_init(); |
分析:
curl伪造来源页面非常简单,这是它的优点,所以我们只要在页面中加上curl_setopt ($ch, CURLOPT_REFERER, "http://www.111cn.net/");就可以了。
SOCKET方式
代码如下 | |
$server = 'www.111cn.net'; |
友情提示:三种方式的性能对比中,fsockopen是最好的。
我们再使用
代码如下 | |
echo " "; echo $_SERVER["HTTP_REFERER"]; ?> |
例子1
代码如下 | |
// [one.php] Sends a POST request carrying a forged Referer and forged
// client-IP headers to two.php.
$post_data = array(
    "user" => "gongwen",
    "pwd" => "123456"
);
$header_ip = array(
    'CLIENT-IP:88.88.88.88',
    'X-FORWARDED-FOR:88.88.88.88',
);
$referer = 'http://www.111cn.net';

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://localhost/curl/two.PHP');
// Forge the Referer.
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Forge the source IP headers.
curl_setopt($ch, CURLOPT_HTTPHEADER, $header_ip);
// Submit the POST parameters.
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
// Make curl_exec() return the output instead of printing it to the browser.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$out_put = curl_exec($ch);
curl_close($ch);
echo $out_put;

// [two.php] Prints what the incoming request carried.
// Referer of the request.
echo '[HTTP_REFERER] ';
echo $_SERVER['HTTP_REFERER'];
// Source IP of the request.
// Note: the print order below is the order in which many open-source systems
// currently look up the client IP.
echo ' [IP] ';
echo $_SERVER['HTTP_CLIENT_IP'];
echo ' ';
echo $_SERVER['HTTP_X_FORWARDED_FOR'];
echo ' ';
echo $_SERVER['REMOTE_ADDR'];
// POST data.
echo ' [POST] ';
// NOTE(review): the original snippet is cut off here (it ended in a broken
// string literal); dumping $_POST is the presumed intent - confirm.
print_r($_POST);
浏览器访问one.php。页面打印如下:
例子2
代码如下 | |
/**
 * Download a URL with cURL using a forged Referer, User-Agent and headers.
 *
 * @param string $url      URL to fetch.
 * @param string $userinfo User-Agent string to send.
 * @param array  $header   Extra HTTP header lines (used here for forged IPs).
 *
 * @return string|false Response body, or false when the transfer fails.
 */
function getImagesUrl($url, $userinfo, $header)
{
    $ch = curl_init();
    // Connection timeout in seconds.
    $timeout = 1;
    curl_setopt($ch, CURLOPT_URL, "$url");
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_REFERER, "http://www.baidu.com/");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, "$userinfo");
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $contents = curl_exec($ch);
    curl_close($ch);

    return $contents;
}

/**
 * Write downloaded data to a file, replacing any existing content.
 *
 * @param string $handle   Data to write (despite the name, not a file handle).
 * @param string $filename Destination path.
 *
 * @return void
 */
function saveurl($handle, $filename)
{
    // "b" keeps binary payloads such as images intact on every platform.
    $fp = fopen($filename, "wb");
    if ($fp === false) {
        // fopen() has already raised a warning; there is nothing to write to.
        return;
    }
    fwrite($fp, (string) $handle);
    // Close explicitly; the original only unset the variable.
    fclose($fp);
}

// Candidate User-Agent strings to impersonate.
$binfo = array(
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; InfoPath.2; AskTbPTV/5.17.0.25589; Alexa Toolbar)',
    'Mozilla/5.0 (Windows NT 5.1; rv:22.0) Gecko/20100101 Firefox/22.0',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET4.0C; Alexa Toolbar)',
    'Mozilla/4.0(compatible; MSIE 6.0; Windows NT 5.1; SV1)'
);
// Also offer the real visitor's User-Agent when the request provides one (it
// is not defined when the script runs from the command line).
if (isset($_SERVER['HTTP_USER_AGENT'])) {
    $binfo[] = $_SERVER['HTTP_USER_AGENT'];
}

// Forged client IPs drawn from 123.125.68.* and 125.90.88.*
$cip = '123.125.68.' . mt_rand(0, 254);
$xip = '125.90.88.' . mt_rand(0, 254);
$header = array(
    'CLIENT-IP:' . $cip,
    'X-FORWARDED-FOR:' . $xip,
);

// Pick from the whole list; the original mt_rand(0,3) could never select the
// last entry.
$u = $binfo[mt_rand(0, count($binfo) - 1)];

// NOTE(review): $value is not defined in this snippet - presumably the image
// URL supplied by surrounding code that is not shown; confirm before use.
$get_file = getImagesUrl($value, $u, $header);

// Only save when the download succeeded, so a failed transfer does not leave
// an empty a.jpg behind.
if ($get_file !== false) {
    saveurl($get_file, 'a.jpg');
}