为了搞科研,最近用PHP写了个爬虫抓图像,要交给实验室的同学用,苦于需要安装一大堆软件,还要小心配置,实在麻烦。向做PHP的同学取经,对方推荐了Windows下的PHP集成发布软件APMServ,稳定版本是5.2.6,对应PHP版本也是5.2.6,完全的傻瓜式一键配置。
为了不喧宾夺主,直接给出官方网站: APMServ5.2.6。
说说我的PHP学习感受:PHP的语法和C非常像,“会写C就会写PHP”,此言不虚。另外,PHP的变量前面有一个$符号,每次都让我觉得是不是PHP的发明人很缺钱,搞个货币符号提醒自己要多写点代码,这样就可以发工资了。
分享一个PHP Spider的代码:
<?php
/**
 * Small cURL-based HTTP client used as a crawler ("spider").
 *
 * Configure a request through the fluent setters (url(), code(), cmod(), ...)
 * and run it with methodGet() or methodPost(). After each successful request
 * the cookies found in "Set-Cookie:" lines of the response are merged into
 * $webcookies so that they are sent with the next request.
 *
 * Written to stay compatible with PHP 5 (no short arrays, no type declarations).
 */
class spider
{
    /** URL to request. */
    public $url;
    /** Cookie string sent with the request, formatted as "name=value;name2=value2". */
    public $webcookies;
    /** Referer header value; when empty, the root of $url is used. */
    public $refer;
    /** Proxy as "host:port"; empty string disables the proxy. */
    public $PROXY = "";
    /** Character encoding of the response; used as the source for conversion to UTF-8. */
    public $encoding;
    /** Cookie mode: 'new' starts a new cookie session, anything else sends $webcookies. */
    public $cmod;
    /** Redirect mode: 'jump' follows Location redirects. */
    public $jmod;
    /** Header mode: 'noheader' omits response headers from the returned content. */
    public $hmod;
    /** Content type: 'bin' skips encoding conversion (binary data such as images). */
    public $type;
    /** Data sent as the POST body by methodPost(). */
    public $postfield;
    /** Request timeout in seconds. */
    public $time = 30;
    /** Content returned by the last request (false when the transfer failed). */
    public $contents;

    /**
     * Performs an HTTP GET request with the current settings.
     *
     * @return string|false Response content (including headers unless hmod is
     *                      'noheader'), or FALSE when the transfer failed or
     *                      returned nothing.
     */
    public function methodGet()
    {
        return $this->request(false);
    }

    /**
     * Performs an HTTP POST request, sending $postfield as the body.
     *
     * @return string|false Response content (including headers unless hmod is
     *                      'noheader'), or FALSE when the transfer failed or
     *                      returned nothing.
     */
    public function methodPost()
    {
        return $this->request(true);
    }

    /**
     * Sets the URL to request.
     *
     * @param string $url
     * @return spider $this, for chaining.
     */
    public function url($url)
    {
        $this->url = $url;
        return $this;
    }

    /**
     * Sets the cookie string sent with the next request.
     *
     * @param string $cookies Cookies formatted as "name=value;name2=value2".
     * @return spider $this, for chaining.
     */
    public function SetCookies($cookies)
    {
        $this->webcookies = $cookies;
        return $this;
    }

    /**
     * Sets the proxy to use.
     *
     * @param string $ip Proxy as "host:port"; "http://" is prepended when the request is made.
     * @return spider $this, for chaining.
     */
    public function ip($ip)
    {
        $this->PROXY = $ip;
        return $this;
    }

    /**
     * Sets the encoding of the remote content.
     *
     * @param string $code Source encoding name, e.g. "GBK"; "UTF-8" or empty disables conversion.
     * @return spider $this, for chaining.
     */
    public function code($code)
    {
        $this->encoding = $code;
        return $this;
    }

    /**
     * Sets the cookie mode.
     *
     * @param string $cmod 'new' to start a new cookie session.
     * @return spider $this, for chaining.
     */
    public function cmod($cmod)
    {
        $this->cmod = $cmod;
        return $this;
    }

    /**
     * Sets the redirect mode.
     *
     * @param string $jmod 'jump' to follow redirects.
     * @return spider $this, for chaining.
     */
    public function jmod($jmod)
    {
        $this->jmod = $jmod;
        return $this;
    }

    /**
     * Sets the header mode.
     *
     * @param string $hmod 'noheader' to leave response headers out of the result.
     * @return spider $this, for chaining.
     */
    public function hmod($hmod)
    {
        $this->hmod = $hmod;
        return $this;
    }

    /**
     * Sets the content type.
     *
     * @param string $type 'bin' to skip encoding conversion for binary payloads.
     * @return spider $this, for chaining.
     */
    public function type($type)
    {
        $this->type = $type;
        return $this;
    }

    /**
     * Sets the POST body.
     *
     * @param mixed $postdata Value handed to CURLOPT_POSTFIELDS.
     * @return spider $this, for chaining.
     */
    public function post($postdata)
    {
        $this->postfield = $postdata;
        return $this;
    }

    /**
     * Sets the request timeout.
     *
     * @param int $time Timeout in seconds.
     * @return spider $this, for chaining.
     */
    public function buytime($time)
    {
        $this->time = $time;
        return $this;
    }

    /**
     * Sets the Referer header value.
     *
     * @param string $refer
     * @return spider $this, for chaining.
     */
    public function refer($refer)
    {
        $this->refer = $refer;
        return $this;
    }

    /**
     * Resets every setting to an empty value and the timeout to 30 seconds.
     *
     * @return spider $this, for chaining (same convention as the setters).
     */
    public function clear()
    {
        $this->url = "";
        $this->webcookies = "";
        $this->refer = "";
        $this->PROXY = "";
        $this->encoding = "";
        $this->cmod = "";
        $this->jmod = "";
        $this->hmod = "";
        $this->type = "";
        $this->postfield = "";
        $this->time = 30;
        $this->contents = "";
        return $this;
    }

    /**
     * Shared implementation of methodGet() and methodPost().
     *
     * @param bool $isPost TRUE to send a POST with $postfield, FALSE for a GET.
     * @return string|false Response content, or FALSE on failure / empty response.
     */
    private function request($isPost)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->time);
        if ($this->PROXY != "") {
            curl_setopt($ch, CURLOPT_PROXY, "http://" . $this->PROXY);
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Keep-Alive: 300', 'Connection: keep-alive'));
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; GTB7.4; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2)');
        // NOTE(review): certificate verification is disabled, as in the original
        // code; HTTPS responses are therefore not authenticated.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        if ((string) $this->jmod === 'jump') {
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        }
        // CURLOPT_REFERER needs a string; fall back to the site root of the
        // target URL when no explicit referer was configured.
        $referer = empty($this->refer) ? $this->defaultReferer() : $this->refer;
        curl_setopt($ch, CURLOPT_REFERER, $referer);
        curl_setopt($ch, CURLOPT_URL, $this->url);

        if ($isPost) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $this->postfield === null ? '' : $this->postfield);
        } else {
            curl_setopt($ch, CURLOPT_HTTPGET, 1);
        }

        if ((string) $this->cmod === 'new') {
            curl_setopt($ch, CURLOPT_COOKIESESSION, 1);
        } else {
            curl_setopt($ch, CURLOPT_COOKIE, (string) $this->webcookies);
        }

        $includeHeaders = ((string) $this->hmod === 'noheader') ? 0 : 1;
        curl_setopt($ch, CURLOPT_HEADER, $includeHeaders);

        $this->contents = curl_exec($ch);
        curl_close($ch);

        // curl_exec() yields false on failure; an empty body is treated the same way.
        if ($this->contents === false || $this->contents === '') {
            return FALSE;
        }

        // Transcode text to UTF-8. Binary payloads are left untouched, and so is
        // content whose encoding is unknown or already UTF-8 (an empty source
        // encoding is not a valid argument for mb_convert_encoding).
        $isBinary = ((string) $this->type === 'bin');
        $sourceEncoding = (string) $this->encoding;
        if (!$isBinary && $sourceEncoding !== '' && strcasecmp($sourceEncoding, 'UTF-8') != 0) {
            $this->contents = mb_convert_encoding($this->contents, "UTF-8", $sourceEncoding);
        }

        $this->webcookies = $this->mergeCookies($this->contents);

        return $this->contents;
    }

    /**
     * Builds the fallback referer "scheme://host[:port]/" from $url.
     *
     * @return string The site root, or an empty string when $url has no scheme or host.
     */
    private function defaultReferer()
    {
        $parts = parse_url((string) $this->url);
        if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {
            return '';
        }
        $origin = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }
        return $origin . '/';
    }

    /**
     * Merges the stored cookies with those announced in the response.
     *
     * Only the "name=value" part of each Set-Cookie line is kept (attributes
     * after the first ';' are dropped); a cookie from the response overrides a
     * stored cookie of the same name.
     *
     * @param string $response Response text to scan for "Set-Cookie:" lines.
     * @return string Cookies joined as "name=value;name2=value2" ('' when there are none).
     */
    private function mergeCookies($response)
    {
        $pairs = array();
        if (!empty($this->webcookies)) {
            $pairs = explode(';', $this->webcookies);
        }
        preg_match_all("/Set-Cookie: (.*)[;\r\n]{1,1}/isU", $response, $found);
        foreach ($found[1] as $pair) {
            $pairs[] = $pair;
        }

        $jar = array();
        foreach ($pairs as $pair) {
            $fields = explode('=', trim($pair), 2);
            $name = trim($fields[0]);
            if ($name === '') {
                continue;
            }
            // A segment without '=' is kept as a cookie with an empty value.
            $jar[$name] = isset($fields[1]) ? $fields[1] : '';
        }

        $chunks = array();
        foreach ($jar as $name => $value) {
            $chunks[] = $name . '=' . $value;
        }
        return implode(';', $chunks);
    }
}
?>