大众点评的爬虫代码(php版本)

1. 由于本人是一名 PHP 开发者,而看到的爬虫工具大多是用 Python 写的,好不容易才找到一个用 PHP 写的。

  
set_time_limit(0);

/**
 * Crawler that downloads dianping.com Hangzhou "ch10" category listing pages
 * and stores each raw response on disk, one file per page.
 *
 * Everything is static; the script at the bottom of this block simply calls
 * snap_dianping::snap_page().
 */
class snap_dianping
{
    // Category listing URLs to crawl, keyed by a local category id.
    // "(*)" is the placeholder that snap_page() replaces with the page number.
    public static $url = array(
        "20" => "http://www.dianping.com/hangzhou/ch10/g215p(*)",
        "21" => "http://www.dianping.com/hangzhou/ch10/g34032p(*)",
        "22" => "http://www.dianping.com/hangzhou/ch10/g26481p(*)",
        "23" => "http://www.dianping.com/hangzhou/ch10/g104p(*)",
        "24" => "http://www.dianping.com/hangzhou/ch10/g106p(*)",
        "25" => "http://www.dianping.com/hangzhou/ch10/g107p(*)",
        "26" => "http://www.dianping.com/hangzhou/ch10/g1338p(*)",
        "27" => "http://www.dianping.com/hangzhou/ch10/g34055p(*)",
        "28" => "http://www.dianping.com/hangzhou/ch10/g2714p(*)",
        "29" => "http://www.dianping.com/hangzhou/ch10/g118p(*)",
        "30" => "http://www.dianping.com/hangzhou/ch10/g1783p(*)",
        "31" => "http://www.dianping.com/hangzhou/ch10/g25474p(*)"
    );

    // Returned by snap_page(); nothing in this class ever adds entries to it.
    public static $result = array();

    /**
     * Fetch a single URL and return the response body.
     *
     * @param string $url      Absolute URL to download.
     * @param mixed  $typename Unused; kept so existing callers keep working.
     *
     * @return string|false Response body, or false when the request failed
     *                      (the failure reason is written to the error log).
     */
    public static function snap_list($url, $typename = '')
    {
        $ch = curl_init();
        if ($ch === false) {
            error_log('snap_dianping: curl_init() failed for ' . $url);
            return false;
        }

        // A plain GET is used on purpose: the previous CURLOPT_POST flag (with
        // no POST fields) sent an empty POST to what are ordinary listing pages.
        curl_setopt_array($ch, array(
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => 0,
            // NOTE(review): certificate verification is disabled, as in the
            // original; reconsider if the URLs are ever switched to https.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
            CURLOPT_TIMEOUT        => 10,
            CURLOPT_RETURNTRANSFER => 1,
        ));

        $body = curl_exec($ch);
        if ($body === false) {
            error_log('snap_dianping: request to ' . $url . ' failed: ' . curl_error($ch));
        }

        curl_close($ch);

        return $body;
    }

    /**
     * Download every page of every category in self::$url and save each
     * response as "<baseDir>/<category id>/<page>.txt".
     *
     * With the default arguments this fetches pages 1..49 of each category
     * and sleeps 30 seconds after every request, exactly as before.
     *
     * @param string $baseDir      Directory under which the per-category
     *                             folders are created.
     * @param int    $lastPage     Last page number to fetch (inclusive).
     * @param int    $delaySeconds Pause after each request, in seconds.
     *
     * @return array self::$result (always empty; see the property comment).
     */
    public static function snap_page(
        $baseDir = '/data/www/html/xuechaozhang/html_page',
        $lastPage = 49,
        $delaySeconds = 30
    ) {
        $baseDir = rtrim($baseDir, '/');

        foreach (self::$url as $key => $val) {
            echo $key . "\n";

            // Make sure the per-category folder exists; without it every
            // write below would fail. The second is_dir() covers the case
            // where another process created the folder in the meantime.
            $dir = $baseDir . '/' . $key;
            if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
                error_log('snap_dianping: cannot create directory ' . $dir);
                continue;
            }

            for ($Page = 1; $Page <= $lastPage; $Page++) {
                $u = str_replace('(*)', $Page, $val);
                flush();

                $get = self::snap_list($u, $key);
                if ($get !== false) {
                    // Only successful downloads are stored; a failed request
                    // no longer leaves an empty file behind.
                    $target = $dir . '/' . $Page . '.txt';
                    if (file_put_contents($target, $get) === false) {
                        error_log('snap_dianping: cannot write ' . $target);
                    }
                }

                // Throttle between requests.
                sleep($delaySeconds);
            }
        }

        return self::$result;
    }

    /**
     * Placeholder geocoder for an address.
     *
     * The remote lookup was already disabled (commented out) in the original
     * code, which built the URL
     *   http://maps.google.com/maps/geo?output=json&oe=utf8&q=<urlencoded address>
     * and read Placemark[0]->Point->coordinates[0] and [1] from the decoded JSON.
     * No request is made here, so both values are always null.
     * NOTE(review): confirm that endpoint and the coordinate order before
     * re-enabling any lookup.
     *
     * @param string $Address Address to geocode (currently unused).
     *
     * @return array Array with the keys 'lat' and 'lon', both null.
     */
    public static function snap_geo($Address)
    {
        return array(
            'lat' => null,
            'lon' => null,
        );
    }
}

date_default_timezone_set('Asia/Shanghai');
snap_dianping::snap_page();
?>

你可能感兴趣的:(PHP)