利用php封装函数抓取京东商城省市县数据

<?php
/**
 * Created by PhpStorm.
 * User: Administrator
 * Date: 14-9-1
 * Time: 下午2:00
 */
// Report every notice and warning while the scraper runs.
error_reporting(E_ALL);
// Load the core class file and initialise the application object returned by
// discuz_core::instance().
require './source/class/class_core.php';
$discuz = & discuz_core::instance();
// The cache list must be defined before it is handed to the core object; the
// only assignment used to be commented out, which left $cachelist undefined.
// NOTE(review): an empty list is assumed to mean "preload no caches" — confirm
// against discuz_core::init().
//$cachelist = array('plugin');
$cachelist = array();
$discuz->cachelist = $cachelist;
$discuz->init();




/**
 * Open a remote URL over a raw socket and return the decoded JSON payload.
 *
 * Sends an HTTP/1.0 GET request (or a POST when $post is non-empty), skips the
 * response headers, reads the body, drops the first and the last byte of the
 * body and JSON-decodes the remainder into associative arrays.
 * NOTE(review): the one-byte strip presumably removes a "(...)" wrapper that
 * the area endpoint puts around its JSON — confirm against a live response.
 * Only plain HTTP is spoken; the URL scheme is ignored.
 *
 * @param string $url      URL to open, e.g. http://host/path?query
 * @param int    $limit    Maximum number of body bytes to read; 0 means no limit
 * @param string $post     POST body to send, e.g. uid=1&password=1234; empty sends a GET
 * @param string $cookie   Cookie header value, e.g. uid=123&auth=a2323sd2323
 * @param bool   $bysocket Unused; kept so existing callers keep working
 * @param string $ip       Address to connect to instead of the URL host
 * @param int    $timeout  Connect timeout and read-inactivity timeout, in seconds
 * @param bool   $block    Whether the response is read in blocking mode
 * @return mixed Decoded JSON (json_decode with assoc = true); null when the
 *               body is missing or is not valid JSON after the strip; '' when
 *               no connection could be made
 */
function uc_fopen($url, $limit = 0, $post = '', $cookie = '', $bysocket = FALSE, $ip = '', $timeout = 50, $block = false) {
    $matches = parse_url($url);
    if(!is_array($matches)) {
        // parse_url() returns false for seriously malformed URLs.
        $matches = array();
    }
    $host = isset($matches['host']) ? $matches['host'] : '';
    $port = !empty($matches['port']) ? $matches['port'] : 80;
    $path = !empty($matches['path']) ? $matches['path'] : '/';
    // Keep the query string even when the URL has no explicit path.
    if(isset($matches['query']) && $matches['query'] !== '') {
        $path .= '?'.$matches['query'];
    }
    if($host === '' && !$ip) {
        // Nothing to connect to.
        return '';
    }

    // HTTP_USER_AGENT is not set when the script runs from the command line.
    $agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    // The Host header carries the port only when it is not the default one.
    $hostHeader = ($port == 80) ? $host : $host.':'.$port;

    $out = ($post ? 'POST' : 'GET')." $path HTTP/1.0\r\n";
    $out .= "Accept: */*\r\n";
    //$out .= "Referer: $boardurl\r\n";
    $out .= "Accept-Language: zh-cn\r\n";
    if($post) {
        // A POST without these headers and without the body is an empty request.
        $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
        $out .= 'Content-Length: '.strlen($post)."\r\n";
    }
    $out .= "User-Agent: $agent\r\n";
    $out .= "Host: $hostHeader\r\n";
    $out .= "Connection: Close\r\n";
    $out .= "Cookie: $cookie\r\n\r\n";
    if($post) {
        $out .= $post;
    }

    $fp = @fsockopen(($ip ? $ip : $host), $port, $errno, $errstr, $timeout);
    if(!$fp) {
        return '';//note $errstr : $errno \r\n
    }

    stream_set_timeout($fp, $timeout);
    // Send the request while the socket is still blocking so it cannot be
    // written partially, then switch to the mode the caller asked for.
    @fwrite($fp, $out);
    stream_set_blocking($fp, $block);

    $return = '';
    $status = stream_get_meta_data($fp);
    if(!$status['timed_out']) {
        $stalled = false;
        $idleSince = microtime(true);

        // Skip the response headers: they end at the first empty line.
        while(!$stalled && !feof($fp)) {
            $header = @fgets($fp);
            if($header === "\r\n" || $header === "\n") {
                break;
            }
            if($header === false || $header === '') {
                $stalled = _uc_fopen_stalled($fp, $idleSince, $timeout, $block);
            } else {
                $idleSince = microtime(true);
            }
        }

        // Read the body in chunks of at most 8192 bytes, honouring $limit.
        while(!$stalled && !feof($fp)) {
            $data = fread($fp, ($limit == 0 || $limit > 8192 ? 8192 : $limit));
            if($data === false || $data === '') {
                $stalled = _uc_fopen_stalled($fp, $idleSince, $timeout, $block);
                continue;
            }
            $idleSince = microtime(true);
            $return .= $data;
            if($limit) {
                $limit -= strlen($data);
                if($limit <= 0) {
                    break;
                }
            }
        }
    }
    @fclose($fp);

    $length = strlen($return);
    if($length < 2) {
        // Too short to hold a wrapped payload; json_decode would give null anyway.
        return null;
    }
    // Drop the first and the last byte, then decode to associative arrays.
    return json_decode(substr($return, 1, $length - 2), true);
}

/**
 * Tell whether a read that produced no data means the transfer has stalled.
 *
 * Returns true once the stream reports a timeout or no data has arrived for
 * $timeout seconds. Otherwise, in non-blocking mode, it sleeps for 10 ms so
 * the calling loop does not spin at full CPU, and returns false.
 *
 * @param resource $fp        Open socket stream
 * @param float    $idleSince microtime(true) of the last successful read
 * @param int      $timeout   Allowed inactivity in seconds
 * @param bool     $block     Whether the stream is in blocking mode
 * @return bool
 */
function _uc_fopen_stalled($fp, $idleSince, $timeout, $block) {
    $meta = stream_get_meta_data($fp);
    if(!empty($meta['timed_out']) || (microtime(true) - $idleSince) >= $timeout) {
        return true;
    }
    if(!$block) {
        usleep(10000);
    }
    return false;
}

// Fetch the top-level area list (fid=0).
$return=uc_fopen('http://d.360buy.com/area/get?fid=0');
// Disabled recursive variant, kept for reference.
/*function  getdata($id=0){
    $string='';
    $arr=uc_fopen('http://d.360buy.com/area/get?fid='.$id);
    $length=$id?$lenth=count($arr):34;
    for($i=0;$i<$length;$i++){
         $string.=$arr[$i]['name'];
         sleep(5);
         getdata($arr[$i]['id']);
    }

  return $string;
}*/

// uc_fopen() yields '' or null when the request or the JSON decoding fails;
// without a list there is nothing to walk.
if(!is_array($return)){
    echo "system error";
    exit();
}

$fp = @fopen("sql.txt","w");
if(!$fp){
    echo "system error";
    exit();
}

// Upper bound on the number of top-level entries processed. This used to be
// the hard-coded loop bound, which read past the end of shorter lists.
$provinceLimit = 34;

// Walk top-level entry -> its children -> their children and write one INSERT
// statement per third-level row. Rows for the first two levels are not
// written (their statements were already disabled in the original script).
foreach(array_slice($return, 0, $provinceLimit) as $province){
    if(!is_array($province) || !isset($province['id'])){
        continue;
    }
    $city=uc_fopen('http://d.360buy.com/area/get?fid='.$province['id']);
    sleep(1); // throttle requests to the remote service
    if(!is_array($city)){
        continue;
    }
    foreach($city as $cityRow){
        if(!is_array($cityRow) || !isset($cityRow['id'])){
            continue;
        }
        $cityId=(int)$cityRow['id'];
        $area=uc_fopen('http://d.360buy.com/area/get?fid='.$cityRow['id']);
        if(is_array($area)){
            foreach($area as $areaRow){
                if(!is_array($areaRow) || !isset($areaRow['id'], $areaRow['name'])){
                    continue;
                }
                // Ids are forced to integers and the name is escaped because
                // both come from a remote response and end up inside SQL text.
                $fileData="insert into ultrax_common_district_Tmp (code,name,upcode,level) value (".(int)$areaRow['id'].",'".addslashes((string)$areaRow['name'])."',".$cityId.",2);\n";
                fwrite($fp,$fileData);
            }
        }
        sleep(1); // throttle requests to the remote service
    }
}
fclose($fp);
exit;


/*
 * Browser-side attempt using jQuery JSONP: it could not be used for the
 * scraping because the requests end up blocking.
 * NOTE(review): an `exit;` statement precedes this block in the file, so this
 * echo is presumably never reached — confirm before relying on it.
 * The heredoc below interpolates $return directly into the emitted page.
 */
echo  $script=<<<script
<html>
<body>
<script src="./static/js/jquery-1.10.2.min.js"></script>
<script>
var data=$return;
var city=null;
function getdata(id){
  $.ajax({
             url: "http://d.360buy.com/area/get?fid="+id,
             // the name of the callback parameter, as specified by the YQL service
            jsonp: "callback",
            // tell jQuery we're expecting JSONP
            dataType: "jsonp",
              // tell YQL what we want and that we want JSON
            data: {
                     format: "json"
                  },
              // work with the response
          success: function(response) {
                for(var j=0;j<response.length;j++){
                  console.log("insert into city (name,upid) values ('"+response[j]["name"]+"',"+id+");<br>");
                }
            }
    });
}
for(var i= 0;i<34;i++){
    var province=data[i];
    //setTimeout(getdata(data[i]['id']),1000000000000*i);
    for(var j=0;j<province.length;j++){
        alert(province);
        setTimeout(getdata(province[j]['id']),1000000000000*j);
    }
  //document.write("insert into province (name,upid) values ("+data[i]["name"]+"',0);<br>");
}
</script>
</body>
</html>
script;


/* New-version data */

/*$oprovince=DB::query(" select * from ".DB::table("common_district")." where upid = 0");
if($province){
    $oarea=array();
    while($result = DB::fetch($oprovince)) {
        $oarea[]=$result;
    }
}else{
    return false;
}*/


// You may also be interested in: "Using a wrapped PHP function to scrape JD.com province/city/county data"