http://211.149.134.75:9080/A6E7E0
需要开启 Apache 服务
PHP 需要安装项目所需的版本
添加站点，是为了让定时任务能够请求到代码
-- Task queue for the scraper: one row per search URL still to be crawled.
--   url    : request URL prefix; the scheduled script appends `num` to it.
--   num    : page counter for the next request (starts at 1).
--   status : 1 = pending, 2 = finished.
-- IF NOT EXISTS keeps this script re-runnable. The dump-only AUTO_INCREMENT=2
-- table option is omitted so a freshly created table starts numbering at 1.
-- NOTE(review): `utf8` is MySQL's 3-byte utf8mb3 alias; consider utf8mb4 if
-- stored URLs may ever contain 4-byte characters.
CREATE TABLE IF NOT EXISTS `task_list` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT 'ID',
  `url` varchar(200) NOT NULL COMMENT '抓取网址',
  `num` int(10) unsigned NOT NULL DEFAULT '1' COMMENT '数量',
  `status` tinyint(1) unsigned NOT NULL DEFAULT '1' COMMENT '状态 1未完成 2已完成',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
curl 方法
/**
 * Request a URL with cURL and decode the response body as JSON.
 *
 * @param string $url     Address to request; an empty value returns false immediately.
 * @param int    $is_post Pass 1 to send a POST carrying $data; any other value sends a GET.
 * @param array  $data    POST fields, only used when $is_post == 1.
 * @return mixed false when $url is empty; null when the transfer fails or the body
 *               is not valid JSON; otherwise the decoded value (objects as
 *               associative arrays).
 */
function curl($url,$is_post=0,$data=[]){
    if(empty($url)){
        return false;
    }
    $ch = curl_init();                              // create a new cURL handle
    curl_setopt($ch, CURLOPT_URL, $url);            // target URL
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);          // cap the whole transfer at 30 seconds
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);    // follow HTTP redirects
    curl_setopt($ch, CURLOPT_AUTOREFERER, 1);       // set Referer automatically on redirects
    curl_setopt($ch, CURLOPT_HEADER, 0);            // keep response headers out of the returned body
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);    // return the body as a string instead of printing it
    curl_setopt($ch, CURLOPT_USERAGENT, 'Data');    // value of the User-Agent request header
    if($is_post == 1){
        curl_setopt($ch, CURLOPT_POST, 1);          // send a regular POST request
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);// POST payload
    }
    $html = curl_exec($ch);                         // run the request; false on failure
    if(curl_errno($ch)){
        // The message must be read from the handle $ch; the previous code passed
        // an undefined variable, so the error text was never reported.
        echo 'Errno'.curl_error($ch);
    }
    curl_close($ch);                                // release the handle
    if($html === false){
        // Same result the old json_decode(false) produced, made explicit.
        return null;
    }
    return json_decode($html,true);
}
百度抓取方法
/**
 * Store one page of Baidu place results in `baidu_shop`, skipping places whose
 * coordinates are already recorded.
 *
 * Each record is read as an array with the keys name, province, city, area,
 * address, telephone, location[lng|lat] and detail_info[tag|detail_url].
 * Keys missing from a record are stored as null instead of raising
 * undefined-index notices.
 *
 * @param array $data List of place records.
 * @return string One message line per record (already exists / inserted / insert
 *                failed); '' when $data is empty or not an array.
 */
function baidu($data){
    $msg = '';
    if(empty($data) || !is_array($data)){
        return $msg;
    }
    foreach ($data as $v) {
        $location = (isset($v['location']) && is_array($v['location'])) ? $v['location'] : [];
        $detail   = (isset($v['detail_info']) && is_array($v['detail_info'])) ? $v['detail_info'] : [];
        $name     = isset($v['name']) ? $v['name'] : null;
        $lng      = isset($location['lng']) ? $location['lng'] : null;
        $lat      = isset($location['lat']) ? $location['lat'] : null;

        // A place counts as known when a row with the same coordinates exists.
        $find = find('baidu_shop','*',['lng'=>$lng,'lat'=>$lat]);
        if($find){
            $msg .= $name.'已存在,ID为:'.$find['id']."\n";
            continue;
        }

        // The tag is read as "first;second"; each segment is looked up by name in baidu_cat.
        $tagText = isset($detail['tag']) ? $detail['tag'] : null;
        $tag     = explode(';', (string)$tagText);
        $cat_one = find('baidu_cat','*',['name'=>$tag[0]]);
        // Only look up the second level when the tag actually has a second segment.
        $cat_two = isset($tag[1]) ? find('baidu_cat','*',['name'=>$tag[1]]) : null;

        // Built in a separate variable so the $data parameter is not overwritten mid-loop.
        $row = [];
        foreach (['name','province','city','area','address','telephone'] as $field) {
            $row[$field] = isset($v[$field]) ? $v[$field] : null;
        }
        $row['tag']        = $tagText;
        $row['detail_url'] = isset($detail['detail_url']) ? $detail['detail_url'] : null;
        $row['lng']        = $lng;
        $row['lat']        = $lat;
        $row['cat_one']    = isset($cat_one['id']) ? $cat_one['id'] : 0;
        $row['cat_two']    = isset($cat_two['id']) ? $cat_two['id'] : 0;

        $id = insertId('baidu_shop',$row);
        if($id){
            $msg .= $name.'添加成功,ID为:'.$id."\n";
        }else{
            $msg .= $name.'添加失败'."\n";
        }
    }
    return $msg;
}
auto.php
# Scheduled entry point: take one pending task, fetch its next page of results,
# store them, then advance or close the task.
header("Content-Type:text/html;charset=utf8");
# Load the shared PDO file (find()/update() are presumably defined there - verify).
require 'pdo.php';
# Only tasks that are still pending (status 1).
$where = [
    'status' => 1
];
# Fetch one pending task from the task table.
$find = find('task_list','*',$where);
if(empty($find)){
    echo '没有任务了';
    return false;
}
# `num` counts pages from 1, while Baidu's page_num parameter is 0-based,
# so page N is requested as page_num = N - 1 (otherwise page 0 is never fetched).
$num = (int)$find['num'];
$url = $find['url'].max(0, $num - 1);
# Page size actually requested by the stored URL; 10 is Baidu's default when
# the URL carries no page_size parameter.
$pageSize = 10;
if(preg_match('/[?&]page_size=(\d+)/', $find['url'], $m) && (int)$m[1] > 0){
    $pageSize = (int)$m[1];
}
# Request the URL and decode the response.
$curl = curl($url);
if(!is_array($curl)){
    # Transport failure or non-JSON body: leave the task pending so the next
    # run retries it instead of marking it finished with nothing scraped.
    echo '请求失败,任务保持未完成';
    return false;
}
# NOTE(review): an API-level error response (no total/results) still closes the
# task, as before - confirm whether such tasks should be retried instead.
$total   = isset($curl['total']) ? (int)$curl['total'] : 0;
$results = (isset($curl['results']) && is_array($curl['results'])) ? $curl['results'] : [];
# Total number of pages for this query.
$page = (int)ceil($total / $pageSize);
# Initialised up front so the final echo is defined on every path.
$ret = '';
# Default outcome: the task is finished.
$data = [
    'status' => 2
];
if($num <= $page){
    $ret = baidu($results);
    if(!empty($ret)){
        # The page produced records: move on to the next page.
        $data = [
            'num' => $num + 1
        ];
    }
}
update('task_list',$data,'id='.(int)$find['id']);
echo $ret;
return false;
# Task generator: create one task_list row for every (top-level category,
# child category) pair found in baidu_cat.
header("Content-Type:text/html;charset=utf8");
# Load the shared PDO file (select()/insert() are presumably defined there - verify).
require 'pdo.php';
# Fetch every category.
$cat = select('baidu_cat','*');
if(empty($cat)){
    $cat = [];
}
# Pass 1: index the top-level categories (pid == 0) by id, each with an empty
# child list, so a parent without children is simply skipped later on.
$tmp = [];
foreach($cat as $cat_v){
    if($cat_v['pid'] == 0){
        $cat_v['son'] = [];
        $tmp[$cat_v['id']] = $cat_v;
    }
}
# Pass 2: attach children to their parent. Doing this in a second pass makes the
# result independent of row order; previously a parent seen after its child
# overwrote the children already collected. Rows whose pid is not a top-level
# id are skipped because there is no parent name to build a query from.
foreach($cat as $cat_v){
    if($cat_v['pid'] != 0 && isset($tmp[$cat_v['pid']])){
        $tmp[$cat_v['pid']]['son'][] = $cat_v;
    }
}
# Build one search URL per parent/child pair and queue it as a task. The page
# number is appended by the consumer, hence the trailing "page_num=".
# The "&region=" parameter is restored here; it had been corrupted to "®ion=".
# NOTE(review): the Baidu API key (ak) is hard-coded; consider moving it to configuration.
# NOTE(review): query/region are stored without percent-encoding; encoding them
# could exceed task_list.url's varchar(200) - check before changing.
foreach($tmp as $tmp_v){
    foreach($tmp_v['son'] as $tmp_v_v){
        $data = [
            'url' => 'http://api.map.baidu.com/place/v2/search?ak=1PQvdVNrRaLeCbkVB9VbZoQ9RyGFy7Kq&scope=2&output=json&query='.$tmp_v['name'].$tmp_v_v['name'].'&region=合肥&page_size=20&page_num='
        ];
        insert('task_list',$data);
    }
}