// Print errors directly in the output and report everything (E_ALL plus E_STRICT).
ini_set("display_errors", "On");
error_reporting(E_ALL | E_STRICT);
// Libraries loaded from the include/ directory that sits next to this script.
// phpQuery is used below for HTML scraping; presumably Db.class.php provides the
// $WEBDB handle used further down — TODO confirm.
require_once(dirname(__FILE__).'/include/phpQuery.class.php');
require_once(dirname(__FILE__).'/include/Db.class.php');
require_once(dirname(__FILE__).'/include/Image.class.php');
require_once(dirname(__FILE__).'/include/Cachefile.class.php');
require_once(dirname(__FILE__).'/include/Folder.class.php');
// Remove PHP's execution time limit for this script.
set_time_limit(0);
/**
 * Download a URL through a list of HTTP proxies and return the first non-empty response.
 *
 * Proxies are tried in the order they appear in $ipArr. A proxy whose request fails or
 * returns an empty body is skipped and the next one is tried.
 *
 * @param string $fetchurl URL to download.
 * @param array  $ipArr    Proxy host addresses. Each key is used to look up the matching port in $portArr.
 * @param array  $portArr  Proxy ports, keyed like $ipArr. When a port is missing, none is set and
 *                         cURL falls back to its own default proxy port.
 * @param int    $timeout  Maximum number of seconds a single transfer may take (0 or less = no limit).
 * @return mixed The response body of the first successful proxy, or null when no proxy produced one.
 */
function getSogouContent($fetchurl, $ipArr, $portArr = array(), $timeout = 30){
    // Nothing to iterate over (e.g. the proxy list could not be scraped): fail quietly.
    if (!is_array($ipArr) && !($ipArr instanceof Traversable)) {
        return null;
    }
    foreach ($ipArr as $key => $ip) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $fetchurl);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        if ($timeout > 0) {
            // Without an overall limit, a proxy that accepts the connection but never
            // answers would block the script forever.
            curl_setopt($ch, CURLOPT_TIMEOUT, (int)$timeout);
        }
        // Proxy authentication mode; no credentials are configured here.
        curl_setopt($ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC);
        // Proxy server address.
        curl_setopt($ch, CURLOPT_PROXY, $ip);
        // Proxy server port, only when one was supplied for this proxy.
        if (isset($portArr[$key]) && $portArr[$key] !== '') {
            curl_setopt($ch, CURLOPT_PROXYPORT, (int)$portArr[$key]);
        }
        // Talk to the proxy as a plain HTTP proxy.
        curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        $result = curl_exec($ch);
        curl_close($ch);
        if ($result) {
            return $result;
        }
    }
    // Every proxy failed or returned an empty body.
    return null;
}
/**
 * Collect proxy IPs.
 *
 * Loads listing pages 1 to 3 of www.xicidaili.com/nn/ and, for every table row whose
 * second cell is non-empty, stores that cell in $ipArr and the third cell in $portArr.
 * Both arrays receive their entries in the same order, so equal keys pair a host with its port.
 */
// Start from empty lists so the code below always receives arrays, even when no row is found.
$ipArr = array();
$portArr = array();
for ($page = 1; $page <= 3; $page++) {
    $fetchIpUrl = 'http://www.xicidaili.com/nn/'.$page;
    phpQuery::newDocumentFile($fetchIpUrl);
    foreach (pq("tr") as $row) {
        $ip = trim(pq($row)->find('td:eq(1)')->text());
        // Rows without a value in the second cell (presumably header rows) are skipped.
        if ($ip) {
            $ipArr[] = $ip;
            $portArr[] = trim(pq($row)->find('td:eq(2)')->text());
        }
    }
}
/**
 * Import today's jinse.com "lives" entries into the database.
 *
 * 1. Request the list with id=1000000 and use the first entry's id (+1) as the upper bound.
 * 2. Read the highest old_source_url already stored for source 'jinse' as the lower bound.
 * 3. For every id in that range, fetch the list, walk today's entries in reverse order
 *    and insert each entry that is not stored yet, writing a log line per insert.
 *
 * NOTE(review): only entries listed under today's date key are read; confirm that
 * responses for older ids are not keyed by an earlier date.
 */
// Determine the upper bound of the id range.
$fetchurls = 'http://www.jinse.com/ajax/lives/getList?search=&id='."1000000".'&flag=down';
$data = getSogouContent($fetchurls,$ipArr,$portArr);
$decoded = is_string($data) ? json_decode($data, true) : null;
$today = date('Y-m-d');
$dataList = (is_array($decoded) && isset($decoded['data'][$today]) && is_array($decoded['data'][$today]))
    ? $decoded['data'][$today]
    : array();
// Falls back to 1 when the list could not be fetched or decoded.
$max_id = isset($dataList[0]['id']) ? $dataList[0]['id'] + 1 : 1;

// Determine the lower bound: the most recent entry id already imported.
// NOTE(review): if old_source_url is a string column this ORDER BY sorts lexicographically — confirm.
$sql = "SELECT old_source_url FROM `table`\n"
    . "WHERE old_source = 'jinse' ORDER BY old_source_url DESC LIMIT 1 ";
$latest = $WEBDB->fetch_first($sql);
$start_id = isset($latest['old_source_url']) ? (int)$latest['old_source_url'] : 0;

// Directory that receives the per-insert log files.
$project = "/home/wwwlogs/jinse/";
// Feed fields copied into the table.
$fields = array(
    'id', 'topic_id', 'live_id', 'content', 'source', 'source_url', 'type', 'website', 'status',
    'grade', 'highlight_color', 'link', 'link_name', 'created_by', 'created_at', 'publish_time',
    'up_counts', 'down_counts', 'images',
);

for ($i = $start_id; $i <= $max_id; $i++) {
    set_time_limit(0);
    $fetchurl = 'http://www.jinse.com/ajax/lives/getList?search=&id='.$i.'&flag=down';
    // Fetch the list for this id.
    $data = getSogouContent($fetchurl,$ipArr,$portArr);
    $decoded = is_string($data) ? json_decode($data, true) : null;
    $today = date('Y-m-d');
    if (!is_array($decoded) || !isset($decoded['data'][$today]) || !is_array($decoded['data'][$today])) {
        // Download failed, body was not JSON, or there are no entries for today.
        continue;
    }

    foreach (array_reverse($decoded['data'][$today]) as $list) {
        if (!is_array($list)) {
            continue;
        }

        // Escape every value that ends up inside the SQL text. Missing or non-scalar
        // values become empty strings.
        // NOTE(review): addslashes() is kept because the $WEBDB API visible here offers
        // no parameter binding; switch to the driver's own escaping if it has one.
        $row = array();
        foreach ($fields as $field) {
            $value = isset($list[$field]) ? $list[$field] : '';
            $row[$field] = is_scalar($value) ? addslashes((string)$value) : '';
        }
        $old_source_url = $row['id'];

        // Entries without content or id are never stored.
        if (!$row['content'] || !$old_source_url) {
            continue;
        }

        // Skip entries that were imported before.
        $is_sql = "SELECT id FROM `table` WHERE old_source = 'jinse' AND old_source_url='$old_source_url'";
        $existing = $WEBDB->fetch_first($is_sql);
        if (!empty($existing['id'])) {
            continue;
        }

        $old_source = 'jinse';
        $created_at = (int)strtotime($row['created_at']);
        // The stored update time is the creation time plus a random 1-10 minute offset.
        $updated_at = $created_at + rand(60, 600);
        $publish_time = (int)$row['publish_time'];

        $sql = "INSERT INTO `table`\n"
            . "(`id`, `topic_id`, `live_id`, `content`, `source`, `source_url`, `type`, `website`, `status`,\n"
            . "`grade`, `highlight_color`, `link_name`, `link`, `created_by`, `created_at`, `updated_at`,\n"
            . "`publish_time`, `up_counts`, `down_counts`, `images`, `old_source`, `old_source_url`)\n"
            . "VALUES (NULL, '{$row['topic_id']}', '{$row['live_id']}', '{$row['content']}', '{$row['source']}', '{$row['source_url']}', '{$row['type']}', '{$row['website']}',\n"
            . "'{$row['status']}', '{$row['grade']}', '{$row['highlight_color']}', '{$row['link_name']}', '{$row['link']}', '{$row['created_by']}', '$created_at',\n"
            . "'$updated_at', '$publish_time', '{$row['up_counts']}', '{$row['down_counts']}', '{$row['images']}', '$old_source', '$old_source_url');";
        $WEBDB->query($sql);
        $content_id = $WEBDB->insert_id();

        // Record what was fetched, checked and inserted.
        $logLine = "|fetchurl|".$fetchurl."|is_sql|".$is_sql."|sofresult|".json_encode($existing)
            ."|sql|".$sql."|content_id|".$content_id;
        if (!is_dir($project)) {
            // Best effort; create missing parent directories as well.
            @mkdir($project, 0777, true);
        }
        file_put_contents(
            $project . "jinse_".date('Y-m-d H:i') . ".log",
            '[' . date('Y-m-d H:i:s') . ']' . $logLine . "\n"."\n",
            FILE_APPEND
        );

        if ($content_id) {
            // Progress output: the fetched URL and the entry text (these entries carry no title).
            echo '已采集原地址为:'.$fetchurl."----\n".'软件标题为:'.$list['content']."------\n";
        }
    }
}
exit;