"content-type:text/html;charset=utf-8");
/**
* @author Cpath
* @time 2016/5/8
* curl采集获取10页图片数据
*/
// Make sure the download directory exists before any image is written.
// is_dir() is the right probe here: is_file() is always false for a directory,
// so the original retried mkdir() on every run and warned once ./photo existed.
if (!is_dir('./photo')) {
    mkdir('./photo');
}
//$dns="mysql:host=127.0.0.1;dbname=article";
//$p=new PDO($dns,"root","root");
//$p->query("set names utf8");
// Pattern applied to every fetched search-result page by the download loop.
// NOTE(review): as written this literal contains no capture group, yet the loop
// reads $res[1] from preg_match_all(); the HTML tags and the (...) group that
// selects the image URL appear to be missing — restore them from the original.
$preg='#[\r\t\s\n]+[\r\t\s\n]+[\r\t\s\n]+- [\r\t\s\n]+[\r\t\s\n]+[\r\t\s\n]+[\r\t\s\n]+[\r\t\s\n]+[\r\t\s\n]+[\r\t\s\n]+[\r\t\s\n]+
[\r\t\s\n]+ [\r\t\s\n]+ | [\r\t\s\n]+
#isU';
// Fetch ten result pages (24 images per page) and save every matched image
// under ./photo/ with a microtime-based file name.
set_time_limit(0); // downloads can be slow; lift the limit once instead of once per image
$perPage = 24;
$pageCount = 10;
$saved = 0; // number of images written so far, used for the progress message
for ($page = 0; $page < $pageCount; $page++) {
    // Offsets are 0, 24, 48, ... The original appended `$i += 24` to the URL,
    // which also mutated the loop counter and produced 24, 49, 74, ... instead.
    $i = $page * $perPage;
    $url = "http://image.yodao.com/search?q=%E5%A4%A9%E8%8A%B1%E6%9D%BF%E5%9B%BE%E7%89%87&keyfrom=image.nextPage&start=" . $i;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    $con = curl_exec($ch);
    curl_close($ch);

    if ($con === false) {
        // The page could not be fetched; report it and move on to the next page.
        echo "采集失败";
        continue;
    }

    preg_match_all($preg, $con, $res);
    // Group 1 is expected to hold the image URLs; tolerate a pattern without it.
    $images = isset($res[1]) ? $res[1] : array();

    foreach ($images as $imageUrl) {
        $file = file_get_contents($imageUrl);
        if ($file === false) {
            echo "采集失败";
            continue;
        }

        // Keep the extension of the remote file when it has one.
        $pathex = pathinfo($imageUrl);
        $extension = isset($pathex['extension']) ? '.' . $pathex['extension'] : '';

        // microtime() yields "0.NNNNNNNN SSSSSSSSSS"; strip the space and the
        // leading "0." to get a digits-only name.
        $img = str_replace('0.', '', str_replace(' ', '', microtime())) . $extension;
        $success = file_put_contents("./photo/" . $img, $file);

        if ($success) {
            $saved++;
            // Report progress once per 24 saved images.
            if ($saved % $perPage == 0) {
                echo "目前采集了".$saved."张图片
";
            }
        } else {
            echo "采集失败";
        }
    }
}
include 'db.php';
/**
 * Request a URL with cURL and hand back whatever curl_exec() produced.
 *
 * Response headers are excluded and the body is returned rather than printed.
 *
 * @param string $url address to request
 * @return string|bool the response body, or false when the transfer fails
 */
function curl($url){
    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
    ));
    $body = curl_exec($handle);
    curl_close($handle);
    return $body;
}
// Action flags posted by the page buttons; an empty string when the field is absent.
// The original stored the boolean result of isset() instead of the posted value,
// so any posted value at all compared equal to the expected flag.
$start=isset($_POST['start'])?$_POST['start']:'';
$file1=isset($_POST['file1'])?$_POST['file1']:'';
// Dispatch on the posted action:
//   start -> scrape the news front page and insert every match into news_cache
//   file1 -> build (or reuse) one cached HTML file per news_cache row
if($start=='start'){
    $url="http://auto.ifeng.com";
    $output=curl($url);
    $preg = '#[\r\n\t\s]+
.*
(.*).*
(.*) .* (\d{4}[-]{1}\d{2}[-]{1}\d{2} \d{2}[:]\d{2}).*
(.*)
.*
#isU';
    preg_match_all($preg,$output,$res);

    // One connection and one prepared statement for the whole batch. Bound
    // parameters keep scraped text from being interpreted as SQL.
    $dbh=db();
    $insert=$dbh->prepare('insert into news_cache (dsurl,img,title,brand,`time`,content) values (?,?,?,?,?,?)');
    $total=isset($res[1])?count($res[1]):0;
    $inserted=0;

    if($insert===false){
        echo "采集失败----";
    }else{
        for($i=0;$i<$total;$i++){
            // Capture groups 1..6 map onto the six columns in order; a group the
            // pattern did not produce is stored as an empty string.
            $row=array();
            for($g=1;$g<=6;$g++){
                $row[]=isset($res[$g][$i])?$res[$g][$i]:'';
            }
            if($insert->execute($row)){
                $inserted++;
            }else{
                echo "采集失败----";
            }
        }
    }

    // Redirect once, after the batch, and only while headers can still be sent.
    if($inserted>0 && !headers_sent()){
        header("Location:news_list.php");
    }
}elseif($file1=='file1'){
    $dbh=db();
    $sth = $dbh->prepare("select id,dsurl from news_cache");
    $sth ->execute();
    $result = $sth->fetchAll(PDO::FETCH_ASSOC);

    // Loop-invariant setup: cache lifetime (seconds), cache directory, Redis link.
    $cacheTime = 864000;
    $cacheDir = 'cacheDir';
    if (!is_dir($cacheDir)) mkdir($cacheDir);
    $redis = new redis();
    $redis->connect('127.0.0.1','6379');
    $redis->auth('root');
    $preg='#(.*)
#isU';

    foreach($result as $row){
        $cacheFile = $cacheDir.'/'.'news-car-'.$row['id'].'.html';
        $redis->setex("car-".$row['id'],1000*10,$row['id']);

        if (!is_file($cacheFile) || time() - filemtime($cacheFile) > $cacheTime) {
            // Cache miss or stale file: fetch the article and rebuild the file.
            $output=curl($row['dsurl']);
            preg_match_all($preg,$output,$ress);
            $whole=isset($ress[0][0])?$ress[0][0]:'';
            $inner=isset($ress[1][0])?$ress[1][0]:'';
            $content=$whole.$inner;

            if($content===''){
                // Nothing fetched or matched; do not cache an empty page.
                echo "采集失败----";
                continue;
            }

            echo $content;
            file_put_contents($cacheFile,$content);
        } else {
            echo $content = file_get_contents($cacheFile);
        }
    }
}
?>
<button onclick="start()">开始采集</button>
<button onclick="file1()">批量生成html文件</button>
<button onclick="list()">新闻列表</button>
<script src="jquery-1.9.1.min.js"></script>
<script>
// Click handlers for the three buttons on this page.

// POST start=start to news_caiji.php; the response is not used.
function start(){
$.post('news_caiji.php',{start:'start'});
}
// POST file1=file1 to news_caiji.php; the response is not used.
function file1(){
$.post('news_caiji.php',{file1:'file1'});
}
// Navigate the browser to the news list page.
function list(){
location.href="http://www.php1.com/Freshman/news_cache/news_list.html";
}
</script>