简单的图片爬取【php】

因为做项目时需要一些图片来做处理测试,就写了个简单的壁纸网站图片爬取脚本,代码如下:

';
		print_r($data);
		echo '
'; }else{ echo $data; } echo "
"; } function download($url, $pronames,$path = 'images/') { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30); $file = curl_exec($ch); curl_close($ch); $filename = pathinfo($url, PATHINFO_BASENAME); if(!is_dir($path . $pronames)){ mkdir('images/'.$pronames); } $resource = fopen($path . $pronames . '/' . $filename, 'a'); fwrite($resource, $file); fclose($resource); } //$subject = "1aotfyyjzj3aa4bdkxvg78457zmfxdngrxwyta3qb7nh1jpvotmlwv9o4e6lwzeg"; //$subject = file_get_contents('./pro/t1.html'); // set_time_limit(0); $pattern = "/[a-zA-z]+:\/\/[^\s]*?jpg/"; $arr = array(); $matches = null; //每一页的重复图片 $shieldUrl = array( 'http://img.netbian.com/file/2018/1218/5174f6bd57412ee0d66bcff52629cf5e.jpg', 'http://img.netbian.com/file/2018/1218/2c953b94fcb487c210cb8816f7146148.jpg', 'http://img.netbian.com/file/2018/1225/604a688cd6f79161236e6250189bc25b.jpg', 'http://img.netbian.com/file/2019/0304/d7718c988ae353a6531742bb92d97e91.jpg' ); for ($i=2; $i < 100; $i++) { # code... $subject = file_get_contents('http://www.netbian.com/index_'.$i.'.htm'); $t2 = preg_match_all($pattern,$subject,$matches); $arr[] = $matches[0]; foreach ($matches[0] as $key => $value) { if (!in_array($value, $shieldUrl)) { download($value,$i); //下载 } } } dump($arr); ?>

 

你可能感兴趣的:(PHP)