抓取 LOL 官网墙纸实现

闲来无事(蛋疼),随手实现了下

Controller 代码(注:转载时类声明与 download 方法的开头部分已丢失,下面只保留了剩余的代码片段)

';
        static::save_image();
        $end = microtime(true);

        echo '

spend:' . ($end - $start) . 'second(s)

'; }

    /**
     * Fetch a URL and wrap the parsed HTML in an XPath evaluator.
     *
     * libxml errors are collected internally while parsing so that sloppy
     * real-world markup does not emit a PHP warning per malformed tag; the
     * previous libxml error mode is restored afterwards.
     *
     * @param string $url Address of the page to download.
     * @return \DOMXPath|null Evaluator for the page, or null when the download
     *                        failed, was empty, or could not be parsed.
     */
    private static function load_xpath($url)
    {
        $html = static::curl_get($url);
        if (!is_string($html) || trim($html) === '') {
            return null;
        }

        $previous = libxml_use_internal_errors(true);
        $dom = new \DOMDocument();
        $loaded = $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $loaded ? new \DOMXPath($dom) : null;
    }

    /**
     * Work out how many listing pages there are.
     *
     * Downloads the portal page and reads the text of the second-to-last link
     * inside the element whose class is "pager".
     *
     * @return string|null Text of that pager link, or null when the page could
     *                     not be fetched or contains no such link.
     */
    public static function page_crawler_0()
    {
        $xpath = self::load_xpath(static::$portal);
        if ($xpath === null) {
            return null;
        }

        $page = $xpath->query('//*[@class="pager"]/a[position() = (last() - 1)]');
        $node = $page === false ? null : $page->item(0);

        return $node === null ? null : $node->nodeValue;
    }

    /**
     * Collect the images listed on one page and append them to $img_storage.
     *
     * Each title link yields an entry with 'name' (link text) and 'url'
     * (remote server + href). Thumbnails are matched to links by position and
     * stored under the '320x180' key.
     *
     * @param int|string $page_no Page number appended to the portal URL as "?page=".
     * @return void
     */
    public function page_crawler_1($page_no)
    {
        $xpath = self::load_xpath(static::$portal . '?page=' . $page_no);
        if ($xpath === null) {
            return;
        }

        $img_nodes = $xpath->query('//*[@class="default-2-3"]//*[starts-with(@class, "view")]/div/div//img');
        $a_nodes = $xpath->query('//*[@class="default-2-3"]//*[starts-with(@class, "view")]/div/div//h4//a');

        $data = [];
        foreach ($a_nodes as $a) {
            $data[] = [
                'name' => $a->nodeValue,
                'url' => static::$remote_server . $a->getAttribute('href'),
            ];
        }

        foreach ($img_nodes as $key => $img) {
            // An image without a matching title link would produce an entry
            // lacking 'name', which save_image() needs; skip it.
            if (!isset($data[$key])) {
                continue;
            }

            $src = $img->getAttribute('src');
            // Both http:// and https:// sources are already absolute; anything
            // else is treated as a path on the remote server.
            $is_absolute = preg_match('#^https?://#i', $src) === 1;
            $data[$key]['320x180'] = $is_absolute ? $src : static::$remote_server . $src;
        }

        static::$img_storage = array_merge(static::$img_storage, $data);
    }

    /**
     * Download every collected thumbnail and store it under the public images directory.
     *
     * Each image goes to <wallpapers>/<name>/<name>_320x180.jpg and one status
     * line is echoed per image. Entries that are incomplete, have an unusable
     * name, or fail to download are reported/skipped instead of being written.
     *
     * @return void
     */
    public function save_image()
    {
        // NOTE(review): exists/makeDirectory/put are used as on Laravel's
        // Illuminate\Filesystem\Filesystem; confirm Fs is an alias of that class.
        $fs = new Fs();

        foreach (static::$img_storage as $val) {
            if (!isset($val['name'], $val['320x180'])) {
                continue;
            }

            // The name comes from scraped link text: strip surrounding
            // whitespace and neutralise directory separators so it stays a
            // single path segment inside the wallpapers directory.
            $dir_name = str_replace(['/', '\\'], '_', trim($val['name']));
            if ($dir_name === '' || $dir_name === '.' || $dir_name === '..') {
                echo 'error occurred when saving ' . $val['name'], "\n";
                continue;
            }

            $wallpaper_path = public_path('images/leagueofledgends/wallpapers/' . $dir_name);
            if (! $fs->exists($wallpaper_path)) {
                // Recursive, so the first run also creates the parent directories.
                $fs->makeDirectory($wallpaper_path, 0755, true);
            }

            $content = static::curl_get($val['320x180']);
            // Only write when something was actually downloaded.
            $saved = is_string($content)
                && $content !== ''
                && $fs->put($wallpaper_path . '/' . $dir_name . '_320x180.jpg', $content);

            if ($saved) {
                echo 'image saved at ' . $wallpaper_path, "\n";
            } else {
                echo 'error occurred when saving ' . $val['name'], "\n";
            }
        }
    }

    /**
     * Download a URL with cURL, sending the configured User-Agent.
     *
     * @param string $url Address to request.
     * @return string|false Response body, or false when the handle could not
     *                      be created or the transfer failed.
     */
    public static function curl_get($url)
    {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, static::$ua);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

        $output = curl_exec($ch);
        curl_close($ch);

        return $output;
    }
}

路由

Route::get('/download', 'Image\ImageController@download');

涉及技术

xpath selector (没有使用正则)

遇到的问题

使用 file_get_contents 无法抓取到页面内容,所以改用 curl,并设置 User-Agent(UA)来模拟正常浏览器的请求

TODO

ob 系列函数(输出缓冲)用得还不熟练,目前无法实时 echo 下载进度

转载于:https://my.oschina.net/xqchina/blog/776514

你可能感兴趣的:(抓取 LOL 官网墙纸实现)