Python 爬取并下载桌面背景

Desktop_image_crawl.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by LiTianYao on 2019-03-28

import time
import os
import requests
from bs4 import BeautifulSoup

http_headers = {  # 'User-Agent':'Mozilla/5.0 (compatible; Googlebot/2.1; +Googlebot - Webmaster Tools Help)',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        'Connection': 'Keep-Alive',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept': '*/*',
        'Accept-Charset': 'utf-8,GBK;q=0.7,*;q=0.3',
        'Cache-Control': 'max-age=0'
    }


# Crawl a wallpaper gallery page by page.
#
# Starting at `url_`, each iteration:
#   1. fetches the page and parses it with BeautifulSoup,
#   2. downloads the image referenced by the element with id "bigImg" into
#      the local "image" directory (file name = last path segment of the URL),
#   3. follows the href of the element with id "pageNext".
# The loop stops when that href is 'javascript:;' (the value the original
# script treated as "no further page") or when no next link can be found.
url = 'http://desk.zol.com.cn'
url_ = 'http://desk.zol.com.cn/bizhi/7486_92768_2.html'

# open(..., "wb") does not create parent directories; without this the first
# download fails with FileNotFoundError on a fresh run.
os.makedirs('image', exist_ok=True)

while True:
    # A timeout keeps a stalled connection from hanging the crawl forever.
    result = requests.get(url_, headers=http_headers, timeout=10)
    result.raise_for_status()
    soup = BeautifulSoup(result.text, "html.parser")

    big_img = soup.find(id='bigImg')
    image_url = big_img.get('src') if big_img is not None else None
    if image_url:
        image_name = os.path.join('image', image_url.split('/')[-1])
        print("下载", image_url)
        # Using the response as a context manager releases the streamed
        # connection even if writing the file fails part-way.
        with requests.get(image_url, stream=True, timeout=10) as r:
            if r.ok:
                with open(image_name, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
            else:
                # Do not save an HTTP error body as if it were an image.
                print("skip, HTTP status", r.status_code, image_url)
    else:
        print("skip, no bigImg element with a src on", url_)

    next_link = soup.find(id='pageNext')
    next_href = next_link.get('href') if next_link is not None else None
    # Check for the end of the gallery before building the next URL so that
    # `url_` never ends up holding a bogus 'http://...javascript:;' address.
    if not next_href or next_href == 'javascript:;':
        break
    url_ = url + next_href
    time.sleep(0.1)  # small pause between pages to go easy on the server


你可能感兴趣的:(python 爬取桌面背景下载)