超简洁爬取mzitu网站 按照标题分类存取套图

import requests
from pyquery import PyQuery as pq
import os

# Request headers carrying the image host as Referer.
# NOTE(review): this dict is not used anywhere in the visible script -- the
# image downloads below send Hostreferer instead. It is kept so the name stays
# defined; confirm which Referer the image host actually expects.
Picreferer = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/73.0.3683.103 Safari/537.36',
    'Referer': 'http://i.meizitu.net'
}
# Request headers sent with every request this script makes.
Hostreferer = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/73.0.3683.103 Safari/537.36',
    'Referer': 'http://www.mzitu.com'
}
url_one = 'https://www.mzitu.com'

# Albums are saved under <cwd>/data/meizitu/<album title>/.
# The original literal was './data./meizitu'; the stray dot after "data" names
# a different directory ("data.") on POSIX systems, so it is dropped here.
SAVE_ROOT = os.path.join('data', 'meizitu')
# Seconds to wait for a server before giving up on a request.
REQUEST_TIMEOUT = 30
# Characters that cannot appear in a file or directory name on common systems.
INVALID_FILENAME_CHARS = '\\/:*?"<>|'


def largest_number(texts):
    """Return the largest integer among *texts*, or 0 if none is numeric.

    Used to read a page count out of pager labels. The original code sliced
    the joined label text at fixed offsets ([10:13] and [15:17]), which only
    works when the count has exactly three / two digits.
    NOTE(review): assumes the largest numeric label in a pager is its last
    page -- confirm against the site's markup.
    """
    stripped = (text.strip() for text in texts)
    numbers = [int(text) for text in stripped if text.isdecimal()]
    return max(numbers, default=0)


def safe_dirname(title):
    """Return *title* made safe for use as a single directory name.

    Path separators, other reserved characters and control characters become
    '_'; surrounding whitespace and trailing dots are removed. A title that
    ends up empty (for example '..') is replaced by 'untitled'.
    """
    cleaned = ''.join(
        '_' if ch in INVALID_FILENAME_CHARS or ch < ' ' else ch
        for ch in title
    )
    cleaned = cleaned.strip().rstrip(' .')
    return cleaned or 'untitled'


def fetch(url, headers):
    """Return the response of a GET request for *url*.

    A timeout is always passed so a stalled connection cannot hang the crawl,
    and HTTP error statuses raise instead of letting an error page be parsed
    or written to disk as an image.

    Raises:
        requests.RequestException: on connection errors, timeouts or an HTTP
            error status.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def download_album(album_url, title, album_index):
    """Download every picture of one album into its own directory.

    The directory is SAVE_ROOT/<title>; files are named
    '<album_index>.<picture number>.jpg', as in the original script.
    A picture that cannot be fetched is reported and skipped.

    Raises:
        requests.RequestException: if the album page itself cannot be fetched.
    """
    # Build absolute-from-cwd paths instead of chdir-ing in and out, so an
    # error in the middle of an album cannot leave the process in the wrong
    # working directory.
    album_dir = os.path.join(SAVE_ROOT, safe_dirname(title))
    # exist_ok lets the script be re-run without crashing on existing albums.
    os.makedirs(album_dir, exist_ok=True)

    album_doc = pq(fetch(album_url, Hostreferer).text)
    picture_count = largest_number(
        span.text() for span in album_doc('.pagenavi span').items()
    )
    if picture_count == 0:
        print("未找到图片数量,跳过:" + title)
        return

    for m in range(1, picture_count + 1):
        try:
            page_doc = pq(fetch(album_url + '/' + str(m), Hostreferer).text)
            photo_url = page_doc('.main-image img').attr('src')
            if not photo_url:
                print("第 " + str(m) + " 张未找到图片地址,跳过")
                continue
            response = fetch(photo_url, Hostreferer)
        except requests.RequestException as error:
            print("第 " + str(m) + " 张下载失败:" + str(error))
            continue
        target = os.path.join(album_dir, str(album_index) + "." + str(m) + ".jpg")
        # "wb", not "ab": appending to a file left by an earlier run would
        # produce a corrupt image.
        with open(target, "wb") as ft:
            ft.write(response.content)


def main():
    """Ask how many listing pages to download and fetch every album on them."""
    front_doc = pq(fetch(url_one, Hostreferer).text)
    page_numbers = largest_number(
        link.text() for link in front_doc('.page-numbers').items()
    )

    page_number = input("下载多少页?")
    print("#" + page_number + "#")
    try:
        wanted_pages = int(page_number)
    except ValueError:
        print("请输入数字")
        return
    if wanted_pages > page_numbers:
        # The original silently did nothing in this case.
        print("最多只能下载 " + str(page_numbers) + " 页")
        return

    # Running album counter across all listing pages; it prefixes file names.
    y = 0
    for i in range(1, wanted_pages + 1):
        list_doc = pq(fetch('https://www.mzitu.com/page/' + str(i), Hostreferer).text)
        for link in list_doc(' li span a').items():
            y += 1
            taotu = link.attr('href')
            title = link.text()
            print("正在下载:" + title)
            if not taotu:
                continue
            try:
                download_album(taotu, title, y)
            except requests.RequestException as error:
                # One unreachable album should not abort the whole crawl.
                print("下载失败,跳过:" + title + " (" + str(error) + ")")
        print("第", i, "页下载完成")


if __name__ == "__main__":
    main()


 

你可能感兴趣的:(超简洁爬取mzitu网站 按照标题分类存取套图)