需要安装的第三方库有 BeautifulSoup(bs4)、requests 和 lxml(代码中使用 'lxml' 作为解析器);threading 属于 Python 标准库,无需另行安装。
代码如下:
from bs4 import BeautifulSoup
import requests
import os
import threading
import sys
def get_urls(pages=range(1, 6), timeout=10):
    """Collect album URLs from the win4000 tag listing pages.

    Each listing page is fetched and the ``href`` of the first link inside
    every ``<li>`` of the ``Left_bar`` / ``ul.clearfix`` list is collected.

    Args:
        pages: Iterable of listing page numbers to fetch. Defaults to pages
            1 through 5.
        timeout: Seconds to wait on each HTTP request before it is treated
            as a connection failure.

    Returns:
        A list of album URL strings, in the order they appear on the pages.

    Raises:
        SystemExit: If any listing request raises
            ``requests.RequestException`` (including a timeout).
    """
    urls = []
    for page in pages:
        listing_url = 'http://www.win4000.com/meinvtag26_' + str(page) + '.html'
        try:
            res = requests.get(listing_url, timeout=timeout)
        except requests.RequestException:
            print('连接失败')
            # Non-zero status so a calling shell can tell the run failed.
            sys.exit(1)

        if res.status_code != 200:
            # An error page does not carry the album list; parsing it would
            # only fail on a missing element, so skip this page.
            continue
        print('连接成功')

        soup = BeautifulSoup(res.text, 'lxml')
        # find() returns None when an element is absent; check each step
        # instead of chaining calls on a possible None.
        left_bar = soup.find(class_='Left_bar')
        if left_bar is None:
            continue
        album_list = left_bar.find('ul', class_='clearfix')
        if album_list is None:
            continue

        for item in album_list.find_all('li'):
            link = item.find('a')
            href = link.get('href') if link is not None else None
            if href:
                urls.append(href)
    return urls
def _safe_dirname(name):
    """Return *name* with characters that are invalid in file names replaced by ``_``."""
    cleaned = name.translate(str.maketrans({ch: '_' for ch in '\\/:*?"<>|'}))
    # Trailing dots/spaces are rejected or silently dropped by some systems.
    cleaned = cleaned.strip().rstrip('.')
    return cleaned or 'untitled'


def _find_picture_url(soup):
    """Return the image URL inside the ``pic-meinv`` element, or None if absent."""
    node = soup.find(class_='pic-meinv')
    for tag_name in ('a', 'img'):
        if node is None:
            return None
        node = node.find(tag_name)
    if node is None:
        return None
    # 'data-original' is the attribute the page is scraped for; fall back to
    # the plain 'src' in case the image is not marked up that way.
    return node.get('data-original') or node.get('src')


def grab_download(url, timeout=10):
    """Download every picture of one album into ``pics/<album title>/``.

    The album page at *url* supplies the title (``.ptitle h1``) and the
    number of pictures (``.ptitle em``). Picture ``i`` is then read from the
    page ``<url without .html>_<i>.html`` and saved as ``<i>.jpg``.

    Args:
        url: URL of the album's first page.
        timeout: Seconds to wait on each HTTP request.

    Raises:
        requests.RequestException: If a page or image request fails.
        AttributeError: If the album page lacks the ``ptitle`` header, its
            ``h1`` or its ``em`` element.
        ValueError: If the picture count is not an integer.
    """
    res2 = requests.get(url, timeout=timeout)
    soup2 = BeautifulSoup(res2.text, 'lxml')
    header = soup2.find(class_='ptitle')
    # get_text() also works when the tag has nested children, where .string
    # would return None.
    title = header.find('h1').get_text(strip=True)
    page_count = int(header.find('em').get_text(strip=True))

    # The title comes from the remote page, so sanitize it before using it
    # as a directory name.
    folder = os.path.join('pics', _safe_dirname(title))
    # exist_ok avoids the check-then-create race between download threads.
    os.makedirs(folder, exist_ok=True)
    print('正在下载图集' + title)

    base = url[:-5] if url.endswith('.html') else url
    for i in range(1, page_count + 1):
        res3 = requests.get(base + '_' + str(i) + '.html', timeout=timeout)
        soup3 = BeautifulSoup(res3.text, 'lxml')
        pic_url = _find_picture_url(soup3)
        if pic_url is None:
            print(title + ' 第' + str(i) + '张图片未找到,已跳过')
            continue

        # Fetch before opening the file so a failed download does not leave
        # an empty .jpg behind, and never store an HTTP error body as an image.
        img_res = requests.get(pic_url, timeout=timeout)
        if img_res.status_code != 200:
            print(title + ' 第' + str(i) + '张图片下载失败,已跳过')
            continue
        with open(os.path.join(folder, str(i) + '.jpg'), 'wb') as f:
            f.write(img_res.content)
    print(title + '图集下载完成')
if __name__ == '__main__':
    # Script entry point: collect the album URLs, then download each album
    # on its own thread (the work is network-bound).
    urls = get_urls()
    threads = [
        threading.Thread(target=grab_download, args=(url,))
        for url in urls
    ]
    for worker in threads:
        worker.start()
    # Keep the thread objects and wait for them explicitly, so anything added
    # after this point runs only once every download has finished.
    for worker in threads:
        worker.join()