各位小伙伴们,大家好呀,上一次利用python多线程爬取高清壁纸,这一次,将会用到队列,让线程之间的通信变得安全,喜欢高清壁纸的小伙伴们,赶快去试试吧!
import requests
import threading
import queue
from lxml import etree
import re
import os
# Root folder under which one sub-folder per album is created.
# Raw string: the backslashes are literal Windows path separators, not escape
# sequences (a non-raw '\爬' is an invalid escape and warns on modern Python).
DOWN_PATH = r'G:\爬虫下载\娟娟壁纸'
# Thread-safe FIFO queue of download jobs; each job is [url, name, index].
q = queue.Queue()
# Browser-like request headers sent with every HTTP request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}
# Download worker
def download():
    """Consume download jobs from the shared queue and save each image.

    Each job taken from ``q`` is a ``[url, name, i]`` triple. The image is
    fetched and written as ``<i>.jpg`` inside a folder called ``name`` under
    ``DOWN_PATH``. The loop ends once the crawler thread ``t1`` is no longer
    alive and the queue is empty.
    """
    while True:
        try:
            # Wait with a timeout rather than blocking forever: a bare
            # q.get() hangs permanently if the crawler thread finishes while
            # this worker is already waiting on an empty queue.
            url, name, i = q.get(timeout=1)
        except queue.Empty:
            # Stop only when the producer is done AND nothing is left.
            if not t1.is_alive() and q.empty():
                break
            continue

        path = os.path.join(DOWN_PATH, name)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(path, exist_ok=True)

        try:
            r = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as err:
            # One failed image must not kill the worker and strand the queue.
            print("下载失败{}{}.jpg: {}".format(name, i, err))
            continue

        # Write via an explicit path instead of os.chdir(): the working
        # directory is process-wide state shared by every thread.
        file_path = os.path.join(path, '{}.jpg'.format(i))
        # `with` releases the lock even if the write raises.
        with lock:
            with open(file_path, 'wb') as f:
                f.write(r.content)
        print("下载完成{}{}.jpg".format(name, i))
# Detail-page logic
def detail(link):
    """Fetch one album detail page and enqueue a download job per image.

    The album name is parsed from the page title, which is expected to look
    like ``<name>(<current>/<total>)``. Every image URL found on the page is
    put on ``q`` as ``[pic_url, name, i]`` with ``i`` counting from 1.
    Pages whose title or first image cannot be found are skipped instead of
    raising, so one odd page does not kill the crawler thread.

    Args:
        link: Absolute URL of the album detail page.
    """
    # Request the detail page; the response is decoded as GBK.
    r = requests.get(link, headers=headers)
    r.encoding = 'gbk'
    tree = etree.HTML(r.text)

    # Title text that carries the album name and the picture counter.
    titles = tree.xpath('//div[@class="wzfz tu-tit fix"]/h1/span/text()')
    # The first picture sits directly in <li>; the others are wrapped in <a>.
    first_pics = tree.xpath('//ul[@id="showImg"]/li/img/@src')

    # Raw string for the pattern; \d+ so multi-digit counters also match.
    match = re.search(r'(.*?)\(\d+/\d+', titles[0]) if titles else None
    if match is None or not first_pics:
        print("跳过无法解析的详情页{}".format(link))
        return
    name = match.group(1)

    # Keep the first picture at the front so numbering follows page order.
    pic_urls = first_pics[:1] + tree.xpath('//ul[@id="showImg"]/li/a/img/@src')
    for i, pic_url in enumerate(pic_urls, start=1):
        q.put([pic_url, name, i])
# Listing-page logic
def main(base_url):
    """Fetch one listing page and process every album linked from it.

    The page is requested with the shared ``headers``, decoded as GBK and
    parsed; each extracted href is prefixed with the site root and handed to
    ``detail``.

    Args:
        base_url: URL of the listing page to crawl.
    """
    response = requests.get(base_url, headers=headers)
    response.encoding = 'gbk'
    page = etree.HTML(response.text)
    # First <a> of every <li> in the "picbz" list points at an album page.
    for href in page.xpath('//ul[@class="picbz"]/li/a[1]/@href'):
        detail('http://www.jj20.com' + href)
def run(total_pages=39):
    """Crawl every listing page of the category in order.

    Args:
        total_pages: Number of listing pages to visit; pages 1 through
            ``total_pages`` inclusive are crawled. Defaults to 39, the page
            count stated in the original comment (the previous
            ``range(1, 39)`` stopped at page 38).
    """
    for i in range(1, total_pages + 1):
        url = 'http://www.jj20.com/bz/ktmh/list_16_{}.html'.format(i)
        main(url)
# Script entry point
if __name__ == '__main__':
    # Module-level lock that download() holds while writing a file.
    lock = threading.Lock()
    # t1 crawls pages and fills the queue; t2 drains it and saves images.
    # download() reads t1 as a global to check whether the crawler is alive.
    t1 = threading.Thread(target=run)
    t2 = threading.Thread(target=download)
    # Start the crawler first, then the downloader.
    for worker in (t1, t2):
        worker.start()
总结,多线程之间使用队列,可以让线程之间的数据传输变得更加安全
好了,以上就是本文的全部内容,希望对大家的学习有所帮助