使用python多线程爬取网页壁纸

最近了解了些多线程的知识:对于网络请求这类 I/O 密集型任务,使用多线程能大大提高程序效率。于是自己写了个多线程爬取必应壁纸的程序分享给大家。初来乍到,还请大家多多关照!有什么问题可以留言指出。

完整代码如下:
# -*- coding: utf-8 -*-
#@Time : 2020/12/22 12:13
#@Author : 但丁
#@File : 必应壁纸.py
#@Software: PyCharm

import os,requests,parsel,time,threading
# HTTP request headers sent with every request (a mobile-browser User-Agent).
headers = {'User-Agent':'Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+'}

# Output directory for the downloaded wallpapers, relative to the current
# working directory.  exist_ok=True makes the creation idempotent and avoids
# the check-then-create race of a separate os.path.exists() test.
file_bing = '多线程必应壁纸'
os.makedirs(file_bing, exist_ok=True)

# Wall-clock timestamp taken when the module is loaded.
time1 = time.time()

def get_url(url, timeout=10):
    """Scrape one gallery page and return its image URLs and file names.

    Args:
        url: Address of the gallery page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the calling thread forever.

    Returns:
        A tuple ``(tupian_url_list, tupian_name_list)``.  The first list holds
        the ``data-progressive`` attribute of every ``<img>`` element,
        rewritten to request the 1920x1080 variant.  The second list holds a
        ``.jpg`` file name built from the text of every ``<h3>`` element.
        The two lists are extracted independently of each other.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the gallery.
    response.raise_for_status()

    # Parse the document once and run both queries against the same tree.
    selector = parsel.Selector(response.text)
    thumb_urls = selector.xpath('//img//@data-progressive').getall()
    titles = selector.xpath('//h3/text()').getall()

    # Build the 1920x1080 URL: keep the first two '_'-separated parts and
    # replace whatever follows with the full-size suffix.
    tupian_url_list = []
    for thumb_url in thumb_urls:
        parts = thumb_url.split('_')
        if len(parts) < 2:
            # Unexpected URL shape: keep it unchanged so the position of
            # every entry in the list is preserved.
            tupian_url_list.append(thumb_url)
            continue
        tupian_url_list.append(parts[0] + '_' + parts[1] + '_1920x1080.jpg?imageslim')

    # Build the file name: drop the "(© ...)" credit that follows the title.
    tupian_name_list = [title.split('(©')[0] + '.jpg' for title in titles]

    return tupian_url_list, tupian_name_list

def saveData(url_name, timeout=30):
    """Download every image in ``url_name`` into the output directory.

    Args:
        url_name: A pair ``(urls, names)`` of equally ordered lists, as
            returned by ``get_url``.  If the lists differ in length the
            surplus entries of the longer one are ignored.
        timeout: Seconds to wait for the server on each download.

    A failed download or write is reported on stdout and skipped, so one bad
    image does not abort the remaining ones.
    """
    urls, names = url_name[0], url_name[1]

    # Names come from scraped page text: replace path separators and the
    # characters Windows forbids in file names so open() cannot fail on them
    # or write outside the output directory.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    for url, name in zip(urls, names):
        name = name.translate(unsafe_chars)
        # os.path.join picks the right separator for the current platform.
        path = os.path.join(file_bing, name)
        try:
            response = requests.get(url=url, headers=headers, timeout=timeout)
            # Do not store an HTTP error body on disk as if it were a JPEG.
            response.raise_for_status()
            with open(path, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as exc:
            print(f'图片【{name}】保存失败:{exc}')
            continue
        print(f'图片【{name}】保存成功!!')

def main(url):
    """Process one gallery page: collect its image list, then save the images.

    Args:
        url: Address of the page; it is passed to ``get_url`` and the
            resulting ``(urls, names)`` pair is handed to ``saveData``.
    """
    saveData(get_url(url))

if __name__ == '__main__':
    # range() excludes its upper bound, hence the +1 on the requested count.
    down_yema = int(input('请输入你想下载页码数(一页为12张):')) + 1

    # Start timing only after the prompt returns, so the reported total does
    # not include the time spent waiting for the user to type.
    start_time = time.perf_counter()

    # One worker thread per page; each thread fetches and saves that page.
    threads = []
    for yema in range(1, down_yema):
        url = f'https://bing.ioliu.cn/?p={yema}'
        thread = threading.Thread(target=main,args=(url,))
        thread.start()
        threads.append(thread)
        print(thread.name,thread.ident)

    # Wait for every page to finish before reporting the elapsed time.
    for t in threads:
        t.join()

    print(f'程序运行完毕!总耗时:{time.perf_counter() - start_time}s')

下面是运行图片:
使用python多线程爬取网页壁纸_第1张图片
可以看出使用多线程爬取两百多张1080p高清壁纸只需要22s左右,可见灵活使用多线程能大大提高程序运行效率!!
这次分享就到这里了,谢谢大家阅读!

你可能感兴趣的:(笔记,python,爬虫,多线程)