# Python: download Douyu platform images concurrently with multi-task coroutines (gevent)
# gevent has to patch the standard library before modules that use
# sockets/ssl (such as urllib.request) are imported, so this goes first.
from gevent import monkey, pool
monkey.patch_all() # put a patch

import os
import re
import time, random
import urllib.request

import gevent
def down_load_img(img_url, img_name,
                  save_dir=r"G:\myproject_jiuye\hm_424_430\正则\beauty",
                  max_delay=1.0):
    """
    Download one picture and save it as ``img_name`` inside ``save_dir``.

    :param img_url: the url of the picture that you need
    :param img_name: the file name the picture is saved under
    :param save_dir: directory the picture is written to; it is created
        when it does not exist yet
    :param max_delay: non-negative upper bound (seconds) of the random
        pause taken after the picture is saved; 0 means no real pause
    :raises urllib.error.URLError: when the url cannot be fetched
    :raises OSError: when the picture cannot be written to disk
    """
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, even if read() fails.
    with urllib.request.urlopen(img_url) as response:
        img_content = response.read()

    # Make sure the target directory exists before writing into it.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, img_name), "wb") as f1:
        f1.write(img_content)

    # Pause only after the file is closed, so no file handle is held
    # while this task is idle. The random length spreads requests out.
    time.sleep(random.random() * max_delay)
def main(url_file=r"G:\myproject_jiuye\hm_424_430\正则\douyu_url.txt",
         max_images=100, pool_size=5):
    """
    Download the pictures whose urls appear in a saved page-source file.

    The file is scanned for ``https://...jpg`` urls; the first
    ``max_images`` of them are fetched concurrently by ``down_load_img``
    and saved as ``0.jpg``, ``1.jpg``, ... in match order. The elapsed
    time is printed when every task has finished.

    :param url_file: utf-8 text file that contains the picture urls
    :param max_images: maximum number of pictures to download
    :param pool_size: maximum number of coroutines running at once
    """
    # Read the text that holds the urls; `with` closes the file even
    # when reading fails.
    with open(url_file, "r", encoding="utf-8") as url_txt:
        url_content = url_txt.read()

    # The dot before "jpg" is escaped so that only a literal ".jpg"
    # ends a match (an unescaped dot would accept any character).
    ret_list = re.findall(r"https://.*?\.jpg", url_content)

    p = pool.Pool(pool_size)  # limit the number of concurrent coroutines
    t_start = time.time()  # mark the start time

    # Slicing caps the work at exactly `max_images` pictures.
    # p.spawn blocks while the pool is full, so tasks are started
    # gradually rather than all at once.
    wanted = ret_list[:max(max_images, 0)]
    my_list = [
        p.spawn(down_load_img, img_url, "%d.jpg" % num)
        for num, img_url in enumerate(wanted)
    ]

    gevent.joinall(my_list)  # wait until every download task has finished
    t_stop = time.time()  # mark the stop time

    # joinall does not raise when a task fails, so report failures here.
    failed = sum(1 for task in my_list if not task.successful())
    if failed:
        print("%d of %d downloads failed" % (failed, len(my_list)))
    print("Download completed!Enjoy now!Time of use:%.2fS" % (t_stop - t_start))
# Start the download only when this file is executed as a script,
# not when it is imported as a module.
if "__main__" == __name__:
    main()