Python 极简美女爬虫器 Chrome

# Scrape cover-image URLs from Douyu and download them concurrently.

# import urllib.request,parse
from urllib import request,parse
import gevent
import gevent.monkey

gevent.monkey.patch_all()  #把所有的耗时转化成gevent的函数

import os
import re


def read_img():
    """Fetch the Douyu category page and return the cover-image URLs found on it.

    Returns:
        list[str]: every ``https://...jpg`` URL appearing in a
        ``data-original="..."`` attribute of the page; empty if the page
        layout changed and nothing matches.
    """
    url = "https://www.douyu.com/directory/game/yz"
    # Browser-like headers so the site serves the regular HTML page
    # instead of rejecting the request as a bot.
    send_headers = {
        'Host': 'www.douyu.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive'
    }

    req = request.Request(url=url, headers=send_headers, method="GET")
    # Close the connection deterministically even if the read fails.
    with request.urlopen(req) as response:
        content = response.read().decode("utf-8")

    return _extract_img_urls(content)


def _extract_img_urls(html):
    """Return all https .jpg URLs held in ``data-original="..."`` attributes of *html*."""
    # [^"]+ stays inside a single attribute value; the previous greedy .+
    # could span two data-original attributes on the same line.
    imgre = re.compile(r'data-original="(https://[^"]+\.jpg)"')
    return imgre.findall(html)

def write_img(file_name, image_url):
    """Download *image_url* and save it as ``./images/<file_name>.jpg``.

    Args:
        file_name: basename (without extension) for the saved file.
        image_url: URL of the image to download.
    """
    # Ensure the target directory exists; previously this crashed with
    # FileNotFoundError when ./images was missing.
    os.makedirs("./images", exist_ok=True)

    # Close the HTTP connection deterministically even on a failed read.
    with request.urlopen(image_url) as resp:
        content = resp.read()

    with open("./images/%s.jpg" % file_name, 'wb') as f:
        f.write(content)

def main():
    """Scrape the cover-image URL list and download every image concurrently."""
    images = read_img()

    # One greenlet per image; monkey patching (done at import time) makes
    # the blocking urllib calls inside write_img cooperative.
    spawns = [gevent.spawn(write_img, num, image_url)
              for num, image_url in enumerate(images)]

    # Block until every download greenlet has finished.
    gevent.joinall(spawns)


if __name__ == '__main__':
    main()




你可能感兴趣的:(Python 极简美女爬虫器 Chrome)