requests模块爬取王者荣耀所有英雄皮肤

效果图:
requests模块爬取王者荣耀所有英雄皮肤_第1张图片
对应的英雄目录
requests模块爬取王者荣耀所有英雄皮肤_第2张图片
英雄对应的皮肤
代码如下:
import requests, re, os

# Fetch the HTML of the hero-info page of the official Honor of Kings site.
def getNameAndUrl(url, toPath):
    """Download ``url`` and return its body decoded as GBK text.

    Args:
        url: Address of the page to download.
        toPath: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection failure, timeout, ...).
        UnicodeDecodeError: If the body is not valid GBK.
    """
    headers = {
        "Accept" : "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With" : "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8"
    }
    # A timeout keeps a stalled server from blocking the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Decode the raw bytes directly. Round-tripping ``response.text`` through
    # ISO-8859-1 only recovers the bytes when requests happened to pick that
    # codec; with any other detected encoding it raises UnicodeEncodeError or
    # yields corrupted text.
    return response.content.decode('GBK')

# Collect the hero names from the page, as paths under the target directory.
def getHerName():
    """Return one path per hero name found on the page.

    ``url`` and ``toPath`` are not parameters or locals; they are expected
    to exist at module level when this function is called.

    Returns:
        A list of ``os.path.join(toPath, name)`` strings, in page order.
    """
    pageHtml = getNameAndUrl(url, toPath)
    # The name is the middle capture group: the value of the alt attribute
    # (presumably on each hero's 91x91 avatar image -- confirm against the page).
    namePattern = re.compile(r'(width="91" height="91" alt=")(.*?)(">)')
    return [
        os.path.join(toPath, groups[1])
        for groups in namePattern.findall(pageHtml)
    ]

# 定义获取单个英雄的资料url
def getHeroUrl():
    htmlStr = getNameAndUrl(url, toPath)
    # 匹配每个英雄对应的url正则
    heroUrlre = re.compile(r'(
  • .
    .
    requests补充,定义专用于发送请求的函数:
    import requests
    from retrying import retry
    
    # Request headers for parseUrl; the User-Agent string is that of
    # mobile Chrome on an Android Nexus 6.
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Mobile Safari/537.36",
    }
    
    @retry(stop_max_attempt_number = 5)     # if the function raises, call it again; give up after 5 attempts in total
    def parseUrl(url):
        """Fetch ``url`` and return the response body decoded as text.

        Any exception raised here triggers another attempt via the ``retry``
        decorator until its attempt limit is reached.

        Args:
            url: Address to request.

        Returns:
            The response body decoded with ``bytes.decode()``'s default codec
            (UTF-8).

        Raises:
            AssertionError: If the response status code is not 200.
            Exception: Whatever ``requests.get`` raises (e.g. on timeout) or
                a decode error, once the retries are exhausted.
        """
        print("*" * 100)  # one marker line per attempt, to make retries visible
        response = requests.get(url, headers = headers, timeout = 5)   # may raise on timeout
        # Raise explicitly instead of using ``assert``: assert statements are
        # removed under ``python -O``, which would silently skip this check
        # (and the retry it triggers). The exception type is unchanged.
        if response.status_code != 200:
            raise AssertionError(
                "unexpected status code: {}".format(response.status_code)
            )
        return response.content.decode()
    
    def pares_url(url):
        """Call ``parseUrl`` and turn any failure into a ``None`` result.

        Args:
            url: Address to request.

        Returns:
            The text returned by ``parseUrl``, or ``None`` if it raised; the
            exception is printed before ``None`` is returned.
        """
        try:
            return parseUrl(url)
        except Exception as err:
            print("报错了:", err)
            return None
    
    if __name__ == '__main__':
        # Deliberately pass a bad URL (no scheme) so the request fails and the
        # retry behaviour can be watched through parseUrl's print("*" * 100).
        # Alternative experiment: call parseUrl("http://www.baaidu.com")
        # directly, without the error-swallowing wrapper.
        html = pares_url("www.baidu.com")
        print(html)
        outcome = "请求不成功" if html is None else "请求成功了"
        print(outcome)
    
    

    你可能感兴趣的:(requests模块爬取王者荣耀所有英雄皮肤)