Python爬虫实战:英雄联盟(LOL)壁纸下载

本文爬取的是国服官网,因此图片没有所谓的 4K 之类的高清版本,可选择的尺寸也不多,所以我只下载了个人比较喜欢的两种尺寸。

import os
from urllib import request
import requests
import bs4
# Failure records ({'name': ..., 'url': ...}) appended by download_image
# whenever an image cannot be retrieved.
un_download_url = []


def un_download(file_name,url):
    """Build a record describing one failed download.

    Args:
        file_name: Name associated with the image that failed.
        url: URL that could not be downloaded.

    Returns:
        A new dict with the keys ``'name'`` and ``'url'``.
    """
    return {'name': file_name, 'url': url}

def download_image(url_info,dirpath,file_name):
    """Download the two images of one skin into ``dirpath``.

    Args:
        url_info: Sequence whose items 1 and 2 are the image URLs to fetch
            (item 0 is not used here).
        dirpath: Directory the files are written to, as ``1.jpg`` and
            ``2.jpg``.
        file_name: Label used in the progress message and in the failure
            record.

    Downloading is best-effort: a failed retrieval is not raised but
    recorded in the module-level ``un_download_url`` list.
    """
    for index in range(1, 3):
        image_url = url_info[index]
        target = os.path.join(dirpath, "%d.jpg" % index)
        try:
            request.urlretrieve(image_url, target)
        except Exception:
            # Exception (not a bare except) so that Ctrl-C and SystemExit
            # still stop the run instead of being logged as a failed download.
            un_download_url.append(un_download(file_name, image_url))
        else:
            print("%s下载完成!" % (file_name + image_url))

def git_image_url(datas):
    """Collect the name and image URLs of every skin entry.

    Args:
        datas: Iterable of mappings, each providing the keys ``'name'``,
            ``'mainImg'`` and ``'loadingImg'``.

    Returns:
        A list holding one ``[name, mainImg, loadingImg]`` list per entry,
        in input order.
    """
    return [
        [skin['name'], skin['mainImg'], skin['loadingImg']]
        for skin in datas
    ]

# def repalce_symbol(file_name):
#     specific_symbols = ['\\','*','"','?','<','>','/','|']
#     for specific_symbol in specific_symbols:
#         print(specific_symbol)
#         file_name = file_name.replace(':','-')
#         print(type(file_name))
#         file_name = file_name.repalce('d','')
#     return file_name
def get_hero_id():
    """Fetch the hero list and return the ``heroId`` of every hero.

    Returns:
        A list of the ``heroId`` values found under the ``'hero'`` key of
        the JSON response, in response order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    id_url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    header1 = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
        'referer': 'https://lol.qq.com/data/info-heros.shtml'
    }
    # A timeout keeps an unresponsive server from blocking the script forever.
    resp = requests.get(id_url, headers=header1, timeout=10)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    resp.raise_for_status()
    return [hero['heroId'] for hero in resp.json()['hero']]

def main():
    """Download the skin images of every hero into ``lol/<skin name>/``.

    For each hero id the hero's JSON description is fetched; for every entry
    under its ``'skins'`` key a directory named after the skin is created and
    the images are downloaded into it. A skin whose directory already exists
    is skipped.

    Raises:
        requests.RequestException: If a hero request fails, times out, or
            returns an HTTP error status.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
    heroIds = get_hero_id()
    for heroId in heroIds:
        base_url = 'https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js'.format(heroId)
        header2 = {
            'user-agent': user_agent,
            # The header value must be the URL only; the header name does not
            # belong inside the value.
            # NOTE(review): the 'info-defail' spelling is kept as written --
            # presumably it matches the site's real page name; confirm.
            'referer': 'https://lol.qq.com/data/info-defail.shtml?id={}'.format(heroId)
        }
        # A timeout keeps one unresponsive request from hanging the whole run.
        resp = requests.get(base_url, headers=header2, timeout=10)
        resp.raise_for_status()
        datas = resp.json()['skins']

        for url_info in git_image_url(datas):
            # Strip characters that are problematic in directory names.
            file_name = url_info[0].replace(':','-').replace('/','').replace('"','')
            dirpath = os.path.join('lol',file_name)
            if os.path.exists(dirpath):
                # Already handled on a previous run.
                continue
            # makedirs also creates the parent 'lol' directory on first use,
            # where plain mkdir would raise FileNotFoundError.
            os.makedirs(dirpath)
            download_image(url_info,dirpath,file_name)


# Start the download only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

你可能感兴趣的:(爬虫)