爬虫爬取王者荣耀英雄

爬虫爬取王者荣耀英雄

  • 爬取英雄所在标签
  • 得到英雄详情页与图片地址

爬取英雄所在标签

import re

import bs4
import requests
from bs4 import BeautifulSoup
# Beautiful Soup 4.4.0 documentation (Chinese translation):
# https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/#


def main():
    """Download the hero list page and save every hero-detail link tag.

    Fetches the hero list page, decodes it as GBK, collects every element
    whose ``href`` attribute matches ``herodetail`` and writes the markup
    of each one to ``../file/out.txt``, one element per line, as UTF-8.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output file cannot be opened or written.
    """
    url = 'https://pvp.qq.com/web201605/herolist.shtml'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, timeout=10)
    # Fail loudly instead of parsing an error page as if it were the list.
    response.raise_for_status()
    # The encoding override only affects ``response.text``; parsing the raw
    # ``response.content`` bytes would silently ignore it.
    response.encoding = 'gbk'
    soup = bs4.BeautifulSoup(markup=response.text, features='lxml')
    hero_list = soup.find_all(href=re.compile('herodetail'))
    # ``with`` guarantees the file is closed even if a write fails.
    with open(file='../file/out.txt', mode='w', encoding='utf-8') as out:
        out.writelines(str(tag) + '\n' for tag in hero_list)


# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


得到英雄详情页与图片地址

import bs4
from bs4 import BeautifulSoup


def main():
    """Convert the saved hero tags into name / detail-URL / image-URL lines.

    Reads ``../file/out.txt`` expecting one HTML anchor per line, takes the
    anchor's ``href`` and the ``src`` and ``alt`` of the image inside it,
    and writes one ``<alt> <detail url> <image url>`` line per hero to
    ``../file/out1.txt``.

    Lines that are blank, or that lack the anchor, the nested image, or any
    of the three attributes, are skipped instead of aborting the whole run.

    Raises:
        OSError: if the input file cannot be read or the output file
            cannot be written.
    """
    # ``href`` values are appended to this prefix to form the detail URL.
    href_prefix = 'https://pvp.qq.com/web201605/'
    # ``src`` values are appended to the scheme to form the image URL.
    img_prefix = 'https:'

    heroes = []
    with open(file='../file/out.txt', mode='r', encoding='utf-8') as src:
        for line in src:
            if not line.strip():
                continue
            soup = bs4.BeautifulSoup(markup=line, features='lxml')
            anchor = soup.a
            image = anchor.img if anchor is not None else None
            if image is None:
                # Not a hero entry (no <a> or no <img> inside it).
                continue
            href = anchor.get('href')
            img_src = image.get('src')
            name = image.get('alt')
            if href is None or img_src is None or name is None:
                continue
            heroes.append((name, href, img_src))

    # ``with`` guarantees the file is closed even if a write fails.
    with open(file='../file/out1.txt', mode='w', encoding='utf-8') as out:
        for name, href, img_src in heroes:
            out.write('{} {}{} {}{}\n'.format(
                name, href_prefix, href, img_prefix, img_src))


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

你可能感兴趣的:(python爬虫)