利用python获取王者荣耀英雄的相关信息

游戏是用来放松身心的,也有很多人对游戏研究很深。今天我们就用 Python 获取《王者荣耀》(俗称"农药")中英雄的部分信息吧。

# author: 、Edgar
# date: 3/8
# version: 1.0.0

import urllib
import urllib.error
import urllib.request

import re
import time
from multiprocessing import Pool
import threading
import os

try:
    from bs4 import BeautifulSoup, Comment
except ImportError:
    # Install the missing dependencies with the *same* interpreter that is
    # running this script (os.system("pip ...") may target another Python).
    # lxml is installed as well because get_html() parses with the "lxml"
    # backend, which the original bootstrap forgot to provide.
    import subprocess
    import sys
    subprocess.run([sys.executable, "-m", "pip", "install", "bs4", "lxml"],
                   check=False)
    from bs4 import BeautifulSoup, Comment

class WangZheSpider:
    """Scraper for a single hero detail page on pvp.qq.com (Honor of Kings).

    Call get_html() first: it downloads the page and populates self.bs,
    which every other getter reads. The getters return human-readable
    Chinese strings ready for printing.
    """

    # Numeric role id (last digit of the <i> element's CSS class) -> label.
    _SORT_NAMES = {1: "战士", 2: "法师", 3: "坦克", 4: "刺客", 5: "射手", 6: "辅助"}

    def __init__(self, url):
        self.url = url  # hero detail page URL
        self.html = None  # raw page bytes (set by get_html)
        self.bs = None  # parsed BeautifulSoup tree (set by get_html)
        self.alias = None  # hero alias/nickname
        self.name = None  # hero name
        self.sort = None  # numeric role id (1-6)
        self.history = None  # "the hero in history" text
        self.story = None  # hero background story
        self.data = None  # skill information
        self.suggestion_data = None  # skill levelling suggestion
        self.skins = None  # skin information

    def get_html(self):
        """Download the page and build the BeautifulSoup tree.

        On failure the error is printed and self.html/self.bs stay None,
        so later getters will raise AttributeError.
        """
        # Bug fix: the header key was misspelled "Users-Agent", so no
        # User-Agent header was actually sent to the server.
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/75.0.3770.142 Safari/537.36"}
        try:
            request = urllib.request.Request(self.url, headers=header)
            response = urllib.request.urlopen(request)
        except urllib.error.HTTPError as e:
            print(e.reason)
        except urllib.error.URLError as e:
            print(e.reason)
        except Exception as e:
            print(e)
        else:
            if response is None:
                print("No html can be downloaded")
            else:
                self.html = response.read()
                self.bs = BeautifulSoup(self.html, "lxml")

    def get_alias(self):
        """Return the hero's alias, prefixed with "外号: "."""
        self.alias = self.bs.find("h3", {"class": "cover-title"}).text
        return "外号: " + self.alias

    def get_name(self):
        """Return the hero's name, prefixed with "名称: "."""
        self.name = self.bs.find("h2", {"class": "cover-name"}).text
        return "名称: " + self.name

    @staticmethod
    def which_sort(sort):
        """Map a numeric role id (1-6) to its label; None when unknown."""
        return WangZheSpider._SORT_NAMES.get(sort)

    def get_sort(self):
        """Return the hero's role, e.g. "定位: 战士"."""
        # The role id is encoded as the last character of the <i> element's
        # first CSS class name.
        self.sort = int(self.bs.find("span", {"class": "herodetail-sort"}).find("i")["class"][0][-1])
        return "定位: " + str(WangZheSpider.which_sort(self.sort))

    def get_property(self):
        """Return the hero's attribute bars, one "name:  width" line each."""
        # Renamed the local from `property` so the builtin is not shadowed.
        prop = self.bs.find("ul", {"class": "cover-list"})
        lines = []
        for li in prop.findAll("li"):
            text = li.find("em", {"class": "cover-list-text fl"}).get_text()
            # The bar length lives in the inline style attribute, after "width:".
            length = li.find("span").find("i", {"class": "ibar"})["style"][6:]
            lines.append(text + ":  " + length + "\n")
        return "".join(lines)

    def get_hero_story(self):
        """Return the hero's background story (or a placeholder)."""
        content = self.bs.find("div", {"id": "hero-story"})
        # Strip the spaces the page uses for layout.
        self.story = content.find("div", {"class": "pop-bd"}).get_text().replace(" ", "")
        if not self.story:
            return "\n暂无英雄故事"
        return "英雄故事:" + self.story

    def get_hero_history(self):
        """Return the "hero in history" paragraph (or a placeholder)."""
        self.history = self.bs.find("div", {"id": "history"}).find("p").get_text()
        if not self.history:
            return "\n历史上的TA: 暂无"
        return "\n历史上的TA:" + self.history

    def get_hero_skins(self):
        """Return the skin names ("|"-separated in the markup, joined with " & ")."""
        names = self.bs.find("div", {"class": "pic-pf"}).find("ul")["data-imgname"].replace("|", " & ")
        return "皮肤名称: " + names

    def get_skills(self):
        """Return a formatted description of every skill."""
        div_list = self.bs.find("div", "skill-show").findAll("div", {"class": "show-list"})
        data = ""
        count = 0
        for div in div_list:
            skill_info_1 = div.find("p", {"class": "skill-name"})
            skill_name = skill_info_1.find("b").get_text()
            if len(skill_name) == 0:
                # Empty placeholder slots are skipped.
                continue
            # Narrowed the original bare excepts: a missing node raises
            # AttributeError, a missing <span> raises IndexError.
            try:
                skill_cool = skill_info_1.findAll("span")[0].get_text()[4:]
            except (AttributeError, IndexError):
                skill_cool = "None"
            try:
                skill_consumer = skill_info_1.findAll("span")[1].get_text()[3:]
            except (AttributeError, IndexError):
                skill_consumer = "None"
            try:
                skill_desc = div.find("p", {"class": "skill-desc"}).get_text()
            except AttributeError:
                skill_desc = "None"
            try:
                skill_tips = div.find("div", {"class": "skill-tips"}).get_text()
            except AttributeError:
                skill_tips = "None"
            count += 1
            data += ("\n第 {} 个技能>>\n".format(count)
                     + "技能名称: " + skill_name + "\n"
                     + "技能冷却: " + skill_cool + "\n"
                     + "技能消耗: " + skill_consumer + "\n"
                     + "技能描述: " + skill_desc + "\n"
                     + "Tips: " + skill_tips + "\n")

        return data

    def get_skills_suggestion(self):
        """Return the skill levelling suggestion.

        TODO: the static page serves identical markup for every hero here,
        while the browser view is populated dynamically.
        """
        content = self.bs.find("div", {"class": "sugg-info2 info"})
        suggestion_list = content.findAll("p", {"class": "sugg-name"})
        suggestion_data = ""
        for suggestion in suggestion_list:
            sugg_b = suggestion.b.get_text()
            sugg_span = suggestion.span.get_text()
            suggestion_data += sugg_b + ": " + sugg_span + "\n"
        return suggestion_data

    def get_inscription_suggestion(self):
        """Print the recommended inscription names.

        TODO: the data only exists inside an HTML comment on the static
        page; this method is still unfinished and only prints its findings.
        """
        comment_included = self.bs.find("div", {"class": "sugg-info info"}).find("ul", {"class": "sugg-u1"})
        print(comment_included)
        # Pull the HTML comment node out of the <ul>.
        comment = comment_included.find(text=lambda text: isinstance(text, Comment))
        # Bug fix: the pattern had degenerated to "()(.*?)()" (empty groups
        # matching at every position). Presumably it originally captured the
        # <em>...</em> runs inside the commented-out markup and the blog
        # rendering stripped the tags — TODO confirm against the live page.
        name_patter = re.compile(r"<em>(.*?)</em>")
        inscription_name_list = re.findall(name_patter, comment)
        print(inscription_name_list)

    def get_equip_suggestion(self):
        """Print the recommended-equipment markup.

        TODO: unfinished — currently only dumps the raw node.
        """
        content = self.bs.find("div", {"class": "equip-info l"})
        print(content)

    def get_strategy(self):
        """Print the strategy list items.

        TODO: the content is loaded dynamically via JS; unfinished.
        """
        content = self.bs.find("div", {"class": "strategy fl"}).find("ul").findAll("li")
        print(content)


def get_all_hero_links_and_names():
    """Fetch the hero-list page and return (link_list, name_list).

    Bug fixes relative to the original:
    - the header key was misspelled "Users-Agent", so no User-Agent was sent;
    - every failure path fell through and implicitly returned None, which
      crashed callers that unpack the result; now ([], []) is returned.
    """
    baseUrl = "https://pvp.qq.com/web201605/herolist.shtml"
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/75.0.3770.142 Safari/537.36"}
    try:
        request = urllib.request.Request(baseUrl, headers=header)
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        print(e.reason)
    except urllib.error.URLError as e:
        print(e.reason)
    except Exception as e:
        print(e)
    else:
        bs = BeautifulSoup(response, "lxml")
        li_list = bs.find("ul", {"class": "herolist clearfix"}).findAll("li")
        link_list = []
        name_list = []
        for li in li_list:
            # Keep only relative hrefs; entries already containing "com"
            # are absolute links to other sites.
            if li.a["href"].find("com") <= 0:
                link = "https://pvp.qq.com/web201605/" + li.a["href"]
                link_list.append(link)
                name_list.append(li.a.get_text())
        return link_list, name_list
    # Reached only when the request failed above.
    return [], []


def run(url):
    """Scrape one hero page, print each section, and return the combined text.

    The returned string starts with a 50-dash separator line and contains
    every printed section joined by newlines.
    """
    spider = WangZheSpider(url)
    spider.get_html()
    # get_skills_suggestion is skipped, as in the original (its source data
    # is unreliable on the static page).
    getters = (
        spider.get_alias,
        spider.get_name,
        spider.get_sort,
        spider.get_property,
        spider.get_hero_story,
        spider.get_hero_history,
        spider.get_skills,
        spider.get_hero_skins,
    )
    sections = []
    for getter in getters:
        piece = getter()
        print(piece)
        sections.append(piece)
    return "-" * 50 + "\n" + "\n".join(sections) + "\n"


if __name__ == "__main__":
    all_data = ""
    print("^"*50)
    print("|欢迎使用、Edgar制作的程序")
    print("程序运行完后会生成data.txt保存相关信息")
    print("^"*50)
    print("开始收集", end="")
    num = 1
    while num < 5:
        print('.', end='', flush=True)
        time.sleep(0.2)
        num += 1
    print("")
    start_time = time.time()
    link_list, name_list = get_all_hero_links_and_names()
    num = 0
    for link in link_list:
        num += 1
        print("-"*28, end="")
        print("{}".format(num), end="")
        print("-"*28)
        # t = threading.Thread(target=run, args=(link, ))
        # t.start() # 这样使用的时候会有重复
        all_data += run(link)
    # pool = Pool(processes=4)   # 此处可以减短时间,但是pyinstaller时包装错误,另外并没有信息写入完全
    # pool.map(run, link_list)
    end_time = time.time()
    print("")
    print("用时: {} s".format(end_time - start_time))
    with open("data.txt", "w", encoding="utf-8") as file:
        file.write(all_data)
        file.write("\n\n完成,共用时:{} s".format(end_time-start_time))
    print("")
    # 防止控制台闪退
    input("按任意键退出......")

你可能感兴趣的标签:python、爬虫