Games are meant for relaxing, and plenty of people also study them in depth. Today let's use Python to grab some information about the heroes of Honor of Kings (农药).
# author: 、Edgar
# date: 3/8
# version: 1.0.0
import urllib.error
import urllib.request
import re
import time
import os
# Pool and threading are only used by the commented-out concurrency
# experiments in __main__
from multiprocessing import Pool
import threading

try:
    from bs4 import BeautifulSoup, Comment
except ImportError:
    # fall back to installing bs4 on the fly if it is missing
    os.system("pip install bs4")
    from bs4 import BeautifulSoup, Comment
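# Note: os.system("pip install bs4") assumes a `pip` on PATH that belongs to
# the running interpreter. A sketch of a more robust fallback (hypothetical,
# not in the original) installs through the interpreter itself:
#     import subprocess, sys
#     subprocess.check_call([sys.executable, "-m", "pip", "install", "beautifulsoup4"])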

class WangZheSpider:
    def __init__(self, url):
        self.url = url                # URL of the hero's detail page
        self.html = None              # raw page source
        self.bs = None                # parsed BeautifulSoup tree
        self.alias = None             # the hero's alias on this page
        self.name = None              # the hero's name
        self.sort = None              # the hero's role
        self.history = None           # the hero's historical background
        self.story = None             # the hero's story
        self.data = None              # the hero's skill information
        self.suggestion_data = None   # suggested skill upgrade order
        self.skins = None             # the hero's skin information
    def get_html(self):
        """
        Download the page source and build the parse tree.
        """
        # note: the header key must be "User-Agent"; a misspelled key is
        # silently ignored by the server
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/75.0.3770.142 Safari/537.36"}
        try:
            request = urllib.request.Request(self.url, headers=header)
            response = urllib.request.urlopen(request)
        except urllib.error.HTTPError as e:
            print(e.reason)
        except urllib.error.URLError as e:
            print(e.reason)
        except Exception as e:
            print(e)
        else:
            if response is None:
                print("No html can be downloaded")
            else:
                self.html = response.read()
                self.bs = BeautifulSoup(self.html, "lxml")
    def get_alias(self):
        """
        Get the hero's alias.
        """
        self.alias = self.bs.find("h3", {"class": "cover-title"}).text
        return "外号: " + self.alias

    def get_name(self):
        """
        Get the hero's name.
        """
        self.name = self.bs.find("h2", {"class": "cover-name"}).text
        return "名称: " + self.name
    @staticmethod
    def which_sort(sort):
        """
        Map the role index on the page to a role name.
        """
        sorts = {1: "战士", 2: "法师", 3: "坦克", 4: "刺客", 5: "射手", 6: "辅助"}
        return sorts.get(sort)

    def get_sort(self):
        # bs.find returns a single Tag (findAll is what returns a list); the
        # role index is the last character of the <i> element's class name
        self.sort = int(self.bs.find("span", {
            "class": "herodetail-sort"}).find("i")["class"][0][-1])
        return "定位: " + str(WangZheSpider.which_sort(self.sort))
    def get_property(self):
        """
        Get the hero's attribute bars.
        """
        prop = self.bs.find("ul", {"class": "cover-list"})
        li_list = prop.findAll("li")
        data = ""
        for li in li_list:
            text = li.find("em", {"class": "cover-list-text fl"}).get_text()
            # slice off "width:" from e.g. "width:80%" to keep just the value
            length = li.find("span").find("i", {"class": "ibar"})["style"][6:]
            data += text + ": " + length + "\n"
        return data
    def get_hero_story(self):
        """
        Get the hero's story.
        """
        content = self.bs.find("div", {"id": "hero-story"})
        # strip the spaces out of the text with str.replace
        self.story = content.find("div", {
            "class": "pop-bd"}).get_text().replace(" ", "")
        if len(self.story) == 0:
            return "\n暂无英雄故事"
        else:
            return "英雄故事:" + self.story

    # history can be None
    def get_hero_history(self):
        """
        Get the hero's historical counterpart ("历史上的TA").
        """
        self.history = self.bs.find("div", {"id": "history"}).find("p").get_text()
        if len(self.history) == 0:
            return "\n历史上的TA: 暂无"
        else:
            return "\n历史上的TA:" + self.history
    def get_hero_skins(self):
        """
        Get the hero's skin names.
        """
        names = self.bs.find("div", {
            "class": "pic-pf"}).find("ul")["data-imgname"].replace("|", " & ")
        skins_names = "皮肤名称: " + names
        return skins_names
    def get_skills(self):
        """
        Get the hero's skill information.
        """
        div_list = self.bs.find("div", "skill-show").findAll("div", {
            "class": "show-list"})
        data = ""
        count = 0
        for div in div_list:
            skill_info_1 = div.find("p", {"class": "skill-name"})
            skill_name = skill_info_1.find("b").get_text()
            # the page always renders extra empty skill slots; skip them
            if len(skill_name) == 0:
                continue
            try:
                skill_cool = skill_info_1.findAll("span")[0].get_text()[4:]
            except (IndexError, AttributeError):
                skill_cool = "None"
            try:
                skill_consumer = skill_info_1.findAll("span")[1].get_text()[3:]
            except (IndexError, AttributeError):
                skill_consumer = "None"
            try:
                skill_desc = div.find("p", {"class": "skill-desc"}).get_text()
            except AttributeError:
                skill_desc = "None"
            try:
                skill_tips = div.find("div", {"class": "skill-tips"}).get_text()
            except AttributeError:
                skill_tips = "None"
            count += 1
            data += ("\n第 {} 个技能>>\n".format(count)
                     + "技能名称: " + skill_name + "\n"
                     + "技能冷却: " + skill_cool + "\n"
                     + "技能消耗: " + skill_consumer + "\n"
                     + "技能描述: " + skill_desc + "\n"
                     + "Tips: " + skill_tips + "\n")
        return data
    def get_skills_suggestion(self):
        """
        Get the suggested skill upgrade order.
        TODO: oddly, the static page source shows the same values for every
        hero, yet inspecting the live page in a browser shows the correct ones
        """
        content = self.bs.find("div", {"class": "sugg-info2 info"})
        suggestion_list = content.findAll("p", {"class": "sugg-name"})
        suggestion_data = ""
        for suggestion in suggestion_list:
            sugg_b = suggestion.b.get_text()
            sugg_span = suggestion.span.get_text()
            suggestion_data += sugg_b + ": " + sugg_span + "\n"
        return suggestion_data
    # a little difficult for me to solve the javascript
    # I don't know how to implement these yet: the static page doesn't contain
    # the content, and what searching turns up isn't correct
    def get_inscription_suggestion(self):
        """
        Get the suggested inscriptions.
        TODO: beyond my current ability; revisit once I can solve this kind of problem
        """
        comment_included = self.bs.find("div", {
            "class": "sugg-info info"}).find("ul", {
            "class": "sugg-u1"})
        # grab the HTML comment embedded in the markup
        print(comment_included)
        comment = comment_included.find(text=lambda text: isinstance(text, Comment))
        # capture the text between <em> tags inside the comment
        name_pattern = re.compile("(<em>)(.*?)(</em>)")
        em_list = re.findall(name_pattern, comment)
        inscription_name_list = []
        for name in em_list:
            inscription_name_list.append(name[1])
        print(inscription_name_list)
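    # The inscription block above is filled in by JavaScript, so the static
    # source has nothing useful. Scrapers for this site commonly read the JSON
    # files the page's own scripts load. A minimal sketch under the assumption
    # that https://pvp.qq.com/web201605/js/ming.json exists and is gbk-encoded
    # (both worth verifying in the browser's network tab first):
    def get_inscription_json(self):
        import json
        url = "https://pvp.qq.com/web201605/js/ming.json"  # assumed endpoint
        try:
            raw = urllib.request.urlopen(url).read()
            # the site's JSON files are typically gbk-encoded rather than utf-8
            return json.loads(raw.decode("gbk", errors="ignore"))
        except Exception as e:
            print(e)
            return None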
    def get_equip_suggestion(self):
        """
        Get the hero's recommended equipment.
        TODO:
        """
        content = self.bs.find("div", {"class": "equip-info l"})
        print(content)

    def get_strategy(self):
        """
        Get the strategy guides.
        TODO: still loaded dynamically via js, which I don't really understand yet
        """
        content = self.bs.find("div", {
            "class": "strategy fl"}).find("ul").findAll("li")
        print(content)
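    # For JS-rendered sections like get_strategy, one general approach is to
    # let a real browser render the page and re-parse the resulting DOM. A
    # minimal sketch with selenium (an extra dependency; assumes a matching
    # chromedriver is installed); an illustration, not the original's method:
    def get_rendered_html(self):
        from selenium import webdriver
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")  # render without opening a window
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(self.url)
            # replace the static parse tree with the fully rendered DOM
            self.bs = BeautifulSoup(driver.page_source, "lxml")
        finally:
            driver.quit()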

def get_all_hero_links_and_names():
    """
    Collect every hero's name and detail-page link from the hero list page.
    """
    baseUrl = "https://pvp.qq.com/web201605/herolist.shtml"
    try:
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/75.0.3770.142 Safari/537.36"}
        request = urllib.request.Request(baseUrl, headers=header)
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        print(e.reason)
    except urllib.error.URLError as e:
        print(e.reason)
    except Exception as e:
        print(e)
    else:
        bs = BeautifulSoup(response, "lxml")
        li_list = bs.find("ul", {"class": "herolist clearfix"}).findAll("li")
        link_list = []
        name_list = []
        for li in li_list:
            # relative hrefs are detail pages; absolute links containing "com"
            # point elsewhere and are skipped
            if li.a["href"].find("com") <= 0:
                link = "https://pvp.qq.com/web201605/" + li.a["href"]
                link_list.append(link)
                name_list.append(li.a.get_text())
        return link_list, name_list
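
# The hero list is also available as a JSON file that the page itself loads,
# which avoids HTML parsing entirely. A sketch assuming the herolist.json
# endpoint and its ename/cname fields (check the browser's network tab before
# relying on either):
def get_all_hero_links_and_names_json():
    import json
    url = "https://pvp.qq.com/web201605/js/herolist.json"  # assumed endpoint
    raw = urllib.request.urlopen(url).read()
    heroes = json.loads(raw.decode("gbk", errors="ignore"))
    # detail pages follow the herodetail/<ename>.shtml pattern seen in the
    # scraped relative hrefs above
    link_list = ["https://pvp.qq.com/web201605/herodetail/{}.shtml".format(h["ename"])
                 for h in heroes]
    name_list = [h["cname"] for h in heroes]
    return link_list, name_list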

def run(url):
    all_data = ""
    spider = WangZheSpider(url)
    spider.get_html()
    alias = spider.get_alias()
    print(alias)
    name = spider.get_name()
    print(name)
    sort = spider.get_sort()
    print(sort)
    data = spider.get_property()
    print(data)
    story = spider.get_hero_story()
    print(story)
    history = spider.get_hero_history()
    print(history)
    skills = spider.get_skills()
    print(skills)
    # suggestion = spider.get_skills_suggestion()
    # print(suggestion)
    skins = spider.get_hero_skins()
    print(skins)
    all_data += "-"*50 + "\n" + alias + '\n' + name + '\n' + sort + '\n' + data + '\n'
    all_data += story + '\n' + history + '\n' + skills + '\n' + skins + '\n'
    return all_data

if __name__ == "__main__":
    all_data = ""
    print("^"*50)
    print("|欢迎使用、Edgar制作的程序")
    print("程序运行完后会生成data.txt保存相关信息")
    print("^"*50)
    print("开始收集", end="")
    num = 1
    while num < 5:
        print('.', end='', flush=True)
        time.sleep(0.2)
        num += 1
    print("")
    start_time = time.time()
    link_list, name_list = get_all_hero_links_and_names()
    num = 0
    for link in link_list:
        num += 1
        print("-"*28, end="")
        print("{}".format(num), end="")
        print("-"*28)
        # t = threading.Thread(target=run, args=(link, ))
        # t.start()  # output got duplicated when run this way
        all_data += run(link)
        # pool = Pool(processes=4)  # this shortens the runtime, but packaging
        # pool.map(run, link_list)  # with pyinstaller failed, and not all of
        #                           # the data got written
    end_time = time.time()
    print("")
    print("用时: {} s".format(end_time - start_time))
    with open("data.txt", "w", encoding="utf-8") as file:
        file.write(all_data)
        file.write("\n\n完成,共用时:{} s".format(end_time - start_time))
    print("")
    # keep the console window from closing immediately
    input("按任意键退出......")