需求:抓取每天的 NBA 赛况、排名和明日赛程预告,并在每天中午 12:30 发送到邮箱。
效果:
实现步骤:
http://data.sports.sohu.com/nba/frag/index_live_json.html?_=1540985005305
不难发现,其后缀是一个毫秒级的时间戳,因此我们在请求时加上当前时间戳。解析返回的 JSON 数据,获取球队和比分信息;我们只取今天和明天的比赛信息。
import requests
import json
import time
import yagmail
from lxml import etree
class NbaNews(object):
    """Build a daily NBA digest (results, fixtures, standings) and mail it.

    Game data is read from a Sohu JSON endpoint and the standings are scraped
    from a Baidu sports page; the assembled plain-text report is sent through
    yagmail.
    """

    # Display aliases. Team names that are not two characters long are
    # replaced so the tab-separated columns of the mail body line up. The same
    # table is used for visiting teams, home teams and the standings.
    SHORT_NAMES = {
        "网": "篮网",
        "凯尔特人": "凯子",
        "开拓者": "开拓",
        "步行者": "步行",
        "尼克斯": "尼克",
        "森林狼": "森狼",
        "独行侠": "小牛",
    }

    # Seconds to wait for a remote endpoint before giving up, so a stalled
    # connection cannot hang the scheduled job forever.
    REQUEST_TIMEOUT = 10

    # XPath of one text cell in the standings page. `column` selects the
    # conference container (1 is used for the east, 2 for the west), `row` the
    # team row and `cell` the field inside that row.
    RANK_CELL_XPATH = (
        '//*[@id="matchContainer"]/div[1]/div[5]/div/div/div/div[1]/div/div'
        '/div[{column}]/div[{row}]/div/a/div[{cell}]/text()'
    )

    def __init__(self):
        self.session = requests.session()
        # Game information API; a millisecond timestamp is appended per request.
        self.sohu_nba_game = "http://data.sports.sohu.com/nba/frag/index_live_json.html?_="
        # Team standings page.
        self.baidu_nba_rank = "http://tiyu.baidu.com/match/nba/tab/%E6%8E%92%E5%90%8D/from/baidu_aladdin"
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
        }

    def _short_name(self, name):
        """Return the display alias for *name*, or *name* itself if it has none."""
        return self.SHORT_NAMES.get(name, name)

    def _game_url(self):
        """Return the game API URL with the current millisecond timestamp appended."""
        # The base URL contains no `{}` placeholder, so str.format() would
        # silently drop the timestamp; concatenate instead.
        return self.sohu_nba_game + str(int(time.time() * 1000))

    def game_data(self):
        """Fetch the game API and return the decoded JSON payload."""
        response = self.session.get(
            self._game_url(), headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        return json.loads(response.text)

    def game_detail(self, game_data):
        """Reduce one day's raw game list to ``{visitor: score, host: score}`` dicts.

        The first element of *game_data* is skipped (presumably a per-day
        header entry - confirm against the API payload). Each remaining
        element must provide ``vnamecn`` and ``hnamecn``; when either
        ``vtotal`` or ``htotal`` is absent both scores are recorded as
        ``None``, which marks a game that has not been played yet.

        Returns a list of two-item dicts, visiting team first.
        """
        game_details = []
        for game in game_data[1:]:
            v_team = self._short_name(game['vnamecn'])  # visiting team
            h_team = self._short_name(game['hnamecn'])  # home team
            try:
                v_score = game['vtotal']
                h_score = game['htotal']
            except KeyError:
                # No score fields: an upcoming game.
                v_score = None
                h_score = None
            game_details.append({v_team: v_score, h_team: h_score})
        return game_details

    def game_report(self, game_details):
        """Render the output of :meth:`game_detail` as a block of mail text.

        The first game decides the heading: a non-``None`` first score means
        today's results, otherwise tomorrow's fixtures. Inside a results block
        a game that has no scores yet is printed in fixture form. An empty
        *game_details* yields an empty string.
        """
        if not game_details:
            return ''

        has_scores = tuple(game_details[0].values())[0] is not None
        if has_scores:
            day = time.strftime('%Y-%m-%d', time.localtime(time.time()))
            game_reports = '-' * 5 + day + '/今天\tNBA赛程报告' + '-' * 5 + '\n'
        else:
            day = time.strftime('%Y-%m-%d', time.localtime(time.time() + 86400))
            game_reports = '-' * 5 + day + '/明天\tNBA赛程预告' + '-' * 5 + '\n'

        lines = []
        for game in game_details:
            teams = list(game.items())
            v_team, v_score = teams[0]
            h_team, h_score = teams[1]
            if has_scores and v_score is not None and h_score is not None:
                # str() keeps numeric scores from raising in the formatting.
                lines.append('{}(客) {} vs {} (主){}\n'.format(
                    v_team, str(v_score), str(h_score), h_team))
            else:
                lines.append('{}(客) vs (主){}\n'.format(v_team, h_team))
        return game_reports + ''.join(lines)

    def _rank_rows(self, column):
        """Scrape ``(team, wins, losses, win rate)`` for eight teams of one conference.

        Raises IndexError when an expected cell is missing, which most likely
        means the page layout changed.
        """
        response = self.session.get(
            self.baidu_nba_rank, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        tree = etree.HTML(response.text)
        rows = []
        # Rows div[3]..div[10] hold the first to eighth placed teams.
        for row in range(3, 11):
            cells = []
            # Cells div[3]..div[6]: team name, wins, losses, win rate.
            for cell in range(3, 7):
                texts = tree.xpath(
                    self.RANK_CELL_XPATH.format(column=column, row=row, cell=cell))
                if not texts:
                    raise IndexError(
                        'standings cell not found (column={}, row={}, cell={})'.format(
                            column, row, cell))
                cells.append(texts[0].strip())
            rows.append(tuple(cells))
        return rows

    def _format_rank(self, label, rows):
        """Format scraped standings *rows* as a titled, tab-separated table."""
        rank_title = '-' * 5 + \
            time.strftime('%Y-%m-%d', time.localtime(time.time())) + \
            '\tNBA' + label + '前八' + '-' * 5 + '\n' + \
            '排名\t球队\t胜\t负\t胜率\n'
        team_ranks = ''.join(
            str(rank) + '\t' + self._short_name(team) + '\t' + wins + '\t' +
            losses + '\t' + rate + '\n'
            for rank, (team, wins, losses, rate) in enumerate(rows, 1))
        return rank_title + team_ranks

    def east_team_rank(self):
        """Return the Eastern Conference top-eight table as text."""
        return self._format_rank('东部', self._rank_rows(1))

    def west_team_rank(self):
        """Return the Western Conference top-eight table as text."""
        return self._format_rank('西部', self._rank_rows(2))

    def mail_report(self, report):
        """Send *report* as the body of the daily mail via yagmail."""
        # The password is the mailbox's SMTP authorization code, not the login
        # password. NOTE(review): credentials and addresses are placeholders.
        yag = yagmail.SMTP(user="[email protected]", password="xxx", host='smtp.163.com')
        yag.send(to="[email protected]", subject="每日NBA快报", contents=report)

    def main(self, max_retries=3, retry_delay=2):
        """Fetch everything, print the digest and mail it.

        A TypeError while turning the game payload into text is treated as a
        bad response: the payload is fetched again, at most *max_retries*
        times with *retry_delay* seconds between attempts, after which the
        error propagates. This replaces unbounded recursion that could loop
        until RecursionError.
        """
        attempts = max(1, max_retries + 1)
        for attempt in range(1, attempts + 1):
            game_data = self.game_data()
            try:
                today_r = self.game_report(self.game_detail(game_data[0]))
                tomorrow_r = self.game_report(self.game_detail(game_data[1]))
                break
            except TypeError:
                if attempt == attempts:
                    raise
                time.sleep(retry_delay)
        print(today_r + '\n' + tomorrow_r)
        w_rank = self.west_team_rank()
        e_rank = self.east_team_rank()
        print(w_rank + '\n' + e_rank)
        self.mail_report(today_r + '\n' + tomorrow_r + '\n' + w_rank + '\n' + e_rank + '\n')
# Script entry point: build and send the digest only when executed directly,
# not when the module is imported.
if __name__ == '__main__':
    NbaNews().main()
把代码扔到服务器上,设置定时任务,比如每天12:30,这个时间早上大部分比赛都打完了,也刚吃完饭回来,那就看一下邮箱吧,每日NBA快报已经送达!