不知不觉,在b站(哔哩哔哩)做了一段时间的萌新up主了。(虽然是萌新,但是还是有颗想当大V的心的)古人云:工欲善其事,必先利其器。“器”在这里我定义为数据分析,用数据驱动产品。
哔哩哔哩的创作中心数据趋势只展示七天以内的数据。于是萌生出写一个dashboard的想法,把哔哩哔哩和YouTube的数据都放在mysql,用dashboard展示,还能从mysql拉历史数据,从不同维度进行数据分析。
最下边有完整源码的github地址
第一个版本的demo长这样:
开始写dashboard
图表的生成用到了pyecharts的库,官方提供了个生成html的render()方法和生成多图的page()方法,但是只能固定排列样式。temp传入后,要多图也只能for循环,不能固定样式。
于是,我到pyecharts源码里添加了个render_html_content()方法,作用是生成一个包含图表html代码的对象。然后就可以和python3的format()方法结合生成index.html。
注:web前端用了开源的keen/dashboards
format()技巧:
例如我们在html_temp.html写个{all},读取html_temp内容作为obj对象
然后obj.format(all=all),右边的all是html内容,执行后,就可以把{all}替换为html内容。
pyecharts源码修改:
(如何找到pip3安装的第三方库的地址?)
运行python3,
import pyecharts
pyecharts
就有地址了
修改render/engine.py
并在render类添加个render_html_content()函数
def render_html_content(self, template_name: str, chart: Any, path: str, **kwargs):
    """Render ``chart`` through the named template and return the markup.

    Nothing is written to disk here; ``path`` is accepted but not used by
    this method. Extra keyword arguments are forwarded to the template's
    ``render`` call.
    """
    template = self.env.get_template(template_name)
    linked_chart = self.generate_js_link(chart)
    rendered = template.render(chart=linked_chart, **kwargs)
    return utils.replace_placeholder(rendered)
修改charts/base.py
并在base类添加个render_html_content()函数
def render_html_content(
    self,
    path: str = "render.html",
    template_name: str = "simple_chart.html",
    env: Optional[Environment] = None,
    **kwargs,
) -> str:
    """Prepare the chart and return whatever ``engine.render_html_content`` produces.

    All arguments, including any extra keyword arguments, are passed
    through to the engine unchanged.
    """
    self._prepare_render()
    content = engine.render_html_content(self, path, template_name, env, **kwargs)
    return content
在render/templates添加个temp.html
{% import 'macro' as macro %}
{{ macro.render_chart_content(chart) }}
这个temp.html 去掉了<html>、<head>、<body>等标签,只输出图表的html代码
这时只需要调用
图表对象.render_html_content(template_name="temp.html")
和
html_obj.format(all=all)
就可以把图表html替换掉模版html的{all}
获取哔哩哔哩数据:
光有图表没数据可不行
以下几个公开的API可以获取播放量、粉丝数、点赞等数据
https://api.bilibili.com/x/relation/stat?vmid=哔哩哔哩id
https://api.bilibili.com/x/space/upstat?mid=哔哩哔哩id
http://api.bilibili.com/x/space/navnum?mid=哔哩哔哩id
我们可以先建一个bilibili表,然后把数据插入进去
表结构为:
-- Table holding the account statistics written by the collector script.
-- Each INSERT adds one row with four counters; `id` is the auto-increment
-- primary key, and the dashboard reads the rows back ordered by `id`.
-- The COMMENT strings are in Chinese: total plays, followers, likes,
-- and number of videos, in that order.
CREATE TABLE bilibili (
id int(8) unsigned NOT NULL AUTO_INCREMENT,
view int(9) NOT NULL COMMENT '播放总数',
follower int(9) NOT NULL COMMENT '被关注数',
likes int(9) NOT NULL COMMENT '点赞数',
video_count int(9) NOT NULL COMMENT '视频数',
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
获取数据的python脚本:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os,requests,json,pymysql
class spider(object):
    """Fetch one bilibili account's public counters and store them in MySQL.

    Constructing an instance opens the database connection; the connection
    and its cursor are closed again when the instance is garbage-collected.
    """

    # Seconds to wait for each HTTP request before giving up.
    REQUEST_TIMEOUT = 10
    # ``insert_testdata`` tops the table up to this many rows.
    TEST_ROW_COUNT = 5

    def __init__(self):
        # Pre-set both handles so __del__ stays safe if connecting fails.
        self.conn = None
        self.cursor = None
        # NOTE(review): host and credentials are hard-coded; consider moving
        # them to configuration.
        self.conn = pymysql.connect(
            host='192.168.28.140',
            port=3306,
            user='test',
            passwd='test123',
            db='test',
            charset='utf8',
        )
        self.cursor = self.conn.cursor()
        self.headers = {
            "user-agent": "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)",
            "referer": "https://space.bilibili.com/164106011/video",
        }
        self.vmid = "164106011"

    def __del__(self):
        # getattr guards against instances whose __init__ never ran or
        # failed before the handles were assigned.
        cursor = getattr(self, "cursor", None)
        if cursor is not None:
            cursor.close()
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()

    def insert_testdata(self):
        """Top the table up to ``TEST_ROW_COUNT`` rows of placeholder data.

        Does nothing (and prints nothing) when the table already holds at
        least that many rows.
        """
        self.cursor.execute("""select count(*) from bilibili;""")
        row_count = self.cursor.fetchall()[0][0]
        missing = self.TEST_ROW_COUNT - row_count
        if missing <= 0:
            return
        for i in range(missing):
            self.insert_to_database(1000 * i, 10 * i, 10 * i, 1 * i)
        self.conn.commit()
        print("已插入测试数据")

    def insert_to_database(self, view, follower, likes, video_count):
        """Insert one row of counters; the caller is responsible for committing.

        Values are converted with ``int()`` and handed to the driver as query
        parameters, so the SQL text itself is never built from the data.
        """
        sql = """INSERT INTO bilibili (view,follower,likes,video_count) VALUES (%s, %s, %s, %s)"""
        params = (int(view), int(follower), int(likes), int(video_count))
        self.cursor.execute(sql, params)
        print("已插入今日数据")

    def select_data(self):
        """Return the six rows with the highest ``id``, newest first."""
        sql = """select * from bilibili order by id DESC limit 6;"""
        self.cursor.execute(sql)
        return self.cursor.fetchall()

    def _fetch_data(self, url):
        """Return the ``data`` member of the JSON document served at ``url``.

        Raises ``requests.HTTPError`` on a non-success status instead of
        failing later on a missing key.
        """
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return json.loads(response.text)["data"]

    def spider_get_data(self):
        """Fetch the current counters from the three API endpoints, insert them and commit."""
        follower = self._fetch_data("https://api.bilibili.com/x/relation/stat?vmid=" + self.vmid)["follower"]
        upstat = self._fetch_data("https://api.bilibili.com/x/space/upstat?mid=" + self.vmid)
        view = upstat["archive"]["view"]
        likes = upstat["likes"]
        video_count = self._fetch_data("http://api.bilibili.com/x/space/navnum?mid=" + self.vmid)["video"]
        self.insert_to_database(view, follower, likes, video_count)
        self.conn.commit()
def main():
    """Script entry point: construct a ``spider`` instance and nothing else."""
    # NOTE(review): the only follow-up call in the original was commented out
    # (`bilibili.spider()`), so no fetch or insert happens here -- confirm
    # whether `spider_get_data()` was meant to be called.
    crawler = spider()


if __name__ == '__main__':
    main()
Python获取近五天日期的列表:
import datetime
def get_date(days: int = 5) -> list:
    """Return day labels such as ``"03月09日"`` for today and the days before it.

    Args:
        days: Number of consecutive days to cover, counting back from today.
            Defaults to 5.

    Returns:
        A list of ``days`` strings, newest first (index 0 is today).
    """
    # Read the clock once so every label is relative to the same day.
    today = datetime.date.today()
    labels = []
    for offset in range(days):
        day = today - datetime.timedelta(days=offset)
        # Zero-padded month/day match strftime's %m and %d. Formatting them
        # directly keeps the non-ASCII characters out of strftime, whose
        # handling of such format strings is platform dependent.
        labels.append("%02d月%02d日" % (day.month, day.day))
    return labels
date = get_date()[::-1] # 获取五天的日期
最后附上生成dashboard脚本:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pyecharts.faker import Faker
from pyecharts import options as opts
from pyecharts.charts import Pie,Page,Line
from pyecharts.globals import ThemeType
import get_data
import datetime
# Load the page template once at import time. The first line of the file is
# skipped; everything after it becomes the format string that main() fills in.
# The encoding is explicit so the result does not depend on the platform default.
with open("index_temp.html", "r", encoding="utf-8") as f:
    f.readline()  # discard the first line
    index_content = f.read()
def line_center(width, height, title, date, view):
    """Build the chalk-themed area line chart with a single "哔哩哔哩" series.

    ``date`` supplies the x-axis values and ``view`` the y values.
    ``title`` is accepted but not used; the series name is fixed.
    """
    chart = Line(init_opts=opts.InitOpts(theme=ThemeType.CHALK, width=width, height=height))
    chart.add_xaxis(date)
    chart.add_yaxis("哔哩哔哩", view)
    # A second "YouTube" series was left disabled in the original.
    chart.set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=True),
    )
    x_axis = opts.AxisOpts(
        axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
        is_scale=False,
        boundary_gap=False,
    )
    chart.set_global_opts(xaxis_opts=x_axis)
    return chart
def line_left(width, height, title, date, data):
    """Build a chalk-themed area line chart whose y-axis labels carry a "K" suffix.

    ``title`` is used as the series name, ``date`` as the x-axis values and
    ``data`` as the y values.
    """
    chart = Line(init_opts=opts.InitOpts(theme=ThemeType.CHALK, width=width, height=height))
    chart.add_xaxis(date)
    chart.add_yaxis(title, data)
    chart.set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=True),
    )
    y_axis = opts.AxisOpts(
        name="单位:/千人",
        axislabel_opts=opts.LabelOpts(formatter="{value} K"),
    )
    chart.set_global_opts(yaxis_opts=y_axis)
    return chart
def line_right(width, height, title, date, data):
    """Build a chalk-themed area line chart with default axes.

    ``title`` is used as the series name, ``date`` as the x-axis values and
    ``data`` as the y values.
    """
    chart = Line(init_opts=opts.InitOpts(theme=ThemeType.CHALK, width=width, height=height))
    chart.add_xaxis(date)
    chart.add_yaxis(title, data)
    chart.set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=True),
    )
    return chart
def bottom_all(width, height, title, date, view, follower, likes, video_count):
    """Build the stacked area chart that combines all four counters.

    The series are added in the order followers, likes, video count, plays,
    all on the same stack and with point labels hidden. ``title`` is
    accepted but not used.
    """
    chart = Line(init_opts=opts.InitOpts(theme=ThemeType.CHALK, width=width, height=height))
    chart.add_xaxis(date)

    # (series name, values) pairs, in the order they are stacked.
    stacked_series = (
        ("被关注数", follower),
        ("点赞数", likes),
        ("视频总数", video_count),
        ("播放总数", view),
    )
    for series_name, values in stacked_series:
        chart.add_yaxis(
            series_name=series_name,
            stack="总量",
            y_axis=values,
            areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
            label_opts=opts.LabelOpts(is_show=False),
        )

    chart.set_global_opts(
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        yaxis_opts=opts.AxisOpts(
            type_="value",
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
        xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
    )
    return chart
def get_date(days: int = 5) -> list:
    """Return day labels such as ``"03月09日"`` for today and the days before it.

    Args:
        days: Number of consecutive days to cover, counting back from today.
            Defaults to 5.

    Returns:
        A list of ``days`` strings, newest first (index 0 is today).
    """
    # Read the clock once so every label is relative to the same day.
    today = datetime.date.today()
    labels = []
    for offset in range(days):
        day = today - datetime.timedelta(days=offset)
        # Zero-padded month/day match strftime's %m and %d. Formatting them
        # directly keeps the non-ASCII characters out of strftime, whose
        # handling of such format strings is platform dependent.
        labels.append("%02d月%02d日" % (day.month, day.day))
    return labels
def write_html_to_file(format_content):
    """Write ``format_content`` to ``index.html`` in the current working directory.

    An existing file is overwritten. The file is written as UTF-8 so the
    result does not depend on the platform's default encoding.
    """
    # The with-block closes the file; no explicit close is needed.
    with open("index.html", "w", encoding="utf-8") as f:
        f.write(format_content)
def main():
    """Build the four charts from the stored rows and write ``index.html``.

    Reads up to six rows from the database. The oldest row only serves as
    the baseline for the day-over-day differences; the remaining rows are
    plotted against the date labels.
    """
    # One spider instance (one database connection) serves the whole run.
    store = get_data.spider()
    store.insert_testdata()  # seed placeholder rows when the table is short of data
    date = get_date()[::-1]  # date labels, oldest first
    # store.spider_get_data()
    data = store.select_data()[::-1]  # stored rows, oldest first

    recent = data[1:]
    view = [row[1] for row in recent]  # total plays
    follower = [row[2] for row in recent]  # followers
    likes = [row[3] for row in recent]  # likes
    video_count = [row[4] for row in recent]  # number of videos

    # Day-over-day deltas; plays are scaled to thousands.
    view_six_day = [row[1] for row in data]
    view_sub = [(cur - prev) / 1000 for prev, cur in zip(view_six_day, view_six_day[1:])]
    follower_six_day = [row[2] for row in data]
    follower_sub = [cur - prev for prev, cur in zip(follower_six_day, follower_six_day[1:])]

    # Render each chart to an HTML fragment.
    # "256px","325px"
    center_html = line_center("533px", "325px", "总曝光量", date, view).render_html_content(template_name="temp.html")
    left_html = line_left("310px", "325px", "新增播放", date, view_sub).render_html_content(template_name="temp.html")
    right_html = line_right("310px", "325px", "新增关注", date, follower_sub).render_html_content(template_name="temp.html")
    bottom_html = bottom_all("1226px", "600px", "新增数", date, view, follower, likes, video_count).render_html_content(template_name="temp.html")

    # The keyword names must match the placeholders in the page template.
    format_content = index_content.format(
        all=center_html,
        line_left_bilibili=left_html,
        line_right_bilibili=right_html,
        bottom_all=bottom_html,
    )
    print(center_html)
    write_html_to_file(format_content)
    print("index.html生成成功")


if __name__ == '__main__':
    main()
完整源码的github地址:
https://github.com/guyuxiu/project
参考文献:
【pyecharts文档】https://pyecharts.org/
【dashboards源码】https://github.com/keen/dashboards
【哔哩哔哩 API】https://github.com/SocialSisterYi/bilibili-API-collect/