思路:从网上下载 JSON 格式的数据文件 → 打开并读取 JSON 文件,存储日期、月份、周数、星期和收盘价等数据 → 绘制收盘价折线图和收盘价对数变换折线图 → 为了对数据做时间序列分析,先封装一个绘图函数 → 分别绘制按月份、按周数、按星期的日均值图 → 最后制作数据仪表盘(Dashboard)
import json
# Load the whole JSON document into a list of per-day records.
# JSON text is UTF-8 by specification, so do not rely on the locale default.
filename = 'btc_close_2017.json'
with open(filename, encoding='utf-8') as f:
    btc_data = json.load(f)

# Print a one-line summary for every day.
for btc_dict in btc_data:
    date = btc_dict['date']
    month = int(btc_dict['month'])
    week = btc_dict['week']
    weekday = btc_dict['weekday']
    # The price may be stored as a decimal string, so go through float()
    # before truncating to whole RMB.
    close = int(float(btc_dict['close']))
    print("{} is month {} week {}, {}, the close price is {} RMB".format(date, month, week, weekday, close))

# Build five parallel lists (same index == same day): date, month number,
# week number, weekday name and closing price.
# Months and weeks are converted to int so that later grouping sorts them
# numerically (1, 2, ..., 11) rather than lexicographically as strings.
dates = [record['date'] for record in btc_data]
months = [int(record['month']) for record in btc_data]
weeks = [int(record['week']) for record in btc_data]
weekdays = [record['weekday'] for record in btc_data]
close = [int(float(record['close'])) for record in btc_data]
import pygal
# Line chart of the raw daily closing price.
# Minor x labels are hidden and only every N-th date is marked as a major
# label, otherwise one label per day would be unreadable.
N = 20  # show an x-axis label once every 20 days
chart_options = {'show_minor_x_labels': False, 'x_label_rotation': 20}
line_chart = pygal.Line(**chart_options)
line_chart.title = '收盘价(¥)'
line_chart.add('收盘价', close)
line_chart.x_labels = dates
line_chart.x_labels_major = [day for pos, day in enumerate(dates) if pos % N == 0]
line_chart.render_to_file('收盘价折线图(¥).svg')
import pygal
import math
# Same chart as the raw closing price, but plotting log10(price).
N = 20  # show an x-axis label once every 20 days
close_log = list(map(math.log10, close))
chart_options = {'show_minor_x_labels': False, 'x_label_rotation': 20}
line_chart = pygal.Line(**chart_options)
line_chart.title = '收盘价对数变换(¥)'
line_chart.add('log收盘价', close_log)
line_chart.x_labels = dates
line_chart.x_labels_major = [day for pos, day in enumerate(dates) if pos % N == 0]
line_chart.render_to_file('收盘价对数变换折线图(¥).svg')
from itertools import groupby
def draw_line(x_data, y_data, title, y_legend):
    """Plot the mean of ``y_data`` for each distinct value of ``x_data``.

    The two sequences are paired element by element, the pairs are grouped
    by their x value, and the arithmetic mean of each group's y values is
    drawn as a single pygal line series. The chart is also written to
    ``<title>.svg`` in the current working directory.

    Args:
        x_data: Grouping keys (for example month numbers); must be sortable.
        y_data: Numeric values paired with ``x_data``.
        title: Chart title; also used as the output file name stem.
        y_legend: Legend text of the plotted series.

    Returns:
        The ``pygal.Line`` chart, so callers can adjust it and render again.

    Raises:
        ValueError: If there is no (x, y) pair to plot.
    """
    # groupby() only merges *adjacent* equal keys, so sort the pairs first.
    xy_map = []
    for x, pairs in groupby(sorted(zip(x_data, y_data)), key=lambda pair: pair[0]):
        y_list = [v for _, v in pairs]
        xy_map.append((str(x), sum(y_list) / len(y_list)))  # (x, mean of y for x)

    # Fail with a clear message instead of an opaque unpacking error below.
    if not xy_map:
        raise ValueError('draw_line() needs at least one (x, y) pair')

    # Transpose [(x, mean), ...] into (x, ...) and (mean, ...).
    x_unique, y_mean = zip(*xy_map)

    line_chart = pygal.Line()
    line_chart.title = title
    line_chart.x_labels = x_unique
    line_chart.add(y_legend, y_mean)
    # No space before the extension, so the file name is exactly "<title>.svg".
    line_chart.render_to_file('{}.svg'.format(title))
    return line_chart
# Daily mean per month, using only the records before 2017-12-01
# (presumably because the final month in the data set is incomplete -- confirm).
idx_month = dates.index('2017-12-01')
line_chart_month = draw_line(months[:idx_month], close[:idx_month], '收盘价月日均值(¥)', '月日均值')
line_chart_month  # displays the chart in a notebook; no effect in a script

# Daily mean per week. The slice starts at index 1 and stops before
# 2017-12-11 (presumably to keep only complete weeks -- confirm against the data).
idx_week = dates.index('2017-12-11')
line_chart_week = draw_line(weeks[1:idx_week], close[1:idx_week], '收盘价周日均值(¥)', '周日均值')
line_chart_week  # displays the chart in a notebook; no effect in a script

# Mean per day of the week over the same range. Weekday names are mapped
# to 1..7 so that the groups sort Monday -> Sunday instead of alphabetically.
wd = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdays_int = [wd.index(w) + 1 for w in weekdays[1:idx_week]]
line_chart_weekday = draw_line(weekdays_int, close[1:idx_week], '收盘价星期均值(¥)', '星期均值')
line_chart_weekday.x_labels = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
# The labels above are assigned after draw_line() has already written its
# file, so render again, to the exact file name the dashboard embeds.
line_chart_weekday.render_to_file('收盘价星期均值(¥).svg')
# Assemble a minimal HTML page that embeds the five rendered SVG charts.
# Each chart is referenced by file name, so the SVG files must sit next to
# the generated HTML file.
with open('收盘价Dashboard.html', 'w', encoding='utf8') as html_file:
    html_file.write('<html><head><title>收盘价Dashboard</title><meta charset="utf-8"></head><body>\n')
    for svg in [
            '收盘价折线图(¥).svg', '收盘价对数变换折线图(¥).svg', '收盘价月日均值(¥).svg',
            '收盘价周日均值(¥).svg', '收盘价星期均值(¥).svg']:
        # The {0} placeholder is what actually puts the file name on the page.
        html_file.write('    <object type="image/svg+xml" data="{0}" height=500></object>\n'.format(svg))
    html_file.write('</body></html>')
最后生成5张图:
收盘价对数变换折线图为:
收盘价星期均值为:
收盘价月日均值:
收盘价折线图:
收盘价周日均值:
收盘价Dashboard:
过程中遇到的难点:
函数:以作月日均值为例
sorted(zip(x_data, y_data)) 返回一个排序后的元组列表,例如 [(1, 5383), (1, 5566), …, (1, 7070), …, (11, 65583)]
lambda 是匿名函数,key=lambda _: _[0] 表示按元组中的第一个元素分组;如果写成 _[1],则是按第二个元素分组
groupby() 进行分组,key=lambda _: _[0] 代表按照元组列表中的第一个元素进行分类,可被分成 11 组:x 为 1~11 月份,y 是该月份对应的全部元组,即:
1: [(1, 5383), (1, 5566), …, (1, 7835)]
2: [(2, 6793), (2, 6811), …, (2, 8076)],依次类推