本练习的目的是更好地学习 pandas 的用法,并绘制直方图、折线图、密度图和箱体图。
读取CSV文件:
from pylab import *
import pandas as pd
import matplotlib.pyplot as plt
# mpl.rcParams and plt.rcParams are the same object, so two separate
# assignments to 'font.sans-serif' just overwrite each other. List both
# fonts once instead: they are tried in order, SimHei first and
# Droid Sans Fallback as the backup, so Chinese labels can render when
# either font is installed.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Droid Sans Fallback']
plt.rcParams['axes.unicode_minus'] = False  # Draw the minus sign correctly with these fonts.
def get_file_by_type(file, type):
    """Load a CSV file and total its columns per resampling period.

    The file is read as UTF-8 and must contain a ``Date`` column. That column
    is parsed to datetimes and becomes the index before resampling.

    Args:
        file: Path of the CSV file, handed to ``pandas.read_csv``.
        type: Frequency alias passed to both ``resample`` and ``to_period``
            (the callers in this file use ``'M'`` and ``'W'``). The name
            shadows the builtin but is kept so existing callers still work.

    Returns:
        A DataFrame indexed by period whose rows hold the per-period sums.
    """
    raw = pd.read_csv(file, encoding='utf-8')
    # resample() needs a datetime-like index, so parse the column first.
    indexed = raw.assign(Date=pd.to_datetime(raw['Date'])).set_index('Date')
    totals = indexed.resample(type).sum()
    return totals.to_period(type)
画直方图:
def plot_change_amount_bar():
    """Draw a grouped bar chart of summed price change and volume per bank.

    For every bank CSV the monthly-resampled ``p_change`` and ``t_volume``
    columns are summed (``t_volume`` is divided by 1e9), each rounded to three
    decimals. The two series are drawn as side-by-side bars, every bar is
    annotated with its value, and the figure is displayed via ``plt.show()``.
    """
    # One ordered mapping keeps each tick label tied to its data file.
    bank_files = {
        '招商银行': '600036.csv',
        '建设银行': '601939.csv',
        '工商银行': '601398.csv',
        '农业银行': '601288.csv',
        '兴业银行': '601166.csv',
        '中国银行': '601988.csv',
    }
    bar_width = 0.8

    change = []
    amount = []
    for csv_name in bank_files.values():
        monthly = get_file_by_type(csv_name, 'M')
        change.append(round(monthly['p_change'].sum(), 3))
        amount.append(round(monthly['t_volume'].sum() / 1000000000, 3))

    plt.title("各大银行2019年1-7月份数据分析直方图")
    positions = np.linspace(1, 50, len(bank_files))
    change_bars = plt.bar(positions, change, width=bar_width,
                          facecolor='#9999ff', edgecolor='white', linewidth=1)
    amount_bars = plt.bar(positions + bar_width, amount, width=bar_width,
                          facecolor='#ff9999', edgecolor='white', linewidth=1)

    plt.xlim(0, 60)
    plt.xticks(positions, list(bank_files), rotation=45)
    plt.ylim((1, 220))
    plt.yticks(list(range(0, 220, 10)))
    plt.legend(handles=[change_bars, amount_bars],
               labels=['range', 'amount(billion)'], loc='upper right')
    plt.ylabel('成交金额(十亿)')
    plt.xlabel('各大银行')

    # plt.bar centres each bar on its x position by default, so the value
    # labels go at that same x; an extra half-width offset would push them
    # over the bar's right edge.
    for x, y in zip(positions, change):
        plt.text(x, y + 0.05, '%.3f' % y, ha='center', va='bottom', fontsize=6)
    for x, y in zip(positions, amount):
        plt.text(x + bar_width, y + 0.05, '%.3f' % y,
                 ha='center', va='bottom', fontsize=6)
    plt.show()
月涨幅折线图:
def plot_change_amount_line():
    """Plot each bank's monthly ``p_change`` totals as one line per bank.

    Every CSV is resampled by month through ``get_file_by_type``. The
    ``p_change`` values are collected into a DataFrame with one column per
    bank, drawn as a line chart and shown with ``plt.show()``.
    """
    excel_name = {"招商银行": "600036.csv", "建设银行": "601939.csv",
                  "工商银行": "601398.csv", "农业银行": "601288.csv",
                  "兴业银行": "601166.csv", "中国银行": "601988.csv"}
    info = {
        name: get_file_by_type(file, 'M')['p_change'].values
        for name, file in excel_name.items()
    }
    # The plt.* calls below decorate the axes that DataFrame.plot creates.
    pd.DataFrame(info).plot(kind='line')
    plt.title("几大银行月涨幅数据分析虚线图")
    plt.ylim((-15, 20))
    # NOTE(review): the frame uses a default 0-based index, so this range
    # leaves the row at position 0 outside the visible x axis - confirm that
    # this is intended.
    plt.xlim((1, 6))
    plt.xlabel('月份')
    plt.ylabel('月涨幅%')
    plt.show()
涨幅密度图:
def plot_month_change_density(type):
    """Plot a kernel-density estimate of each bank's ``p_change`` totals.

    Args:
        type: Resampling frequency alias forwarded to ``get_file_by_type``.
            ``'M'`` and ``'W'`` additionally set the chart title; any other
            value leaves the chart untitled. The name shadows the builtin but
            is kept so existing callers still work.
    """
    excel_name = {"招商银行": "600036.csv", "建设银行": "601939.csv",
                  "工商银行": "601398.csv", "农业银行": "601288.csv",
                  "兴业银行": "601166.csv", "中国银行": "601988.csv"}
    info = {
        name: get_file_by_type(file, type)['p_change'].values
        for name, file in excel_name.items()
    }
    pd.DataFrame(info).plot(kind='kde')
    plt.xlabel("波动百分比")

    titles = {'M': 'By month', 'W': 'By week'}
    if type in titles:
        plt.title(titles[type])
    plt.show()
涨幅箱体图:
def plot_month_change_boxplot():
    """Draw a box plot of each bank's monthly ``p_change`` totals.

    Every CSV is resampled by month through ``get_file_by_type``. The
    ``p_change`` values form one DataFrame column per bank, and the resulting
    box plot is shown with ``plt.show()``.
    """
    excel_name = {"招商银行": "600036.csv", "建设银行": "601939.csv",
                  "工商银行": "601398.csv", "农业银行": "601288.csv",
                  "兴业银行": "601166.csv", "中国银行": "601988.csv"}
    info = {
        name: get_file_by_type(file, 'M')['p_change'].values
        for name, file in excel_name.items()
    }
    pd.DataFrame(info).boxplot()
    plt.title("几大银行月涨幅箱体图")
    plt.ylabel('波动百分比')
    plt.show()
程序入口全部代码:
if __name__ == '__main__':
    # Render every chart in sequence when the file is run as a script.
    plot_change_amount_bar()
    plot_change_amount_line()
    plot_month_change_density('M')
    plot_month_change_density('W')
    plot_month_change_boxplot()