Pandas plot 爬取的数据

本练习的目的是更好地学习 pandas 的用法,包括绘制直方图(柱状图)、折线图、密度图和箱体图。

读取CSV文件:

from pylab import *
import pandas as pd
import matplotlib.pyplot as plt

# Sans-serif fonts able to render the Chinese labels, in priority order.
# matplotlib walks this list and uses the first font that is installed, so
# listing both names lets the script work where only one of them exists.
# (Assigning the key twice, as before, simply discarded the first value.)
plt.rcParams['font.sans-serif'] = ['SimHei', 'Droid Sans Fallback']
# Needed so that the minus sign is displayed correctly with these fonts.
plt.rcParams['axes.unicode_minus'] = False
# Load a CSV file and aggregate it per resampling period.
def get_file_by_type(file, type):
    """Read a CSV file and return its columns summed per period.

    Args:
        file: CSV location handed to ``pd.read_csv`` (read as UTF-8). The
            file must contain a ``Date`` column.
        type: Frequency alias passed to both ``resample`` and ``to_period``;
            the callers in this file use ``'M'`` and ``'W'``.

    Returns:
        A DataFrame indexed by period (index name ``Date``) whose columns
        hold the sum of the rows falling into each period.
    """
    parsed = pd.read_csv(file, encoding='utf-8')
    # Turn the textual dates into timestamps so they can serve as a time index.
    parsed = parsed.assign(Date=pd.to_datetime(parsed['Date']))
    by_date = parsed.set_index('Date')
    # Sum every column per bin, then express the bin labels as periods.
    period_sums = by_date.resample(type).sum()
    return period_sums.to_period(type)

画直方图:

def plot_change_amount_bar():
    """Draw grouped bars of total price change and traded amount per bank.

    For each bank CSV the monthly frame returned by ``get_file_by_type`` is
    loaded; its ``p_change`` column is summed, and its ``t_volume`` column is
    summed and divided by 1e9. Both totals are rounded to three decimals and
    drawn as two adjacent bars per bank, each annotated with its value.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # (display name, CSV file) pairs; the order defines the bar order.
    bank_files = [
        ('招商银行', '600036.csv'),
        ('建设银行', '601939.csv'),
        ('工商银行', '601398.csv'),
        ('农业银行', '601288.csv'),
        ('兴业银行', '601166.csv'),
        ('中国银行', '601988.csv'),
    ]
    bar_width = 0.8

    change = []
    amount = []
    for _, csv_file in bank_files:
        df_date = get_file_by_type(csv_file, 'M')
        change.append(round(df_date['p_change'].sum(), 3))
        amount.append(round(df_date['t_volume'].sum() / 1000000000, 3))
    labels = [bank for bank, _ in bank_files]

    plt.title("各大银行2019年1-7月份数据分析直方图")
    # plt.bar centres each bar on its x value (align='center' is the default),
    # so these arrays are the centres of the two bar series.
    change_x = np.linspace(1, 50, len(bank_files))
    amount_x = change_x + bar_width
    change_bars = plt.bar(change_x, change, width=bar_width, facecolor='#9999ff',
                          edgecolor='white', linewidth=1)
    amount_bars = plt.bar(amount_x, amount, width=bar_width, facecolor='#ff9999',
                          edgecolor='white', linewidth=1)
    plt.xlim(0, 60)  # x-axis range
    plt.xticks(change_x, labels, rotation=45)  # one tick per bank
    # NOTE(review): the lower limit of 1 hides the y=0 tick requested below
    # and any bar not exceeding 1 -- confirm this is intended.
    plt.ylim((1, 220))  # y-axis range
    plt.yticks(list(range(0, 220, 10)))  # y-axis ticks
    plt.legend(handles=[change_bars, amount_bars], labels=['range', 'amount(billion)'],
               loc='upper right')
    plt.ylabel('成交金额(十亿)')
    plt.xlabel('各大银行')

    # Put each value label above the centre of its own bar. The bars are
    # centred on their x value, so no extra half-width offset is needed.
    for centres, values in ((change_x, change), (amount_x, amount)):
        for x, y in zip(centres, values):
            plt.text(x, y + 0.05, '%.3f' % y, ha='center', va='bottom', fontsize=6)
    plt.show()
bar.png

月涨幅折线图:

def plot_change_amount_line():
    """Plot each bank's monthly price change as one line per bank.

    The monthly frame of every bank CSV is loaded through
    ``get_file_by_type(file, 'M')`` and its ``p_change`` column becomes one
    column of the plotted DataFrame. The x-axis ticks carry the actual period
    labels and the x-range spans every period, so the first month is no longer
    clipped and tick values no longer denote 0-based row positions.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    excel_name = {"招商银行": "600036.csv", "建设银行": "601939.csv",
                  "工商银行": "601398.csv", "农业银行": "601288.csv", "兴业银行": "601166.csv",
                  "中国银行": "601988.csv"}
    # Keep the Series (not bare arrays) so that the banks are aligned on their
    # period index; files covering different months then yield NaN gaps
    # instead of a length-mismatch error.
    monthly_change = {
        name: get_file_by_type(file, 'M')['p_change']
        for name, file in excel_name.items()
    }
    df_lineplot = pd.DataFrame(monthly_change)

    # Remember the period labels, then plot against plain positions 0..n-1.
    month_labels = [str(period) for period in df_lineplot.index]
    df_lineplot = df_lineplot.reset_index(drop=True)
    df_lineplot.plot(kind='line')

    plt.title("几大银行月涨幅数据分析虚线图")
    plt.ylim((-15, 20))
    plt.xticks(list(range(len(month_labels))), month_labels)
    if len(month_labels) > 1:
        # Show every period; a single point keeps matplotlib's default range.
        plt.xlim((0, len(month_labels) - 1))
    plt.xlabel('月份')
    plt.ylabel('月涨幅%')
    plt.show()
line.png

涨幅密度图:

# Density (KDE) plot of the banks' price changes.
def plot_month_change_density(type):
    """Show a kernel-density plot of ``p_change`` for every bank.

    Args:
        type: Frequency alias forwarded to ``get_file_by_type``. ``'M'`` and
            ``'W'`` additionally select the title 'By month' / 'By week';
            any other value leaves the plot untitled.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    bank_files = {"招商银行": "600036.csv", "建设银行": "601939.csv",
                  "工商银行": "601398.csv", "农业银行": "601288.csv", "兴业银行": "601166.csv",
                  "中国银行": "601988.csv"}
    # One column of raw values per bank.
    changes = {
        bank: get_file_by_type(csv_file, type)['p_change'].values
        for bank, csv_file in bank_files.items()
    }
    pd.DataFrame(changes).plot(kind='kde')
    plt.xlabel("波动百分比")
    # Pick the title matching the requested frequency, if there is one.
    for alias, title in (('M', 'By month'), ('W', 'By week')):
        if type == alias:
            plt.title(title)
            break
    plt.show()

涨幅箱体图:

# Box plot of the banks' monthly price changes.
def plot_month_change_boxplot():
    """Show one box per bank summarising its monthly ``p_change`` values.

    Each bank CSV is loaded through ``get_file_by_type(file, 'M')``; the
    ``p_change`` values form one DataFrame column per bank, which is drawn
    with ``DataFrame.boxplot``. The figure is displayed with ``plt.show()``;
    nothing is returned.
    """
    bank_files = {"招商银行": "600036.csv", "建设银行": "601939.csv",
                  "工商银行": "601398.csv", "农业银行": "601288.csv", "兴业银行": "601166.csv",
                  "中国银行": "601988.csv"}
    monthly_changes = {
        bank: get_file_by_type(csv_file, 'M')['p_change'].values
        for bank, csv_file in bank_files.items()
    }
    pd.DataFrame(monthly_changes).boxplot()
    plt.title("几大银行月涨幅箱体图")
    plt.ylabel('波动百分比')
    plt.show()

程序入口代码:

if __name__ == '__main__':
    # Render every chart in turn; each call shows its own figure.
    plot_change_amount_bar()
    plot_change_amount_line()
    # The density plot is drawn twice: monthly first, then weekly.
    for frequency in ('M', 'W'):
        plot_month_change_density(frequency)
    plot_month_change_boxplot()

kde1.png
kde2.png
box.png

你可能感兴趣的:(Pandas plot 爬取的数据)