matplotlib日期坐标轴及dataframe的merge操作Demo

1.主要问题

主要记录dataframe构造,非连续时间做X轴的处理以及pandas中dataframe的连接操作。

2.Demo代码

# coding:utf8
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates


# 方法1: 默认index是X轴 达到预期
def p1():
    ax = fig.add_subplot(311)
    ax.plot(df_days['data'], label='data')  # 默认以index作为x轴

    for a, b in zip(df_days.index.to_list(), df_days['data']):
        ax.text(a, b, '{:,.2f}'.format(b), ha='center', fontsize=14, va='bottom', color='blue')

    # 画X轴刻度及label
    # 方法1:用plt.xticks
    # xticks两个参数只在plt有,同时设置刻度跟标签
    plt.xticks(df_days.index.to_list(), xlabels, rotation=90)
    # ax.xticks(df_days.index.to_list(), xlabels, rotation=90)
    # #AttributeError: 'AxesSubplot' object has no attribute 'xticks'

    # 画X轴刻度及label
    # 方法2:用ax.set_xticks及ax.set_xticklabels
    # ax的ticks跟tickslabel要分开设置!!!
    # ax.set_xticks(df_days.index.to_list())  # Index作刻度
    # ax.set_xticklabels(xlabels, rotation=30)

    ax.grid()
    ax.legend()


# 方法3:用非index做X轴,若是日期型,或者mdate.date2num转数字为x轴tick,去掉的周末会留空,非预期
def p2():
    ax = fig.add_subplot(312)
    # plt.plot(df_days["trd_date"], df_days['data'], label='data')
    # 等价于,日期即转不转数字,内部都是以数字处理,即当前日期与"0000-01-01 00:00:00"相减的天数
    plt.plot(df_days["trd_date"].map(lambda x: mdates.date2num(x)), df_days['data'], label='data')

    for a, b in zip(df_days["trd_date"], df_days['data']):
        ax.text(mdates.date2num(a), b, '{:,.2f}'.format(b), ha='center', fontsize=14, va='bottom')

    plt.xticks(df_days["trd_date"], xlabels, rotation=45)
    ax.grid()
    ax.legend()


# 方法3:用非Index做X轴,且非日期型 达到预期
def p3():
    ax = fig.add_subplot(313)
    ax.plot(xlabels, df_days['data'], label='data')

    for a, b in zip(df_days.index.to_list(), df_days['data']):
        ax.text(a, b, '{:,.2f}'.format(b), ha='center', fontsize=14, va='bottom')

    ax.set_xticklabels(xlabels, rotation=35)
    ax.grid()
    ax.legend()


def get_df_without_56(days=29):
    # 起始日期
    start_date = datetime.date.today() - datetime.timedelta(days=days)
    # 日期序列
    day_list = pd.date_range(start_date, periods=days, freq='D')
    # 序列里直接过滤掉周末
    day_list = day_list[day_list.weekday < 5]
    # 生产随机Demo数据
    data_list = np.random.rand(days - int(days / 7) * 2) * 100

    # 生成dataframe方法1:两步走
    df_days = pd.DataFrame(data=day_list, columns=['trd_date'])
    df_days['data'] = data_list  # 增加数据列
    return df_days


def get_df_without_01(days=29):
    # 起始日期
    start_date = datetime.date.today() - datetime.timedelta(days=days)
    # 日期序列
    day_list = pd.date_range(start_date, periods=days, freq='D')
    # 序列里直接过滤周一周二
    day_list = day_list[day_list.weekday > 1]
    # 生产随机Demo数据
    data_list = np.random.rand(days - int(days / 7) * 2) * 100

    # 生成dataframe方法2:zip后一步到位
    df_days = pd.DataFrame(list(zip(day_list, data_list)), columns=['trd_date', 'data'])
    return df_days


def df_merge_demo(left, right):
    # 默认以重叠的列名当做连接键,本例中等同于pd.merge(left,right,on=['trd_date','data'],how='inner')
    defaultmerge = pd.merge(left, right)
    print("\n测试:defaultmerge=pd.merge(left, right)")
    print(defaultmerge.head(100))

    # 等同于pd.concat([left,right]).reset_index(drop=True)
    defaultouter = pd.merge(left, right, how='outer')
    print("\n测试:defaultouter = pd.merge(left, right, how='outer')")
    print(defaultouter.head(100))
    pdconcat = pd.concat([left, right]).reset_index(drop=True)
    print("\n测试:pdconcat= pd.concat([left, right]).reset_index(drop=True)")
    print(pdconcat)

    # 如果两个对象的key列名不同,可以分别指定,例:pd.merge(left,right,left_on='lkey',right_on='rkey')
    leftjoin = pd.merge(left, right, on='trd_date', how='left')
    print("\n测试:leftjoin = pd.merge(left, right, on='trd_date', how='left')")
    print(leftjoin.head(100))
    # 多键连接时将连接键组成列表传入,例:pd.merge(left,right,on=['key1','key2']
    rightjoin = pd.merge(left, right, on='trd_date', how='right')
    print("\n测试:rightjoin = pd.merge(left, right, on='trd_date', how='right')")
    print(rightjoin.head(100))

    innerjoin = pd.merge(left, right, on='trd_date', how='inner')  # 默认为inner
    print("\n测试:innerjoin = pd.merge(left, right, on='trd_date', how='inner')")
    print(innerjoin.head(100))

    outerjoin = pd.merge(left, right, on='trd_date', how='outer')
    print("\n测试:innerjoin = pd.merge(left, right, on='trd_date', how='outer')")
    print(outerjoin.head(100))

    left = left.set_index('trd_date')  # 设置索引
    right = right.set_index('trd_date') # 设置索引
    # join连接后重建索引
    dfjoin = left.join(right, lsuffix='_l', rsuffix='_r',sort=False).reset_index()
    print("\n测试:dfjoin = left.join(right).reset_index()")
    print(dfjoin.head(100))
    dfjoin.rename(columns={'trd_date': '交易日期'}, inplace=True)  # 改列名
    print("\n测试:dfjoin.rename(columns={'trd_date': '交易日期'}, inplace=True)  # 改列名")
    print(dfjoin.head(100))


if __name__ == '__main__':
    df_days = get_df_without_56(15)
    df_days_01 = get_df_without_01(15)

    # dataframe merge 测试
    df_merge_demo(df_days, df_days_01)

    fig = plt.figure(figsize=(20, 10), dpi=80)
    # 生成x轴的label,方法1: map然后lambda表达式
    xlabels = df_days["trd_date"].map(lambda x: x.strftime('%m-%d'))
    # 或者 方法2: for in 循环生成数组
    # xlabels = [date.strftime('%m-%d') for (date) in df_days["trd_date"]]

    p1()
    p2()
    p3()

    plt.show()
    plt.close()

3.执行结果

测试:defaultmerge=pd.merge(left, right)
Empty DataFrame
Columns: [trd_date, data]
Index: []
测试:defaultouter = pd.merge(left, right, how='outer')
     trd_date       data
0  2020-02-20  28.519733
1  2020-02-21  21.957296
2  2020-02-24  81.709042
3  2020-02-25  49.141968
4  2020-02-26  75.962113
5  2020-02-27  34.297047
6  2020-02-28   8.599335
7  2020-03-02  59.895028
8  2020-03-03  36.150423
9  2020-03-04  41.961246
10 2020-03-05  78.180868
11 2020-02-20  81.522147
12 2020-02-21  66.379312
13 2020-02-22  68.038422
14 2020-02-23  82.831858
15 2020-02-26  44.112500
16 2020-02-27   9.492410
17 2020-02-28  53.929783
18 2020-02-29  19.431358
19 2020-03-01  59.103261
20 2020-03-04  66.754922
21 2020-03-05  66.497167
测试:pdconcat= pd.concat([left, right]).reset_index(drop=True)
     trd_date       data
0  2020-02-20  28.519733
1  2020-02-21  21.957296
2  2020-02-24  81.709042
3  2020-02-25  49.141968
4  2020-02-26  75.962113
5  2020-02-27  34.297047
6  2020-02-28   8.599335
7  2020-03-02  59.895028
8  2020-03-03  36.150423
9  2020-03-04  41.961246
10 2020-03-05  78.180868
11 2020-02-20  81.522147
12 2020-02-21  66.379312
13 2020-02-22  68.038422
14 2020-02-23  82.831858
15 2020-02-26  44.112500
16 2020-02-27   9.492410
17 2020-02-28  53.929783
18 2020-02-29  19.431358
19 2020-03-01  59.103261
20 2020-03-04  66.754922
21 2020-03-05  66.497167
测试:leftjoin = pd.merge(left, right, on='trd_date', how='left')
     trd_date     data_x     data_y
0  2020-02-20  28.519733  81.522147
1  2020-02-21  21.957296  66.379312
2  2020-02-24  81.709042        NaN
3  2020-02-25  49.141968        NaN
4  2020-02-26  75.962113  44.112500
5  2020-02-27  34.297047   9.492410
6  2020-02-28   8.599335  53.929783
7  2020-03-02  59.895028        NaN
8  2020-03-03  36.150423        NaN
9  2020-03-04  41.961246  66.754922
10 2020-03-05  78.180868  66.497167
测试:rightjoin = pd.merge(left, right, on='trd_date', how='right')
     trd_date     data_x     data_y
0  2020-02-20  28.519733  81.522147
1  2020-02-21  21.957296  66.379312
2  2020-02-26  75.962113  44.112500
3  2020-02-27  34.297047   9.492410
4  2020-02-28   8.599335  53.929783
5  2020-03-04  41.961246  66.754922
6  2020-03-05  78.180868  66.497167
7  2020-02-22        NaN  68.038422
8  2020-02-23        NaN  82.831858
9  2020-02-29        NaN  19.431358
10 2020-03-01        NaN  59.103261
测试:innerjoin = pd.merge(left, right, on='trd_date', how='inner')
    trd_date     data_x     data_y
0 2020-02-20  28.519733  81.522147
1 2020-02-21  21.957296  66.379312
2 2020-02-26  75.962113  44.112500
3 2020-02-27  34.297047   9.492410
4 2020-02-28   8.599335  53.929783
5 2020-03-04  41.961246  66.754922
6 2020-03-05  78.180868  66.497167
测试:innerjoin = pd.merge(left, right, on='trd_date', how='outer')
     trd_date     data_x     data_y
0  2020-02-20  28.519733  81.522147
1  2020-02-21  21.957296  66.379312
2  2020-02-24  81.709042        NaN
3  2020-02-25  49.141968        NaN
4  2020-02-26  75.962113  44.112500
5  2020-02-27  34.297047   9.492410
6  2020-02-28   8.599335  53.929783
7  2020-03-02  59.895028        NaN
8  2020-03-03  36.150423        NaN
9  2020-03-04  41.961246  66.754922
10 2020-03-05  78.180868  66.497167
11 2020-02-22        NaN  68.038422
12 2020-02-23        NaN  82.831858
13 2020-02-29        NaN  19.431358
14 2020-03-01        NaN  59.103261
测试:dfjoin = left.join(right).reset_index()
     trd_date     data_l     data_r
0  2020-02-20  28.519733  81.522147
1  2020-02-21  21.957296  66.379312
2  2020-02-24  81.709042        NaN
3  2020-02-25  49.141968        NaN
4  2020-02-26  75.962113  44.112500
5  2020-02-27  34.297047   9.492410
6  2020-02-28   8.599335  53.929783
7  2020-03-02  59.895028        NaN
8  2020-03-03  36.150423        NaN
9  2020-03-04  41.961246  66.754922
10 2020-03-05  78.180868  66.497167
测试:dfjoin.rename(columns={'trd_date': '交易日期'}, inplace=True)  # 改列名
         交易日期     data_l     data_r
0  2020-02-20  28.519733  81.522147
1  2020-02-21  21.957296  66.379312
2  2020-02-24  81.709042        NaN
3  2020-02-25  49.141968        NaN
4  2020-02-26  75.962113  44.112500
5  2020-02-27  34.297047   9.492410
6  2020-02-28   8.599335  53.929783
7  2020-03-02  59.895028        NaN
8  2020-03-03  36.150423        NaN
9  2020-03-04  41.961246  66.754922
10 2020-03-05  78.180868  66.497167

你可能感兴趣的:(matplotlib日期坐标轴及dataframe的merge操作Demo)