# Source: https://www.cnblogs.com/bradleon/p/6827109.html
# 时间序列 (time series)
# - 时间戳 (timestamp)
# - 固定周期 (period)
# - 时间间隔 (interval)
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
## 1 Creating time series
rng = pd.date_range('2016-07-01', periods=10, freq='3D')
# start   : date spellings noted by the author: 20160701; 2016 Jul 1; 7/1/2016; 2016-0701
# periods : number of timestamps to generate (10 here)
# freq    : spacing unit such as Y, M, D, H ('3D' means every three days)
'''
DatetimeIndex(['2016-07-01', '2016-07-04', '2016-07-07', '2016-07-10',
'2016-07-13', '2016-07-16', '2016-07-19', '2016-07-22',
'2016-07-25', '2016-07-28'],
dtype='datetime64[ns]', freq='3D')
'''
# The series holds 20 values, so it needs its own 20-entry index. Reusing the
# 10-entry `rng` raises "ValueError: Length of values (20) does not match
# length of index (10)". The sample output below also shows 20 rows.
time_index = pd.date_range('2016-07-01', periods=20, freq='3D')
time = pd.Series(np.random.randn(len(time_index)), index=time_index)
'''
2016-07-01 0.416394
2016-07-04 -0.046777
2016-07-07 0.488991
2016-07-10 -0.297916
2016-07-13 1.257531
2016-07-16 -1.116187
2016-07-19 0.627790
2016-07-22 -0.511905
2016-07-25 -0.108427
2016-07-28 1.232823
2016-07-31 2.434077
2016-08-03 -0.192559
2016-08-06 -1.469236
2016-08-09 -0.061900
2016-08-12 -0.814023
2016-08-15 2.240778
2016-08-18 -0.510638
2016-08-21 -0.421424
2016-08-24 2.080042
2016-08-27 -0.163059
Freq: 3D, dtype: float64
'''
# (2) Filtering by time with truncate(): `before` drops every row earlier than
# the given date, `after` drops every row later than it.
time.truncate(before='20160803')
'''
2016-08-03 -0.192559
2016-08-06 -1.469236
2016-08-09 -0.061900
2016-08-12 -0.814023
2016-08-15 2.240778
2016-08-18 -0.510638
2016-08-21 -0.421424
2016-08-24 2.080042
2016-08-27 -0.163059
Freq: 3D, dtype: float64
'''
# (3) Timestamp: a single point in time parsed from a string.
pd.Timestamp('20190120 10:15')
'''
Timestamp('2019-01-20 10:15:00')
'''
# (4) Period: a time span; the sample output shows a daily ('D') frequency.
pd.Period('20160101')
'''
Period('2016-01-01', 'D')
'''
# (5) Timedelta: an offset (duration) that can be added to a period.
one_day = pd.Timedelta('1 day')
one_day
'''
Timedelta('1 days 00:00:00')
'''
pd.Period('20160101') + one_day
'''
Period('2016-01-02', 'D')
'''
# 2 Resampling
rng = pd.date_range('2016/07/01', periods=20, freq='10D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
'''
2016-07-01 1.732488
2016-07-11 0.387473
2016-07-21 1.660098
2016-07-31 -0.144417
2016-08-10 -0.097475
2016-08-20 0.944346
2016-08-30 1.119187
2016-09-09 -0.063570
2016-09-19 1.715250
2016-09-29 -0.070396
2016-10-09 0.468284
2016-10-19 -0.884766
2016-10-29 -0.127796
2016-11-08 -0.416377
2016-11-18 -1.180885
2016-11-28 1.818407
2016-12-08 0.534103
2016-12-18 0.374830
2016-12-28 1.805555
2017-01-07 -1.542637
Freq: 10D, dtype: float64
'''
# The month-end rule is passed as an offset object instead of the string alias
# 'M': that alias is deprecated in recent pandas (replaced by 'ME'), while
# older releases do not know 'ME'. The offset object yields the same
# month-end bins on both.
# NOTE(review): the "Freq: M" lines in the sample outputs were captured with an
# older pandas; newer releases are expected to print "Freq: ME" - confirm.
month_end = pd.offsets.MonthEnd()
# (1) Downsampling: monthly sum
ts1 = ts.resample(month_end).sum()
'''
2016-07-31 3.635642
2016-08-31 1.966059
2016-09-30 1.581283
2016-10-31 -0.544278
2016-11-30 0.221146
2016-12-31 2.714489
2017-01-31 -1.542637
Freq: M, dtype: float64
'''
# (2) Downsampling: monthly mean
ts2 = ts.resample(month_end).mean()
'''
2016-07-31 0.908911
2016-08-31 0.655353
2016-09-30 0.527094
2016-10-31 -0.181426
2016-11-30 0.073715
2016-12-31 0.904830
2017-01-31 -1.542637
Freq: M, dtype: float64
'''
rg = pd.date_range('2016/07/01', periods=10, freq='3D')
ta = pd.Series(np.random.randn(len(rg)), index=rg)
'''
2016-07-01 0.058873
2016-07-04 1.211198
2016-07-07 -0.411161
2016-07-10 0.154518
2016-07-13 1.019206
2016-07-16 0.048274
2016-07-19 0.658861
2016-07-22 -0.053293
2016-07-25 -1.105786
2016-07-28 -1.044237
Freq: 3D, dtype: float64
'''
# One daily resampler is shared by the three upsampling demos below.
daily = ta.resample('D')
# (3) Upsampling from a 3-day to a 1-day step; the new rows are left as NaN.
daily.asfreq()
'''
2016-07-01 0.058873
2016-07-02 NaN
2016-07-03 NaN
2016-07-04 1.211198
2016-07-05 NaN
2016-07-06 NaN
2016-07-07 -0.411161
2016-07-08 NaN
2016-07-09 NaN
2016-07-10 0.154518
2016-07-11 NaN
2016-07-12 NaN
2016-07-13 1.019206
2016-07-14 NaN
2016-07-15 NaN
2016-07-16 0.048274
2016-07-17 NaN
2016-07-18 NaN
2016-07-19 0.658861
2016-07-20 NaN
2016-07-21 NaN
2016-07-22 -0.053293
2016-07-23 NaN
2016-07-24 NaN
2016-07-25 -1.105786
2016-07-26 NaN
2016-07-27 NaN
2016-07-28 -1.044237
Freq: D, dtype: float64
'''
# (4) Upsampling from a 3-day to a 1-day step, filling the gaps.
# Fill strategies: 1. previous value (ffill)  2. next value (bfill)
#                  3. linear interpolation (interpolate)
# limit=1 fills only the first NaN of each gap.
daily.ffill(limit=1)
'''
2016-07-01 0.058873
2016-07-02 0.058873
2016-07-03 NaN
2016-07-04 1.211198
2016-07-05 1.211198
2016-07-06 NaN
2016-07-07 -0.411161
2016-07-08 -0.411161
2016-07-09 NaN
2016-07-10 0.154518
2016-07-11 0.154518
2016-07-12 NaN
2016-07-13 1.019206
2016-07-14 1.019206
2016-07-15 NaN
2016-07-16 0.048274
2016-07-17 0.048274
2016-07-18 NaN
2016-07-19 0.658861
2016-07-20 0.658861
2016-07-21 NaN
2016-07-22 -0.053293
2016-07-23 -0.053293
2016-07-24 NaN
2016-07-25 -1.105786
2016-07-26 -1.105786
2016-07-27 NaN
2016-07-28 -1.044237
Freq: D, dtype: float64
'''
daily.interpolate()
'''
2016-07-01 0.058873
2016-07-02 0.442981
2016-07-03 0.827089
2016-07-04 1.211198
2016-07-05 0.670411
2016-07-06 0.129625
2016-07-07 -0.411161
2016-07-08 -0.222601
2016-07-09 -0.034042
2016-07-10 0.154518
2016-07-11 0.442747
2016-07-12 0.730977
2016-07-13 1.019206
2016-07-14 0.695562
2016-07-15 0.371918
2016-07-16 0.048274
2016-07-17 0.251803
2016-07-18 0.455332
2016-07-19 0.658861
2016-07-20 0.421476
2016-07-21 0.184092
2016-07-22 -0.053293
2016-07-23 -0.404124
2016-07-24 -0.754955
2016-07-25 -1.105786
2016-07-26 -1.085270
2016-07-27 -1.064754
2016-07-28 -1.044237
Freq: D, dtype: float64
'''
## 5 Sliding (rolling) window
daily_index = pd.date_range('7/1/2016', freq='D', periods=600)
df = pd.Series(np.random.randn(600), index=daily_index)
print(df.head())
'''
2016-07-01 -0.953268
2016-07-02 2.739375
2016-07-03 0.286427
2016-07-04 1.333070
2016-07-05 1.424279
Freq: D, dtype: float64
'''
# Window of 10 observations; the first 9 results are NaN in the sample output
# because the window is not yet full.
r = df.rolling(window=10)
print(r.mean())
# Other aggregations available on the rolling object:
# r.max, r.median, r.std, r.skew, r.sum, r.var
'''
2016-07-01 NaN
2016-07-02 NaN
2016-07-03 NaN
2016-07-04 NaN
2016-07-05 NaN
2016-07-06 NaN
2016-07-07 NaN
2016-07-08 NaN
2016-07-09 NaN
2016-07-10 0.291856
2016-07-11 0.375966
2016-07-12 0.108790
2016-07-13 0.090306
2016-07-14 0.072220
2016-07-15 -0.156018
2016-07-16 -0.339148
2016-07-17 -0.260727
2016-07-18 -0.285549
2016-07-19 -0.222160
2016-07-20 0.063413
2016-07-21 -0.177591
2016-07-22 -0.166860
2016-07-23 -0.095712
2016-07-24 0.043481
2016-07-25 0.236758
2016-07-26 0.324270
2016-07-27 0.227588
2016-07-28 0.304813
2016-07-29 0.455341
2016-07-30 0.246289
...
2018-01-22 0.452047
2018-01-23 0.362680
2018-01-24 0.370529
2018-01-25 0.268178
2018-01-26 0.299888
2018-01-27 0.375809
2018-01-28 0.237203
2018-01-29 0.136555
2018-01-30 -0.016395
2018-01-31 0.107540
2018-02-01 -0.166109
2018-02-02 -0.132009
2018-02-03 -0.148901
2018-02-04 -0.076616
2018-02-05 -0.104145
2018-02-06 -0.359409
2018-02-07 -0.590481
2018-02-08 -0.391723
2018-02-09 -0.330878
2018-02-10 -0.394146
2018-02-11 -0.265302
2018-02-12 -0.165355
2018-02-13 -0.247112
2018-02-14 -0.242417
2018-02-15 -0.131072
2018-02-16 -0.163110
2018-02-17 0.012979
2018-02-18 -0.010954
2018-02-19 0.065030
2018-02-20 0.041370
Freq: D, Length: 600, dtype: float64
'''
# Draw the raw series (red dashed) and its 10-observation rolling mean (blue)
# on one 15x5 figure.
plt.figure(figsize=(15, 5))
df.plot(style='r--')
rolling_mean = df.rolling(window=10).mean()
rolling_mean.plot(style='b')