pandas 时间序列处理

https://www.cnblogs.com/bradleon/p/6827109.html

时间序列数据的三种基本表示:
- 时间戳(timestamp)
- 固定周期(period)
- 时间间隔(interval)

 

# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

import pandas as pd
import numpy as np
import matplotlib.pylab as plt

## 1 Creating time series

# Build a DatetimeIndex of 10 timestamps spaced 3 days apart.
rng = pd.date_range('2016-07-01', periods=10, freq='3D')
# start:   the author lists 20160701; 2016 Jul 1; 7/1/2016; 2016-0701 as
#          alternative spellings of the start date
# periods: number of timestamps to generate (10 here)
# freq:    frequency alias; the author lists Y, M, D, H (alias spellings
#          differ between pandas versions)
'''
DatetimeIndex(['2016-07-01', '2016-07-04', '2016-07-07', '2016-07-10',
               '2016-07-13', '2016-07-16', '2016-07-19', '2016-07-22',
               '2016-07-25', '2016-07-28'],
              dtype='datetime64[ns]', freq='3D')
'''
# The sample series holds 20 values, so it needs its own 20-entry index.
# Reusing the 10-entry `rng` here raises a ValueError because the number of
# values and the index length differ.
time = pd.Series(
    np.random.randn(20),
    index=pd.date_range('2016-07-01', periods=20, freq='3D'),
)
'''
2016-07-01    0.416394
2016-07-04   -0.046777
2016-07-07    0.488991
2016-07-10   -0.297916
2016-07-13    1.257531
2016-07-16   -1.116187
2016-07-19    0.627790
2016-07-22   -0.511905
2016-07-25   -0.108427
2016-07-28    1.232823
2016-07-31    2.434077
2016-08-03   -0.192559
2016-08-06   -1.469236
2016-08-09   -0.061900
2016-08-12   -0.814023
2016-08-15    2.240778
2016-08-18   -0.510638
2016-08-21   -0.421424
2016-08-24    2.080042
2016-08-27   -0.163059
Freq: 3D, dtype: float64
'''

# (2) Filtering by time with truncate(); it takes `before` / `after` bounds.
# Results are bound to names so they are not silently discarded when the
# file runs as a script.
time_from_aug3 = time.truncate(before='20160803')
'''
2016-08-03   -0.192559
2016-08-06   -1.469236
2016-08-09   -0.061900
2016-08-12   -0.814023
2016-08-15    2.240778
2016-08-18   -0.510638
2016-08-21   -0.421424
2016-08-24    2.080042
2016-08-27   -0.163059
Freq: 3D, dtype: float64
'''
# (3) Timestamp: a single point in time.
stamp = pd.Timestamp('20190120 10:15')
'''
Timestamp('2019-01-20 10:15:00')
'''
# (4) Period: a span of time (daily frequency is inferred here).
period = pd.Period('20160101')
'''
Period('2016-01-01', 'D')
'''
# (5) Timedelta: a time offset / duration.
one_day = pd.Timedelta('1 day')
'''
Timedelta('1 days 00:00:00')
'''
# Adding a one-day Timedelta to a daily Period moves it to the next day.
next_period = pd.Period('20160101') + pd.Timedelta('1 day')
'''
Period('2016-01-02', 'D')
'''
# 2 Resampling

# 20 random values, one every 10 days starting 2016-07-01.
rng = pd.date_range('2016/07/01', periods=20, freq='10D')
ts = pd.Series(np.random.randn(20), index=rng)
'''
2016-07-01    1.732488
2016-07-11    0.387473
2016-07-21    1.660098
2016-07-31   -0.144417
2016-08-10   -0.097475
2016-08-20    0.944346
2016-08-30    1.119187
2016-09-09   -0.063570
2016-09-19    1.715250
2016-09-29   -0.070396
2016-10-09    0.468284
2016-10-19   -0.884766
2016-10-29   -0.127796
2016-11-08   -0.416377
2016-11-18   -1.180885
2016-11-28    1.818407
2016-12-08    0.534103
2016-12-18    0.374830
2016-12-28    1.805555
2017-01-07   -1.542637
Freq: 10D, dtype: float64
'''
# Month-end bins are requested with an offset object rather than the 'M'
# string alias: pandas 2.2 deprecates 'M' in favour of 'ME', while older
# releases do not know 'ME'. The offset object is accepted by both.
month_end = pd.offsets.MonthEnd()

# (1) Downsampling: monthly sum.
ts1 = ts.resample(month_end).sum()
'''
2016-07-31    3.635642
2016-08-31    1.966059
2016-09-30    1.581283
2016-10-31   -0.544278
2016-11-30    0.221146
2016-12-31    2.714489
2017-01-31   -1.542637
Freq: M, dtype: float64
'''
# (2) Downsampling: monthly mean.
ts2 = ts.resample(month_end).mean()
'''
2016-07-31    0.908911
2016-08-31    0.655353
2016-09-30    0.527094
2016-10-31   -0.181426
2016-11-30    0.073715
2016-12-31    0.904830
2017-01-31   -1.542637
Freq: M, dtype: float64
'''
# 10 random values, one every 3 days, used for the upsampling examples.
rg = pd.date_range('2016/07/01', periods=10, freq='3D')
ta = pd.Series(np.random.randn(10), index=rg)
'''
2016-07-01    0.058873
2016-07-04    1.211198
2016-07-07   -0.411161
2016-07-10    0.154518
2016-07-13    1.019206
2016-07-16    0.048274
2016-07-19    0.658861
2016-07-22   -0.053293
2016-07-25   -1.105786
2016-07-28   -1.044237
Freq: 3D, dtype: float64
'''
# (3) Upsampling from 3-day to daily frequency; the new days are NaN.
# Results are bound to names so they are not silently discarded when the
# file runs as a script.
ta_daily = ta.resample('D').asfreq()
'''
2016-07-01    0.058873
2016-07-02         NaN
2016-07-03         NaN
2016-07-04    1.211198
2016-07-05         NaN
2016-07-06         NaN
2016-07-07   -0.411161
2016-07-08         NaN
2016-07-09         NaN
2016-07-10    0.154518
2016-07-11         NaN
2016-07-12         NaN
2016-07-13    1.019206
2016-07-14         NaN
2016-07-15         NaN
2016-07-16    0.048274
2016-07-17         NaN
2016-07-18         NaN
2016-07-19    0.658861
2016-07-20         NaN
2016-07-21         NaN
2016-07-22   -0.053293
2016-07-23         NaN
2016-07-24         NaN
2016-07-25   -1.105786
2016-07-26         NaN
2016-07-27         NaN
2016-07-28   -1.044237
Freq: D, dtype: float64
'''
# (4) Upsampling from 3-day to daily frequency with gap filling.
# Filling options: 1. ffill (carry the previous value forward),
# 2. bfill (use the next value), 3. interpolate (linear interpolation).
# limit=1 fills only the first NaN after each observation.
ta_ffill = ta.resample('D').ffill(limit=1)
'''
2016-07-01    0.058873
2016-07-02    0.058873
2016-07-03         NaN
2016-07-04    1.211198
2016-07-05    1.211198
2016-07-06         NaN
2016-07-07   -0.411161
2016-07-08   -0.411161
2016-07-09         NaN
2016-07-10    0.154518
2016-07-11    0.154518
2016-07-12         NaN
2016-07-13    1.019206
2016-07-14    1.019206
2016-07-15         NaN
2016-07-16    0.048274
2016-07-17    0.048274
2016-07-18         NaN
2016-07-19    0.658861
2016-07-20    0.658861
2016-07-21         NaN
2016-07-22   -0.053293
2016-07-23   -0.053293
2016-07-24         NaN
2016-07-25   -1.105786
2016-07-26   -1.105786
2016-07-27         NaN
2016-07-28   -1.044237
Freq: D, dtype: float64
'''
# Linear interpolation fills every gap between neighbouring observations.
ta_linear = ta.resample('D').interpolate()
'''
2016-07-01    0.058873
2016-07-02    0.442981
2016-07-03    0.827089
2016-07-04    1.211198
2016-07-05    0.670411
2016-07-06    0.129625
2016-07-07   -0.411161
2016-07-08   -0.222601
2016-07-09   -0.034042
2016-07-10    0.154518
2016-07-11    0.442747
2016-07-12    0.730977
2016-07-13    1.019206
2016-07-14    0.695562
2016-07-15    0.371918
2016-07-16    0.048274
2016-07-17    0.251803
2016-07-18    0.455332
2016-07-19    0.658861
2016-07-20    0.421476
2016-07-21    0.184092
2016-07-22   -0.053293
2016-07-23   -0.404124
2016-07-24   -0.754955
2016-07-25   -1.105786
2016-07-26   -1.085270
2016-07-27   -1.064754
2016-07-28   -1.044237
Freq: D, dtype: float64
'''

## 5 Rolling (sliding) windows

# 600 random values on a daily index that starts on 2016-07-01.
df = pd.Series(
    data=np.random.randn(600),
    index=pd.date_range(start='7/1/2016', periods=600, freq='D'),
)
print(df.head(5))
'''
2016-07-01   -0.953268
2016-07-02    2.739375
2016-07-03    0.286427
2016-07-04    1.333070
2016-07-05    1.424279
Freq: D, dtype: float64
'''
# Rolling view over 10 consecutive observations; as the sample output below
# shows, the first 9 positions have no complete window and come out as NaN.
r = df.rolling(10)
print(r.mean())
# Other reducers listed by the author:
# r.max, r.median, r.std, r.skew, r.sum, r.var
'''
2016-07-01         NaN
2016-07-02         NaN
2016-07-03         NaN
2016-07-04         NaN
2016-07-05         NaN
2016-07-06         NaN
2016-07-07         NaN
2016-07-08         NaN
2016-07-09         NaN
2016-07-10    0.291856
2016-07-11    0.375966
2016-07-12    0.108790
2016-07-13    0.090306
2016-07-14    0.072220
2016-07-15   -0.156018
2016-07-16   -0.339148
2016-07-17   -0.260727
2016-07-18   -0.285549
2016-07-19   -0.222160
2016-07-20    0.063413
2016-07-21   -0.177591
2016-07-22   -0.166860
2016-07-23   -0.095712
2016-07-24    0.043481
2016-07-25    0.236758
2016-07-26    0.324270
2016-07-27    0.227588
2016-07-28    0.304813
2016-07-29    0.455341
2016-07-30    0.246289
                ...   
2018-01-22    0.452047
2018-01-23    0.362680
2018-01-24    0.370529
2018-01-25    0.268178
2018-01-26    0.299888
2018-01-27    0.375809
2018-01-28    0.237203
2018-01-29    0.136555
2018-01-30   -0.016395
2018-01-31    0.107540
2018-02-01   -0.166109
2018-02-02   -0.132009
2018-02-03   -0.148901
2018-02-04   -0.076616
2018-02-05   -0.104145
2018-02-06   -0.359409
2018-02-07   -0.590481
2018-02-08   -0.391723
2018-02-09   -0.330878
2018-02-10   -0.394146
2018-02-11   -0.265302
2018-02-12   -0.165355
2018-02-13   -0.247112
2018-02-14   -0.242417
2018-02-15   -0.131072
2018-02-16   -0.163110
2018-02-17    0.012979
2018-02-18   -0.010954
2018-02-19    0.065030
2018-02-20    0.041370
Freq: D, Length: 600, dtype: float64
'''
# Plot the raw series (red dashed line) and its 10-observation rolling mean
# (blue line) on the same axes of a single 15x5 figure.
ax = plt.figure(figsize=(15, 5)).gca()
df.plot(style='r--', ax=ax)
df.rolling(window=10).mean().plot(style='b', ax=ax)

 

 

pandas 时间序列处理_第1张图片

你可能感兴趣的:(Python基础)