基于pandas的时间序列处理方法


import pandas as pd
import numpy as np
# Build a daily time series: 20 uniform random values starting 2016-01-01.
rng = pd.date_range('2016/1/1', periods=20, freq='D')
time = pd.Series(np.random.rand(20), index=rng)
#print(time)
# Filter the data: keep only the observations on or after 2016-01-10.
# Series.truncate returns a new Series rather than modifying `time` in place,
# so the result must be bound to a name; otherwise the filter is a no-op.
time_filtered = time.truncate(before='2016-1-10')
#print(time_filtered)

# Resampling:
#   1. converts time-indexed data from one frequency to another
#   2. downsampling
#   3. upsampling
rng = pd.date_range(start='1/1/2011', periods=90, freq='D')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
#print(ts.head())

# Converting to monthly granularity would look like this:
#ts.resample('M').sum()
# Downsample the daily series into 3-day bins and sum the values in each bin.
day3Ts = ts.resample(rule='3D').sum()
#print(ts.resample('3D').mean())


#插值填充
#print(day3Ts.resample('D').asfreq())

#三种插值方法
'''
1.ffill 空值取前面的值
2.bfill 空值取后面的值
3.interpolate 线性取值
'''
#print(day3Ts.resample('D').ffill(1))#对多少个NaN填充
'''
2011-01-01   -1.376295
2011-01-02   -1.376295
2011-01-03         NaN
'''

#print(day3Ts.resample('D').bfill(1))
'''
2011-01-01    0.948138
2011-01-02         NaN
2011-01-03    0.560229
2011-01-04    0.560229
'''
#print(day3Ts.resample('D').interpolate('linear'))
'''
2011-01-01    0.453659
2011-01-02    0.993409
2011-01-03    1.533158
2011-01-04    2.072907
'''
# Rolling (sliding) window:
# when forecasting, take the average over a span of time.

# 600 consecutive daily timestamps starting 2016-07-01, used as the index
# of a Series of standard-normal random values.
_daily_index = pd.date_range('7/1/2016', freq='D', periods=600)
df = pd.Series(np.random.randn(600), index=_daily_index)
#print(df.head())

#r = df.rolling(window=10)  # window size of 10

# Other aggregations available on the rolling object:
#r.max,r.median,r.std,r.skew,r.sum,r.var
#print(r.mean())
'''
2016-07-01         NaN
2016-07-02         NaN
2016-07-03         NaN
2016-07-04         NaN
2016-07-05         NaN
2016-07-06         NaN
2016-07-07         NaN
2016-07-08         NaN
2016-07-09         NaN
2016-07-10    0.437476
2016-07-11    0.042982
'''

import matplotlib.pyplot as plt
# Draw 600 days of standard-normal noise as a dashed red line, then overlay
# its 10-day rolling mean as a solid blue line on the same 15x5 figure.
_plot_index = pd.date_range('7/1/2016', freq='D', periods=600)
df = pd.Series(np.random.randn(600), index=_plot_index)
plt.figure(figsize=(15, 5))
df.plot(style='r--')
_rolling_mean = df.rolling(window=10).mean()
_rolling_mean.plot(style='b')

基于pandas的时间序列处理方法_第1张图片

你可能感兴趣的:(机器学习)