#resample
import pandas as pd
import numpy as np
# Daily index of 100 consecutive dates starting 2017-01-01, paired with the
# integers 0..99 so every aggregate below is easy to verify by hand.
date_rng = pd.date_range(start='20170101', periods=100, freq='D')
ser_obj = pd.Series(range(date_rng.size), index=date_rng)
print(ser_obj.iloc[:10])
answer:
2017-01-01 0
2017-01-02 1
2017-01-03 2
2017-01-04 3
2017-01-05 4
2017-01-06 5
2017-01-07 6
2017-01-08 7
2017-01-09 8
2017-01-10 9
Freq: D, dtype: int64
# Downsample the daily series to calendar months: one total and one mean
# per month.
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME';
# confirm the target pandas version before changing it.
monthly = ser_obj.resample('M')
resample_month_sum = monthly.sum()
resample_month_mean = monthly.mean()
print('按月求和:', resample_month_sum)
print('按月求均值:', resample_month_mean)
answer:
按月求和: 2017-01-31 465
2017-02-28 1246
2017-03-31 2294
2017-04-30 945
Freq: M, dtype: int64
按月求均值: 2017-01-31 15.0
2017-02-28 44.5
2017-03-31 74.0
2017-04-30 94.5
Freq: M, dtype: float64
# Aggregate the data into 5-day bins: sum, mean and open/high/low/close
# are all computed from the same 5-day resampler.
five_day = ser_obj.resample('5D')
five_day_sum_sample = five_day.sum()
five_day_mean_sample = five_day.mean()
five_day_ohlc_sample = five_day.ohlc()
print('降采样,sum..')
print(five_day_sum_sample.iloc[:5])
降采样,sum..
2017-01-01 10
2017-01-06 35
2017-01-11 60
2017-01-16 85
2017-01-21 110
dtype: int64
# Show the first five 5-day bins of the open/high/low/close summary.
print('降采样,ohlc')
print(five_day_ohlc_sample.iloc[:5])
降采样,ohlc
open high low close
2017-01-01 0 4 0 4
2017-01-06 5 9 5 9
2017-01-11 10 14 10 14
2017-01-16 15 19 15 19
2017-01-21 20 24 20 24
# Downsample with groupby instead of resample: the key function derives the
# month number from the datetime index, and values are summed per month.
monthly_totals = ser_obj.groupby(lambda stamp: stamp.month).sum()
print(monthly_totals)
answer
1 465
2 1246
3 2294
4 945
dtype: int32
# Total the series per day of week (0 = Monday ... 6 = Sunday).
# Use the `dayofweek` property rather than `weekday`: on a Timestamp,
# `weekday` is a method, so `x.weekday` without a call yields a bound
# method object instead of an integer key whenever the key function is
# applied to individual index entries.
print(ser_obj.groupby(lambda x: x.dayofweek).sum())
answer
0 750
1 665
2 679
3 693
4 707
5 721
6 735
dtype: int32
# Upsampling
# Weekly (Monday-anchored) frame of random normal values; the sections that
# follow expand it to daily frequency.
weekly_index = pd.date_range('20170101', periods=5, freq='W-MON')
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=weekly_index,
    columns=['S1', 'S2', 'S3'],
)
print(df)
answer
S1 S2 S3
2017-01-02 0.087264 -0.047404 -0.754223
2017-01-09 1.148830 2.439266 -0.889873
2017-01-16 0.331767 0.918984 1.164783
2017-01-23 -0.582157 0.923737 1.938061
2017-01-30 -0.637087 0.143846 -1.500307
# Upsampling to daily frequency without any fill leaves NaN on every newly
# created row.
daily = df.resample('D').asfreq()
print(daily.head(10))
answer
S1 S2 S3
2017-01-02 0.003409 -0.939362 2.036451
2017-01-03 NaN NaN NaN
2017-01-04 NaN NaN NaN
2017-01-05 NaN NaN NaN
2017-01-06 NaN NaN NaN
2017-01-07 NaN NaN NaN
2017-01-08 NaN NaN NaN
2017-01-09 0.291274 -0.655332 -1.034041
2017-01-10 NaN NaN NaN
2017-01-11 NaN NaN NaN
# Forward-fill, propagating each weekly value onto at most 2 of the new
# daily rows. Print 10 rows (as in the asfreq example) so that both the
# rows left as NaN once the limit is reached and the start of the second
# week are visible.
print(df.resample('D').ffill(limit=2).head(10))
answer
S1 S2 S3
2017-01-02 0.003409 -0.939362 2.036451
2017-01-03 0.003409 -0.939362 2.036451
2017-01-04 0.003409 -0.939362 2.036451
2017-01-05 NaN NaN NaN
2017-01-06 NaN NaN NaN
2017-01-07 NaN NaN NaN
2017-01-08 NaN NaN NaN
2017-01-09 0.291274 -0.655332 -1.034041
2017-01-10 0.291274 -0.655332 -1.034041
2017-01-11 0.291274 -0.655332 -1.034041
# Back-fill: every new daily row takes the next available weekly value.
print(df.resample('D').bfill())
# Forward-fill with no limit. Resampler.fillna('ffill') is deprecated in
# recent pandas releases; calling ffill() directly gives the same result.
print(df.resample('D').ffill())
# Linear interpolation between consecutive weekly values.
print(df.resample('D').interpolate('linear'))