参考书目:《跟老齐学Python-数据分析》
import datetime
now = datetime.datetime.now() # current time
now
Out[4]: datetime.datetime(2021, 4, 28, 22, 9, 3, 502591)
now.day,now.month,now.year
Out[5]: (28, 4, 2021)
now.time()
Out[6]: datetime.time(22, 9, 3, 502591)
datetime.datetime.today()
Out[8]: datetime.datetime(2021, 4, 28, 22, 11, 37, 440566)
# -----------------------
import numpy as np
import pandas as pd
now = pd.Timestamp.now()
now
Out[13]: Timestamp('2021-04-28 22:12:46.451419')
now.year,now.month,now.day
Out[14]: (2021, 4, 28)
# 实例化Timestamp类得到指定时刻的Timestamp对象
pd.Timestamp(datetime.datetime(1997,7,1))
Out[16]: Timestamp('1997-07-01 00:00:00')
datetime.datetime(1997,7,1)
Out[17]: datetime.datetime(1997, 7, 1, 0, 0)
pd.Timestamp("1997-7-1")
Out[18]: Timestamp('1997-07-01 00:00:00')
pd.Timestamp(1997,7,1)
Out[19]: Timestamp('1997-07-01 00:00:00')
# pandas中关于时间的另外两个类Timedelta,Period
# Timedelta类实例化对象
pd.Timedelta(days=3,hours=4,minutes=5,seconds=6)
Out[22]: Timedelta('3 days 04:05:06')
pd.Timedelta(seconds=12345)
Out[23]: Timedelta('0 days 03:25:45')
# Period 类,周期是一种有规律的时间间隔
now_week = pd.Period.now(freq='W')
now_week
Out[26]: Period('2021-04-26/2021-05-02', 'W-SUN') # 返回的是当前时刻所在周
# now_week的两个属性得到开始时刻和结束时刻的Timestamp对象
now_week.start_time,now_week.end_time
Out[29]: (Timestamp('2021-04-26 00:00:00'), Timestamp('2021-05-02 23:59:59.999999999'))
# 下面实例化Period类
pd.Period("1997-07") # 得到这个时刻的所在月
Out[31]: Period('1997-07', 'M')
pd.Period('1997-07-01','D')# 得到这个时刻的所在日
Out[32]: Period('1997-07-01', 'D')
# 通过Timestamp对象,借助freq所规定的频率创建Period对象
t1 = pd.Timestamp("1997-07-01")
t1.to_period('W')
Out[35]: Period('1997-06-30/1997-07-06', 'W-SUN')
now = pd.Timestamp.now()
now
Out[37]: Timestamp('2021-04-28 23:10:31.110898')
now.dayofyear # now 是一年当中的第几天
Out[38]: 118
now.dayofweek # now 是当周中的第几天(从0开始计数,周一为0)
Out[39]: 2
now.hour
Out[40]: 23
# 和 datetime 这个标准库 做做比较
datetime.datetime(year=1997,month=7,day=1)
Out[42]: datetime.datetime(1997, 7, 1, 0, 0)
date_np = np.array("1997-07-01",dtype=np.datetime64)
date_np
Out[44]: array('1997-07-01', dtype='datetime64[D]')
# datetime64类型的对象是64位的精度存储的,这里datetime64[D]表示以日为单位
m = np.datetime64("1997-07") # 以“月”为单位
m
Out[47]: numpy.datetime64('1997-07')
m = np.datetime64("1997-07-01")
m
Out[49]: numpy.datetime64('1997-07-01')
m.dtype
Out[50]: dtype('<M8[D]')
n = np.datetime64("1997-07-01 19:19:00")
n,n.dtype
Out[52]: (numpy.datetime64('1997-07-01T19:19:00'), dtype('<M8[s]')) # 以秒为单位
n = np.datetime64("1997-07-01 19:19:00",'D') # 指定单位为‘日’
n,n.dtype
Out[55]: (numpy.datetime64('1997-07-01'), dtype('<M8[D]'))
n = np.datetime64('1997-07-01','ns') #指定单位为‘纳秒’
n,n.dtype
Out[57]: (numpy.datetime64('1997-07-01T00:00:00.000000000'), dtype('<M8[ns]'))
date_np
Out[58]: array('1997-07-01', dtype='datetime64[D]')
date_np + np.arange(20)
Out[59]:
array(['1997-07-01', '1997-07-02', '1997-07-03', '1997-07-04',
'1997-07-05', '1997-07-06', '1997-07-07', '1997-07-08',
'1997-07-09', '1997-07-10', '1997-07-11', '1997-07-12',
'1997-07-13', '1997-07-14', '1997-07-15', '1997-07-16',
'1997-07-17', '1997-07-18', '1997-07-19', '1997-07-20'],
dtype='datetime64[D]')
# 在pandas中 ......
date_pd = pd.to_datetime('1st of July,1997')
date_pd
Out[61]: Timestamp('1997-07-01 00:00:00')
date_pd.strftime('%A')
Out[62]: 'Tuesday'
date_pd + pd.to_timedelta(np.arange(20),'D')
Out[64]:
DatetimeIndex(['1997-07-01', '1997-07-02', '1997-07-03', '1997-07-04',
'1997-07-05', '1997-07-06', '1997-07-07', '1997-07-08',
'1997-07-09', '1997-07-10', '1997-07-11', '1997-07-12',
'1997-07-13', '1997-07-14', '1997-07-15', '1997-07-16',
'1997-07-17', '1997-07-18', '1997-07-19', '1997-07-20'],
dtype='datetime64[ns]', freq=None)
date_pd + pd.to_timedelta(np.arange(5),'D')
Out[65]:
DatetimeIndex(['1997-07-01', '1997-07-02', '1997-07-03', '1997-07-04',
'1997-07-05'],
dtype='datetime64[ns]', freq=None)
# 时间索引:DatetimeIndex,TimedeltaIndex,PeriodIndex
# 第一种 以Timestamp为基础的索引DatetimeIndex
dates = [pd.Timestamp('1997-07-01'),pd.Timestamp('1997-7-2'),pd.Timestamp('1997-7-3')]
s = pd.Series([100,200,300],index=dates)
s
Out[70]:
1997-07-01 100
1997-07-02 200
1997-07-03 300
dtype: int64
s.index
Out[71]: DatetimeIndex(['1997-07-01', '1997-07-02', '1997-07-03'], dtype='datetime64[ns]', freq=None)
# 使用 pd.to_datetime函数更灵活一点
date_index = pd.to_datetime([datetime.datetime(1997,7,1),'3rd of July,1997','1997.7.1','1997-07-07','1997-Jul-8','19970710','10/7/1997'])
date_index
Out[75]:
DatetimeIndex(['1997-07-01', '1997-07-03', '1997-07-01', '1997-07-07',
'1997-07-08', '1997-07-10', '1997-10-07'],
dtype='datetime64[ns]', freq=None)
# pd.to_datetime的用法,如果参数是单个日期或时间的对象,会生成Timestamp对象;如果参数是一个日期或时间的序列,会生成一个DatetimeIndex类型的索引对象
periods = [pd.Period('1997-07'),pd.Period('1997-08'),pd.Period('1997-09')]
ps = pd.Series(np.linspace(100,200,3),index=periods)
ps
Out[80]:
1997-07 100.0
1997-08 150.0
1997-09 200.0
Freq: M, dtype: float64
ps.index
Out[81]: PeriodIndex(['1997-07', '1997-08', '1997-09'], dtype='period[M]', freq='M')
# 第二种 以Period为基础的索引PeriodIndex(上面的ps.index就是一个例子)
# .to_period(freq='M')方法把DatetimeIndex转化为PeriodIndex
date_index
Out[83]:
DatetimeIndex(['1997-07-01', '1997-07-03', '1997-07-01', '1997-07-07',
'1997-07-08', '1997-07-10', '1997-10-07'],
dtype='datetime64[ns]', freq=None)
# 周期定位‘日’
date_index.to_period('D')
Out[84]:
PeriodIndex(['1997-07-01', '1997-07-03', '1997-07-01', '1997-07-07',
'1997-07-08', '1997-07-10', '1997-10-07'],
dtype='period[D]', freq='D')
# 周期定位‘月’
perior_index_m = date_index.to_period('M')
perior_index_m
Out[86]:
PeriodIndex(['1997-07', '1997-07', '1997-07', '1997-07', '1997-07', '1997-07',
'1997-10'],
dtype='period[M]', freq='M')
t = date_index[0]
t
Out[88]: Timestamp('1997-07-01 00:00:00')
# 第三种 以Timedelta为基础的索引TimedeltaIndex
delta_index = date_index -t
delta_index
Out[90]:
TimedeltaIndex(['0 days', '2 days', '0 days', '6 days', '7 days', '9 days',
'98 days'],
dtype='timedelta64[ns]', freq=None)
# 类似于np.arange(), 时间中有pd.date_range(),pd.period_range(),pd.timedelta_range()
pd.date_range('1997-01','1997-02')
Out[92]:
DatetimeIndex(['1997-01-01', '1997-01-02', '1997-01-03', '1997-01-04',
'1997-01-05', '1997-01-06', '1997-01-07', '1997-01-08',
'1997-01-09', '1997-01-10', '1997-01-11', '1997-01-12',
'1997-01-13', '1997-01-14', '1997-01-15', '1997-01-16',
'1997-01-17', '1997-01-18', '1997-01-19', '1997-01-20',
'1997-01-21', '1997-01-22', '1997-01-23', '1997-01-24',
'1997-01-25', '1997-01-26', '1997-01-27', '1997-01-28',
'1997-01-29', '1997-01-30', '1997-01-31', '1997-02-01'],
dtype='datetime64[ns]', freq='D')
# 注意这里的频率是‘日’,that is ‘D’
pd.date_range('1997-01','1997-02',freq='M')
Out[94]: DatetimeIndex(['1997-01-31'], dtype='datetime64[ns]', freq='M')
pd.date_range('1997-01','1997-03',freq='M')
Out[95]: DatetimeIndex(['1997-01-31', '1997-02-28'], dtype='datetime64[ns]', freq='M')
# 使用periods参数,设置即将生成的DatetimeIndex对象中的“时刻”数量
pd.date_range('1997-01-01',periods = 8,freq = 'H')
Out[98]:
DatetimeIndex(['1997-01-01 00:00:00', '1997-01-01 01:00:00',
'1997-01-01 02:00:00', '1997-01-01 03:00:00',
'1997-01-01 04:00:00', '1997-01-01 05:00:00',
'1997-01-01 06:00:00', '1997-01-01 07:00:00'],
dtype='datetime64[ns]', freq='H')
pd.period_range('2017-07',periods=7,freq='M')
Out[99]:
PeriodIndex(['2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
'2018-01'],
dtype='period[M]', freq='M')
pd.timedelta_range(0,periods=7)
Out[101]:
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days', '5 days',
'6 days'],
dtype='timedelta64[ns]', freq='D')
pd.timedelta_range(0,periods=7,freq='3H30T')
Out[102]:
TimedeltaIndex(['0 days 00:00:00', '0 days 03:30:00', '0 days 07:00:00',
'0 days 10:30:00', '0 days 14:00:00', '0 days 17:30:00',
'0 days 21:00:00'],
dtype='timedelta64[ns]', freq='210T')
# resample 重采样部分
# 我感觉差不多就是时间索引意义上的groupby
dindex=pd.date_range('20/7/2017',periods=50,freq='D')
sdata = pd.Series(np.random.randn(len(dindex)),index=dindex)
sdata.index[0]
Out[107]: Timestamp('2017-07-20 00:00:00', freq='D')
sdata.index[-1]
Out[108]: Timestamp('2017-09-07 00:00:00', freq='D')
sdata.resample('M').mean()
Out[109]:
2017-07-31 -0.383048
2017-08-31 -0.142314
2017-09-30 0.515503
Freq: M, dtype: float64
# 表示每个月分成一组,各组计算平均值
sdata.resample('M',kind='period').mean()
Out[111]:
2017-07 -0.383048
2017-08 -0.142314
2017-09 0.515503
Freq: M, dtype: float64
# 上面的操作把‘日’频率降为‘月’频率,称为‘降采样’
t_inde = pd.date_range('2017-11-10',periods=20,freq='T')
t_inde
Out[114]:
DatetimeIndex(['2017-11-10 00:00:00', '2017-11-10 00:01:00',
'2017-11-10 00:02:00', '2017-11-10 00:03:00',
'2017-11-10 00:04:00', '2017-11-10 00:05:00',
'2017-11-10 00:06:00', '2017-11-10 00:07:00',
'2017-11-10 00:08:00', '2017-11-10 00:09:00',
'2017-11-10 00:10:00', '2017-11-10 00:11:00',
'2017-11-10 00:12:00', '2017-11-10 00:13:00',
'2017-11-10 00:14:00', '2017-11-10 00:15:00',
'2017-11-10 00:16:00', '2017-11-10 00:17:00',
'2017-11-10 00:18:00', '2017-11-10 00:19:00'],
dtype='datetime64[ns]', freq='T')
sdata2 = pd.Series(np.arange(len(t_inde)),index=t_inde)
sdata2
Out[116]:
2017-11-10 00:00:00 0
2017-11-10 00:01:00 1
2017-11-10 00:02:00 2
2017-11-10 00:03:00 3
2017-11-10 00:04:00 4
2017-11-10 00:05:00 5
2017-11-10 00:06:00 6
2017-11-10 00:07:00 7
2017-11-10 00:08:00 8
2017-11-10 00:09:00 9
2017-11-10 00:10:00 10
2017-11-10 00:11:00 11
2017-11-10 00:12:00 12
2017-11-10 00:13:00 13
2017-11-10 00:14:00 14
2017-11-10 00:15:00 15
2017-11-10 00:16:00 16
2017-11-10 00:17:00 17
2017-11-10 00:18:00 18
2017-11-10 00:19:00 19
Freq: T, dtype: int32
sdata2.resample('7min').sum()
Out[117]:
2017-11-10 00:00:00 21
2017-11-10 00:07:00 70
2017-11-10 00:14:00 99
Freq: 7T, dtype: int32
sdata2.resample('7min').max()
Out[118]:
2017-11-10 00:00:00 6
2017-11-10 00:07:00 13
2017-11-10 00:14:00 19
Freq: 7T, dtype: int32
sdata2.resample('7min',closed='right').max()
Out[119]:
2017-11-09 23:53:00 0
2017-11-10 00:00:00 7
2017-11-10 00:07:00 14
2017-11-10 00:14:00 19
Freq: 7T, dtype: int32
# asfreq() 也可以实现频率转化
sdata2.asfreq('7min')
Out[121]:
2017-11-10 00:00:00 0
2017-11-10 00:07:00 7
2017-11-10 00:14:00 14
Freq: 7T, dtype: int32
# 不同于resample()的是,asfreq()不做分组聚合,而是直接取新频率各时间点上已有的值(这里是00:00、00:07、00:14处的0、7、14),所以不用紧跟聚集函数
# 下面,把‘分钟’频率上升到‘秒’,称为‘升采样’,对于缺失值,默认为NaN,也可使用method参数设定缺失值是重复上面的值还是重复下面的值,也可使用
# fill_value参数指定是填充固定值
sdata2.asfreq('S')
Out[125]:
2017-11-10 00:00:00 0.0
2017-11-10 00:00:01 NaN
2017-11-10 00:00:02 NaN
2017-11-10 00:00:03 NaN
2017-11-10 00:00:04 NaN
...
2017-11-10 00:18:56 NaN
2017-11-10 00:18:57 NaN
2017-11-10 00:18:58 NaN
2017-11-10 00:18:59 NaN
2017-11-10 00:19:00 19.0
Freq: S, Length: 1141, dtype: float64
sdata2.asfreq('S',method='bfill') # 表示用缺失值下面的非缺失值填充缺失值
Out[127]:
2017-11-10 00:00:00 0
2017-11-10 00:00:01 1
2017-11-10 00:00:02 1
2017-11-10 00:00:03 1
2017-11-10 00:00:04 1
..
2017-11-10 00:18:56 19
2017-11-10 00:18:57 19
2017-11-10 00:18:58 19
2017-11-10 00:18:59 19
2017-11-10 00:19:00 19
Freq: S, Length: 1141, dtype: int32
sdata2.asfreq('S',method='ffill') # 表示用缺失值上面的非缺失值填充缺失值
Out[128]:
2017-11-10 00:00:00 0
2017-11-10 00:00:01 0
2017-11-10 00:00:02 0
2017-11-10 00:00:03 0
2017-11-10 00:00:04 0
..
2017-11-10 00:18:56 18
2017-11-10 00:18:57 18
2017-11-10 00:18:58 18
2017-11-10 00:18:59 18
2017-11-10 00:19:00 19
Freq: S, Length: 1141, dtype: int32
sdata2.asfreq('S',fill_value=999)
Out[129]:
2017-11-10 00:00:00 0
2017-11-10 00:00:01 999
2017-11-10 00:00:02 999
2017-11-10 00:00:03 999
2017-11-10 00:00:04 999
...
2017-11-10 00:18:56 999
2017-11-10 00:18:57 999
2017-11-10 00:18:58 999
2017-11-10 00:18:59 999
2017-11-10 00:19:00 19
Freq: S, Length: 1141, dtype: int32
# 获取某月的数据,有点像‘模糊查询’,查询出所有9月份的数据
sdata['2017-09']
Out[131]:
2017-09-01 -1.863481
2017-09-02 2.045290
2017-09-03 1.015021
2017-09-04 0.894747
2017-09-05 0.410238
2017-09-06 0.203433
2017-09-07 0.903272
Freq: D, dtype: float64