参考视频、官网、和微信相关文章
官网:https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
datetime模块是python处理时间和日期的标准库,主要包括:datetime.date、datetime.time、datetime.datetime、datetime.timedelta
1、datetime.date(year,month,day)日期 最小单位是天数
2、datetime.time(hour,min,sec)时间,最小单位是微秒
3、datetime.datetime date和time的合集,必须包含日期,可以省略时间。
4、date和datetime可以利用timedelta进行时间和日期的加减运算,也可以利用toordinal转换为日期序数(自公元1年1月1日起的天数,并非时间戳)、利用timetuple转换为时间元组,但time不能。
import numpy as np
import pandas as pd
import datetime
# --- datetime.time: a time of day with no date attached ---
t1 = datetime.time(hour=8, minute=12, second=4)
t2 = datetime.time(hour=9)  # fields left out default to 0
print(t1, type(t1))
t2_text = str(t2)
print(t2_text, type(t2_text))
# time objects can be ordered with < / >, but do not support + / - arithmetic
is_earlier = t1 < t2
print(is_earlier, '\n')

# --- datetime.date: a calendar date ---
t3 = datetime.date.today()
t4 = datetime.date(year=1981, month=3, day=27)
delta = datetime.timedelta(days=10)  # timedelta is a duration; here 10 days
print(t3, type(t3))
t4_text = str(t4)
print(t4_text, type(t4_text))
elapsed = t3 - t4  # date - date yields a timedelta
print(elapsed)
ten_days_later = t3 + delta  # date + timedelta yields a date
print(ten_days_later, '\n')
'''
08:12:04
09:00:00
True
2019-11-15
1981-03-27
14112 days, 0:00:00
2019-11-25
'''
# --- datetime.datetime: a date and a time of day combined ---
t1 = datetime.datetime.now()
t2 = datetime.datetime(2019, 11, 3)  # year/month/day are required; time fields default to 0
t3 = datetime.datetime(2019, 11, 3, 13, 7)
delta = datetime.timedelta(10)  # a single positional argument is a number of days
print(t1, type(t1))
print(t1.year, type(t1.year))  # year/month/day are exposed as plain ints
print(t1.month, type(t1.month))
print(t1.day, type(t1.day))
print(t1 - t2)  # datetime - datetime yields a timedelta
print(t2 + delta)
# toordinal() is the proleptic Gregorian ordinal of the date (a day count),
# not a Unix timestamp; use .timestamp() for seconds since the epoch.
x = t1.toordinal()
print(x, type(x))
y = t1.timetuple()  # time.struct_time view of the same moment
print(y, type(y))
# strftime is the public formatting API; it produces the same text the
# direct __format__ call did.
z = t1.strftime('%Y/%m/%d')
print(z)
'''
2019-11-15 15:05:01.251865
2019
11
15
12 days, 15:05:01.251865
2019-11-13 00:00:00
737378
time.struct_time(tm_year=2019, tm_mon=11, tm_mday=15, tm_hour=15, tm_min=5, tm_sec=1, tm_wday=4, tm_yday=319, tm_isdst=-1)
2019/11/15
'''
# timedelta takes days as its first positional argument; the other units
# are passed by keyword and combined into one duration.
delta1 = datetime.timedelta(days=10)
delta2 = datetime.timedelta(seconds=4, minutes=3, hours=2, days=1)
t1 = datetime.datetime(year=2019, month=11, day=3, hour=13, minute=7)
t2 = datetime.date.today()
print(t1, t2)
print(delta1, delta2)
# Adding the same duration to a datetime and to a date: the datetime result
# carries the time of day, the date result is still a plain date.
for moment in (t1, t2):
    print(moment + delta2)
'''
2019-11-03 13:07:00 2019-11-15
10 days, 0:00:00 1 day, 2:03:04
2019-11-04 15:10:04
2019-11-16
'''
# --- Parse date strings into datetime objects ---
from dateutil.parser import parse
d1 = '1981-03-27'  # ISO order: year-month-day
d2 = '27/3/1981'   # day first
d3 = '3/27/1981'   # month first
print(parse(d1), type(parse(d1)))
# dayfirst=True belongs on the day-first string. Without it an ambiguous
# value such as '4/3/1981' is read month-first.
print(parse(d2, dayfirst=True))
print(parse(d3))  # month-first is dateutil's default reading
# --- Render a datetime as a formatted string ---
now = datetime.datetime.now()
print(now)
print(now.strftime('%Y/%m/%d %H:%M:%S'))
print(format(now, '%Y/%m/%d'))  # format() is the public route to __format__
'''
1981-03-27 00:00:00
1981-03-27 00:00:00
1981-03-27 00:00:00
2019-11-15 15:37:56.693854
2019/11/15 15:37:56
2019/11/15
'''
# --- Convert a datetime to a POSIX timestamp ---
t = datetime.datetime(year=2019, month=11, day=3, hour=13, minute=7)
# timestamp() returns seconds since the epoch as a float; this is the form
# computers typically use to store a moment in time.
t2 = t.timestamp()
for value in (t, t2):
    print(value, type(value))
'''
2019-11-03 13:07:00
1572757620.0
'''
建立pandas时间戳主要可以通过两种方式:pd.Timestamp()和pd.to_datetime()。
# --- Way 1: pd.Timestamp builds a single timestamp ---
date = datetime.datetime(2019, 11, 14)
t1 = pd.Timestamp(2019, 11, 14)            # from year, month, day integers
t2 = pd.Timestamp('19810327105512')        # from a compact digit string
t3 = pd.Timestamp('2018-09-18 10:33:03')   # from a formatted string
t4 = pd.Timestamp(date)                    # from a datetime.datetime
# --- Way 2: pd.to_datetime converts scalars and sequences ---
date1 = '19810327 12:00'                           # a string
date2 = datetime.datetime(2019, 11, 14, 13, 7)     # a datetime.datetime
t1 = pd.to_datetime(date1)
t2 = pd.to_datetime(date2)
print(t1, type(t1))
print(t2, type(t2))
date = ['20180327', '20190423', '20060504']  # a list of strings
t3 = pd.to_datetime(date)
print(t3, type(t3))
# When a sequence contains values that are not dates, the `errors` argument
# decides what happens:
#   errors='raise'  (default) raise an exception
#   errors='coerce' turn unparseable entries into NaT (Not a Time); the
#                   result is still a DatetimeIndex
# errors='ignore' (return the input unchanged) is deprecated in pandas 2.2,
# so that behaviour is reproduced explicitly by catching the parse error.
date3 = ['2017-2-1', '2017-2-2', '2017-2-3', 'hello world!']
try:
    t3 = pd.to_datetime(date3)
except (ValueError, TypeError):
    t3 = pd.Index(date3)  # fall back to a plain Index of the original strings
print(t3, type(t3))
t4 = pd.to_datetime(date3, errors='coerce')
print(t4, type(t4))
'''
Index(['2017-2-1', '2017-2-2', '2017-2-3', 'hello world!'], dtype='object')
DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', 'NaT'], dtype='datetime64[ns]', freq=None)
'''
时间序列指index是DatetimeIndex的Series或DataFrame,拥有普通序列的所有特性的同时,还具有时间相关的一些特性。
# A DatetimeIndex can be used as the index of a Series or DataFrame.
# A list of Timestamp objects used as an index is converted to a DatetimeIndex.
# A "time series" is a Series/DataFrame whose index is a DatetimeIndex.

# Built directly with the DatetimeIndex constructor
t1 = pd.DatetimeIndex(['20190327', '3-27-2018', '2017/03/27'])
s1 = pd.Series(data=['Bob', 'Lily', 'Alice'], index=t1)
print(t1, s1, sep='\n')

# Built from a list of strings via pd.to_datetime
date = ['20180327', '20190423', '20060504']
t2 = pd.to_datetime(date)
s2 = pd.Series(data=['Bob', 'Lily', 'Alice'], index=t2)
print(t2, s2, sep='\n')

# Built from a plain list of Timestamp objects
date2 = [pd.Timestamp(day) for day in ('20180327', '20190423', '20060504')]
s3 = pd.Series(data=['Bob', 'Lily', 'Alice'], index=date2)
print(date2, s3.index, sep='\n')
按日期范围返回生成时间戳索引
# --- pd.date_range: generate a DatetimeIndex over a range of dates ---
# start + end
d1 = pd.date_range('2019/03/27', '4/14/2019')
# start (or end) + number of periods
d2 = pd.date_range(start='2019-03-27', periods=10)
d3 = pd.date_range(end='2019/04/05 15:00:00', periods=10)  # the end point carries a time of day
# Endpoint inclusion. The old `closed=` keyword was removed from date_range
# in pandas 2.0; `inclusive=` is its replacement.
print(pd.date_range('20190327', '20190330'))  # both endpoints included
print(pd.date_range('20190327', '20190330', inclusive='right'))  # start excluded
print(pd.date_range('20190327', '20190330', inclusive='left'))  # end excluded
'''
DatetimeIndex(['2019-03-27', '2019-03-28', '2019-03-29', '2019-03-30'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2019-03-28', '2019-03-29', '2019-03-30'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2019-03-27', '2019-03-28', '2019-03-29'], dtype='datetime64[ns]', freq='D')
'''
# pd.bdate_range() uses business-day frequency by default
business_days = pd.bdate_range(start='20170101', end='20170107')
print(business_days)
# Wrapping a DatetimeIndex in list() gives a list whose elements are Timestamps
first_ten_days = pd.date_range(start='1/1/2017', periods=10)
print(list(first_ten_days))
默认频率为’D’天
method为新index对应值的填充方式:None不填充(缺失处为NaN),ffill用前一个有效值向后填充,bfill用后一个有效值向前填充
# --- asfreq: re-index a daily series onto a different frequency ---
ts = pd.Series(np.random.rand(3), index=pd.date_range('20191001', '20191003'))
print(ts)
# Lowercase 'h' / 's' are used because the uppercase 'H' / 'S' aliases are
# deprecated in pandas 2.2.
print(ts.asfreq('8h'))  # no fill method: new slots with no source value are NaN
print(ts.asfreq('8h30s', method='ffill'))  # fill each new slot from the previous value
print(ts.asfreq('8h30s', method='bfill'))  # fill each new slot from the next value
使用freq参数,偏移index
不使用freq参数,偏移values
正数表示数值后移,负数表示数值前移,可通过偏移计算比例等。
# --- shift: move values or the index ---
# The end date is '20191004'; the original literal had stray pasted editor
# text in the middle of it and could not be parsed as a date.
ts = pd.Series(np.random.rand(4), index=pd.date_range('20191001', '20191004'))
print(ts)
print(ts.shift(2))   # without freq: values move 2 slots later, leading slots become NaN
print(ts.shift(-2))  # negative: values move 2 slots earlier, trailing slots become NaN
print(ts.shift(2, freq='D'))    # with freq: the index moves by 2 days, values unchanged
print(ts.shift(2, freq='min'))  # index moves by 2 minutes ('min' replaces the deprecated 'T')
Series和DataFrame中所有索引切片功能均可用于时间序列
重点关注重采样部分
import pandas as pd
import numpy as np
# --- Indexing and slicing a time series ---
date = pd.date_range('2019/1/1', '2019/1/10')
ts = pd.Series(np.random.rand(len(date)), index=date)
print(ts.head())
# A date string in any recognised format selects the matching entry
print(ts['2019/1/1'])
print(ts['2019-01-01'])
# Slicing by date strings
print(ts['2019-01-02':'2019-01-05'])  # both endpoints are included
print(ts.loc['2019-01-02':'2019-01-05'])
print(ts['2019-01'])  # a partial string selects every entry in that month
print(ts['2019-01'][::2])  # then take every second entry

# --- Resampling ---
date = pd.date_range('2019-11-11', '2019-11-16')
ts = pd.Series(np.arange(len(date)), index=date)
print(ts)
# Lowercase 'h' is used because the uppercase 'H' alias is deprecated in
# pandas 2.2.
print(ts.resample('1D12h'))  # resample() alone returns a Resampler object, not data
print(ts.resample('1D12h').mean())  # downsample: aggregate each 36-hour bin with mean
print(ts.resample('12h'))
print(ts.resample('12h').ffill())  # upsample: fill the new 12-hour slots from the previous value