pandas通常用于处理成组的日期,不管这些日期是DataFrame的轴索引还是列。to_datetime方法可以解析多种不同的日期表示形式,对标准日期格式(如ISO 8601)的解析非常快。
• to_datetime还可以处理缺失值(如None),并将其转换为NaT
• NaT(Not a Time)是pandas中时间戳数据的NA值
代码演示部分:
from pandas import Series,DataFrame
import pandas as pd
import numpy as np
from datetime import datetime
#字符串形式的日期列表
datestr = ["7-6-2011","8-6-2011"]
#用pandas将字符串形式的日期转换为时间序列
pd.to_datetime(datestr)
#打印结果:
DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)
idx = pd.to_datetime(datestr+[None])
idx
#打印结果:
DatetimeIndex(['2011-07-06', '2011-08-06', 'NaT'], dtype='datetime64[ns]', freq=None)
idx[1]
#打印结果:
Timestamp('2011-08-06 00:00:00')
type(idx[1])
#打印结果:
pandas._libs.tslib.Timestamp
#用pd.isnull()判断时间日期序列中的每个值是否为NaT,得到布尔掩码mask
#再用~mask取反进行过滤,去除掉不是时间日期的元素(NaT)
mask = pd.isnull(idx)
idx[~mask]
#打印结果:
DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)
#NaN:Not a Number(非数值)
#NaT:Not a Time(非时间)
pandas最基本的时间序列类型就是以时间戳(通常以Python字符串或datetime对象表示)为索引的Series。
代码演示如下:
#用datetime对象创建一组日期,并以它们作为索引创建Series
dates = [datetime(2011,1,2),datetime(2011,1,5),datetime(2011,1,7),datetime(2011,1,8),datetime(2011,1,10),datetime(2011,1,12)]
ts = Series(np.random.randn(6),index = dates)
ts
#打印结果:
2011-01-02 -1.419891
2011-01-05 0.020097
2011-01-07 1.097442
2011-01-08 0.411043
2011-01-10 0.099531
2011-01-12 1.584115
dtype: float64
stamp = ts.index[2]
ts[stamp] #ts[2]
#打印结果:
1.0974416756711631
stamp
#打印结果:
Timestamp('2011-01-07 00:00:00')
ts['1/10/2011']
#打印结果:
0.09953092149498394
ts["20110110"]
#打印结果:
0.09953092149498394
ts["2011-1"]
#打印结果:
2011-01-02 -1.419891
2011-01-05 0.020097
2011-01-07 1.097442
2011-01-08 0.411043
2011-01-10 0.099531
2011-01-12 1.584115
dtype: float64
#通过pd.date_range()方法创建一组时间日期序列
long_str = Series(np.random.randn(1000),index = pd.date_range("1/1/2000",periods = 1000))
long_str
#打印结果:
2000-01-01 -0.787681
2000-01-02 -1.191930
2000-01-03 0.148416
2000-01-04 1.438159
2000-01-05 -0.007413
2000-01-06 -0.389526
2000-01-07 0.489829
2000-01-08 0.709332
2000-01-09 -0.350490
2000-01-10 1.773416
2000-01-11 -0.752938
2000-01-12 -0.769435
2000-01-13 -0.037980
2000-01-14 -0.449208
2000-01-15 -1.290371
2000-01-16 1.685488
2000-01-17 0.695263
2000-01-18 -0.816276
2000-01-19 1.568410
2000-01-20 0.310752
2000-01-21 -0.850422
2000-01-22 -1.091251
2000-01-23 1.757697
2000-01-24 -0.078308
2000-01-25 0.506119
2000-01-26 -0.049218
2000-01-27 1.252128
2000-01-28 0.787892
2000-01-29 -0.292143
2000-01-30 -1.351507
...
2002-08-28 0.472796
2002-08-29 -0.912850
2002-08-30 1.056446
2002-08-31 0.691386
2002-09-01 -0.569072
2002-09-02 0.408966
2002-09-03 -1.912495
2002-09-04 0.375794
2002-09-05 0.260600
2002-09-06 0.685340
2002-09-07 -0.594120
2002-09-08 -1.521450
2002-09-09 0.996173
2002-09-10 -0.157691
2002-09-11 0.647485
2002-09-12 -0.485745
2002-09-13 -0.189201
2002-09-14 0.645380
2002-09-15 2.015986
2002-09-16 0.422788
2002-09-17 -0.068758
2002-09-18 -0.510743
2002-09-19 -0.580900
2002-09-20 0.961114
2002-09-21 1.288747
2002-09-22 -0.468706
2002-09-23 1.444036
2002-09-24 1.087008
2002-09-25 2.059874
2002-09-26 1.254894
Freq: D, Length: 1000, dtype: float64
#只获取2001年的所有数据
long_str['2001']
#打印结果:
2001-01-01 0.838627
2001-01-02 -0.032586
2001-01-03 -0.391376
2001-01-04 -0.649662
2001-01-05 -0.073540
2001-01-06 0.731106
2001-01-07 -0.226501
2001-01-08 -0.597196
2001-01-09 0.167759
2001-01-10 -0.353340
2001-01-11 1.104831
2001-01-12 0.064189
2001-01-13 -2.186897
2001-01-14 0.271867
2001-01-15 -1.058974
2001-01-16 -0.543559
2001-01-17 -0.687577
2001-01-18 -0.758481
2001-01-19 -0.195182
2001-01-20 1.118825
2001-01-21 1.675674
2001-01-22 -0.673984
2001-01-23 0.869181
2001-01-24 -0.723529
2001-01-25 -0.575066
2001-01-26 0.276656
2001-01-27 -0.485819
2001-01-28 -0.834541
2001-01-29 1.428966
2001-01-30 0.732680
...
2001-12-02 0.646727
2001-12-03 0.734419
2001-12-04 -0.638702
2001-12-05 0.100512
2001-12-06 1.232382
2001-12-07 0.505382
2001-12-08 0.310156
2001-12-09 0.665944
2001-12-10 -2.135604
2001-12-11 0.557466
2001-12-12 0.513930
2001-12-13 -0.268076
2001-12-14 0.312687
2001-12-15 0.379057
2001-12-16 1.146886
2001-12-17 -0.375350
2001-12-18 -1.252033
2001-12-19 -0.443089
2001-12-20 1.164611
2001-12-21 -1.554070
2001-12-22 -0.956719
2001-12-23 0.418286
2001-12-24 0.341350
2001-12-25 -1.812035
2001-12-26 -1.146693
2001-12-27 -0.141947
2001-12-28 -0.327807
2001-12-29 0.665398
2001-12-30 1.012688
2001-12-31 -0.199513
Freq: D, Length: 365, dtype: float64
#获取某年某月所有数据
long_str['2001-12']
#打印结果:
2001-12-01 -0.789236
2001-12-02 0.646727
2001-12-03 0.734419
2001-12-04 -0.638702
2001-12-05 0.100512
2001-12-06 1.232382
2001-12-07 0.505382
2001-12-08 0.310156
2001-12-09 0.665944
2001-12-10 -2.135604
2001-12-11 0.557466
2001-12-12 0.513930
2001-12-13 -0.268076
2001-12-14 0.312687
2001-12-15 0.379057
2001-12-16 1.146886
2001-12-17 -0.375350
2001-12-18 -1.252033
2001-12-19 -0.443089
2001-12-20 1.164611
2001-12-21 -1.554070
2001-12-22 -0.956719
2001-12-23 0.418286
2001-12-24 0.341350
2001-12-25 -1.812035
2001-12-26 -1.146693
2001-12-27 -0.141947
2001-12-28 -0.327807
2001-12-29 0.665398
2001-12-30 1.012688
2001-12-31 -0.199513
Freq: D, dtype: float64
#创建指定范围内的时间日期序列
index = pd.date_range("4/1/2012","6/1/2012")
index
#打印结果:
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
'2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
'2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
'2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
'2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
'2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
'2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
'2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
'2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
'2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
'2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
'2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
'2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
'2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
'2012-05-27', '2012-05-28', '2012-05-29', '2012-05-30',
'2012-05-31', '2012-06-01'],
dtype='datetime64[ns]', freq='D')
#只指定pd.date_range()的开始日期(start)或结束日期(end),再配合periods(生成的日期个数)创建时间日期序列
dateIndex = pd.date_range(start="4/1/2012",periods=20)
dateIndex
#打印结果:
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
'2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
'2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
'2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
'2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
dtype='datetime64[ns]', freq='D')
dateIndex = pd.date_range(end="6/1/2012",periods=20)
dateIndex
#打印结果:
DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
'2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
'2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
'2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
'2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
dtype='datetime64[ns]', freq='D')