01-pandas时间类型dt类属性及方法

pandas时间类型的所有属性及方法

方便今后自己复习

import pandas as pd
from datetime import datetime, timedelta
import locale

# Sample records used by the examples below: six people, each with an ID
# string, a name, a boolean gender flag, a birthday and a height.
data = dict(
    ID=['000' + str(n) for n in range(1, 7)],
    name=['aaa', 'bbb', 'ccc', 'ddd', 'eee', 'fff'],
    gender=[True, True, False, True, False, True],
    birthday=[
        # Only the first birthday carries a time of day (down to the
        # microsecond); the others are plain dates at midnight.
        datetime(2004, 10, 1, hour=10, minute=32, second=45, microsecond=85),
        datetime(2000, 11, 27),
        datetime(2002, 1, 27),
        datetime(2002, 8, 15),
        datetime(2003, 1, 1),
        datetime(2002, 12, 31),
    ],
    height=[1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
)

# One row per person; the dict keys become the column names.
df = pd.DataFrame(data)
df['birthday']
0   2004-10-01 10:32:45.000085
1   2000-11-27 00:00:00.000000
2   2002-01-27 00:00:00.000000
3   2002-08-15 00:00:00.000000
4   2003-01-01 00:00:00.000000
5   2002-12-31 00:00:00.000000
Name: birthday, dtype: datetime64[ns]
df['birthday'].dt.year  # 获取年
0    2004
1    2000
2    2002
3    2002
4    2003
5    2002
Name: birthday, dtype: int64
df['birthday'].dt.month  # 获取月
0    10
1    11
2     1
3     8
4     1
5    12
Name: birthday, dtype: int64
df['birthday'].dt.day  # 获取天
0     1
1    27
2    27
3    15
4     1
5    31
Name: birthday, dtype: int64
df['birthday'].dt.time  # 获取时间
0    10:32:45.000085
1           00:00:00
2           00:00:00
3           00:00:00
4           00:00:00
5           00:00:00
Name: birthday, dtype: object
df['birthday'].dt.timetz  # 获取时间(带时区信息;此处数据没有时区,结果与 dt.time 相同)
0    10:32:45.000085
1           00:00:00
2           00:00:00
3           00:00:00
4           00:00:00
5           00:00:00
Name: birthday, dtype: object
df['birthday'].dt.hour  # 获取小时
0    10
1     0
2     0
3     0
4     0
5     0
Name: birthday, dtype: int64
df['birthday'].dt.second  # 获取秒
0    45
1     0
2     0
3     0
4     0
5     0
Name: birthday, dtype: int64
df['birthday'].dt.microsecond  # 获取微秒
0    85
1     0
2     0
3     0
4     0
5     0
Name: birthday, dtype: int64
df['birthday'].dt.day_name()  # 获取星期
0       Friday
1       Monday
2       Sunday
3     Thursday
4    Wednesday
5      Tuesday
Name: birthday, dtype: object
df['birthday'].dt.weekday  # 获取是一周的第几天(周一为0,周日为6)
0    4
1    0
2    6
3    3
4    2
5    1
Name: birthday, dtype: int64
df['birthday'].dt.dayofweek  # 获取是一周的第几天(周一为0,周日为6),与 weekday 结果相同
0    4
1    0
2    6
3    3
4    2
5    1
Name: birthday, dtype: int64
df['birthday'].dt.dayofyear  # 获取一年的第多少天
0    275
1    332
2     27
3    227
4      1
5    365
Name: birthday, dtype: int64
df['birthday'].dt.is_leap_year  # 判断是否为闰年
0     True
1     True
2    False
3    False
4    False
5    False
Name: birthday, dtype: bool
df['birthday'].dt.is_month_end  # 判断是否是一个月的最后一天
0    False
1    False
2    False
3    False
4    False
5     True
Name: birthday, dtype: bool
df['birthday'].dt.is_quarter_end  # 判断是否是季度的最后一天
0    False
1    False
2    False
3    False
4    False
5     True
Name: birthday, dtype: bool
df['birthday'].dt.is_quarter_start  # 判断是否是季度的第一天
0     True
1    False
2    False
3    False
4     True
5    False
Name: birthday, dtype: bool
df['birthday'].dt.is_year_start  # 判断是否是一年的第一天
0    False
1    False
2    False
3    False
4     True
5    False
Name: birthday, dtype: bool
df['birthday'].dt.quarter  # 获取是一年的第几个季度
0    4
1    4
2    1
3    3
4    1
5    4
Name: birthday, dtype: int64
df['birthday'].dt.to_period(freq='Y')  # 转换为 Period(时间段)类型,freq 指定时间段的粒度,例如 'Y' 按年、'M' 按月、'D' 按日
0    2004
1    2000
2    2002
3    2002
4    2003
5    2002
Name: birthday, dtype: period[A-DEC]
df['birthday'].dt.to_pydatetime()  # 返回datetime模块的时间类型
array([datetime.datetime(2004, 10, 1, 10, 32, 45, 85),
       datetime.datetime(2000, 11, 27, 0, 0),
       datetime.datetime(2002, 1, 27, 0, 0),
       datetime.datetime(2002, 8, 15, 0, 0),
       datetime.datetime(2003, 1, 1, 0, 0),
       datetime.datetime(2002, 12, 31, 0, 0)], dtype=object)
df['birthday'].dt.strftime('%Y/%m/%d')  # 按指定格式将日期转换为字符串
0    2004/10/01
1    2000/11/27
2    2002/01/27
3    2002/08/15
4    2003/01/01
5    2002/12/31
Name: birthday, dtype: object
df['birthday'].dt.floor('min')  # 向下取整 可选参数有 'D','H','S', 'min'
0   2004-10-01 10:32:00
1   2000-11-27 00:00:00
2   2002-01-27 00:00:00
3   2002-08-15 00:00:00
4   2003-01-01 00:00:00
5   2002-12-31 00:00:00
Name: birthday, dtype: datetime64[ns]
df['birthday'].dt.ceil('H')  # 向上取整
0   2004-10-01 11:00:00
1   2000-11-27 00:00:00
2   2002-01-27 00:00:00
3   2002-08-15 00:00:00
4   2003-01-01 00:00:00
5   2002-12-31 00:00:00
Name: birthday, dtype: datetime64[ns]
df['birthday'].dt.round('H')  # 类似于四舍五入
0   2004-10-01 11:00:00
1   2000-11-27 00:00:00
2   2002-01-27 00:00:00
3   2002-08-15 00:00:00
4   2003-01-01 00:00:00
5   2002-12-31 00:00:00
Name: birthday, dtype: datetime64[ns]
df1 = df['birthday'].dt.tz_localize('Asia/Shanghai')  # 为不带时区的时间加上时区信息(本地化),时间数值本身不变
df1
0   2004-10-01 10:32:45.000085+08:00
1          2000-11-27 00:00:00+08:00
2          2002-01-27 00:00:00+08:00
3          2002-08-15 00:00:00+08:00
4          2003-01-01 00:00:00+08:00
5          2002-12-31 00:00:00+08:00
Name: birthday, dtype: datetime64[ns, Asia/Shanghai]
df1.dt.tz  # 获取时区信息
<DstTzInfo 'Asia/Shanghai' LMT+8:06:00 STD>
df1.dt.tz_convert(None)  # 转换时区,None表示转换为UTC时间后将删除时区
0   2004-10-01 02:32:45.000085
1   2000-11-26 16:00:00.000000
2   2002-01-26 16:00:00.000000
3   2002-08-14 16:00:00.000000
4   2002-12-31 16:00:00.000000
5   2002-12-30 16:00:00.000000
Name: birthday, dtype: datetime64[ns]

转换案例

import datetime

def strplus(val):
    """Return ``str(val)``, prefixed with '0' when it is exactly one character long.

    Used to render month numbers with two digits (e.g. 1 -> '01', 12 -> '12').
    Strings of any other length, including the empty string, are returned
    unchanged.
    """
    text = str(val)
    return '0' + text if len(text) == 1 else text

def gender(val):
    """Map a gender flag to its display label.

    Args:
        val: Gender flag. A truthy value maps to 'male', a falsy value to
            'Female'. A value that is already one of these two labels is
            returned unchanged.

    Returns:
        The string 'male' or 'Female'.
    """
    # Every non-empty string is truthy, so without this guard re-applying the
    # function to an already converted column (e.g. re-running the notebook
    # cell) would silently turn every 'Female' into 'male'.
    if isinstance(val, str) and val in ('male', 'Female'):
        return val
    return 'male' if val else 'Female'
    
# Birth year taken from the birthday timestamp.
df['year'] = df['birthday'].dt.year
# Age as a difference of calendar years (current year minus birth year);
# month and day are not taken into account.
df['age'] = datetime.datetime.now().year - df['year']
# "<year>年<two-digit month>" label, e.g. '2002年01'; strplus zero-pads the month.
df['birth'] = df['year'].map(str).str.cat(
    df['birthday'].dt.month.map(strplus),
    sep='年',
)
# Replace the boolean gender flag with its text label, in place.
df['gender'] = df['gender'].map(gender)
df
ID name gender birthday height year age birth
0 0001 aaa male 2004-10-01 10:32:45.000085 1.1 2004 16 2004年10
1 0002 bbb male 2000-11-27 00:00:00.000000 1.2 2000 20 2000年11
2 0003 ccc Female 2002-01-27 00:00:00.000000 1.3 2002 18 2002年01
3 0004 ddd male 2002-08-15 00:00:00.000000 1.4 2002 18 2002年08
4 0005 eee Female 2003-01-01 00:00:00.000000 1.5 2003 17 2003年01
5 0006 fff male 2002-12-31 00:00:00.000000 1.6 2002 18 2002年12
df['name'].groupby(df['birthday'].dt.year).count()
birthday
2000    1
2002    3
2003    1
2004    1
Name: name, dtype: int64

你可能感兴趣的:(python,python,数据分析)