方便今后自己复习
import pandas as pd
from datetime import datetime, timedelta
import locale
# Sample data: six people, each with an ID, a name, a boolean gender flag,
# a birthday timestamp and a height.
data = dict(
    ID=[f'000{i}' for i in range(1, 7)],
    name=[letter * 3 for letter in 'abcdef'],
    gender=[True, True, False, True, False, True],
    birthday=[
        # Only the first entry carries a time-of-day (10:32:45 + 85 microseconds);
        # the others default to midnight.
        datetime(2004, 10, 1, 10, 32, 45, 85),
        datetime(2000, 11, 27),
        datetime(2002, 1, 27),
        datetime(2002, 8, 15),
        datetime(2003, 1, 1),
        datetime(2002, 12, 31),
    ],
    height=[1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
)
df = pd.DataFrame(data)
# Display the raw birthday column.
df['birthday']
0 2004-10-01 10:32:45.000085
1 2000-11-27 00:00:00.000000
2 2002-01-27 00:00:00.000000
3 2002-08-15 00:00:00.000000
4 2003-01-01 00:00:00.000000
5 2002-12-31 00:00:00.000000
Name: birthday, dtype: datetime64[ns]
df['birthday'].dt.year # year component of each timestamp
0 2004
1 2000
2 2002
3 2002
4 2003
5 2002
Name: birthday, dtype: int64
df['birthday'].dt.month # month component of each timestamp
0 10
1 11
2 1
3 8
4 1
5 12
Name: birthday, dtype: int64
df['birthday'].dt.day # day-of-month component of each timestamp
0 1
1 27
2 27
3 15
4 1
5 31
Name: birthday, dtype: int64
df['birthday'].dt.time # time-of-day part only (the date is dropped)
0 10:32:45.000085
1 00:00:00
2 00:00:00
3 00:00:00
4 00:00:00
5 00:00:00
Name: birthday, dtype: object
df['birthday'].dt.timetz # time-of-day part with timezone info; same as .dt.time here because the column is timezone-naive
0 10:32:45.000085
1 00:00:00
2 00:00:00
3 00:00:00
4 00:00:00
5 00:00:00
Name: birthday, dtype: object
df['birthday'].dt.hour # hour component of each timestamp
0 10
1 0
2 0
3 0
4 0
5 0
Name: birthday, dtype: int64
df['birthday'].dt.second # second component of each timestamp (use .dt.minute for minutes)
0 45
1 0
2 0
3 0
4 0
5 0
Name: birthday, dtype: int64
df['birthday'].dt.microsecond # microsecond component of each timestamp (microseconds, not milliseconds)
0 85
1 0
2 0
3 0
4 0
5 0
Name: birthday, dtype: int64
df['birthday'].dt.day_name() # name of the weekday
0 Friday
1 Monday
2 Sunday
3 Thursday
4 Wednesday
5 Tuesday
Name: birthday, dtype: object
df['birthday'].dt.weekday # day of the week as an integer (Monday=0 ... Sunday=6)
0 4
1 0
2 6
3 3
4 2
5 1
Name: birthday, dtype: int64
df['birthday'].dt.dayofweek # day of the week as an integer; same values as .dt.weekday
0 4
1 0
2 6
3 3
4 2
5 1
Name: birthday, dtype: int64
df['birthday'].dt.dayofyear # ordinal day within the year (January 1st is 1)
0 275
1 332
2 27
3 227
4 1
5 365
Name: birthday, dtype: int64
df['birthday'].dt.is_leap_year # whether the timestamp falls in a leap year
0 True
1 True
2 False
3 False
4 False
5 False
Name: birthday, dtype: bool
df['birthday'].dt.is_month_end # whether the date is the last day of its month
0 False
1 False
2 False
3 False
4 False
5 True
Name: birthday, dtype: bool
df['birthday'].dt.is_quarter_end # whether the date is the last day of its quarter
0 False
1 False
2 False
3 False
4 False
5 True
Name: birthday, dtype: bool
df['birthday'].dt.is_quarter_start # whether the date is the first day of its quarter
0 True
1 False
2 False
3 False
4 True
5 False
Name: birthday, dtype: bool
df['birthday'].dt.is_year_start # whether the date is the first day of its year
0 False
1 False
2 False
3 False
4 True
5 False
Name: birthday, dtype: bool
df['birthday'].dt.quarter # quarter of the year the date falls in (1-4)
0 4
1 4
2 1
3 3
4 1
5 4
Name: birthday, dtype: int64
df['birthday'].dt.to_period(freq='Y') # convert to periods; freq selects the resolution to keep ('Y' = year)
0 2004
1 2000
2 2002
3 2002
4 2003
5 2002
Name: birthday, dtype: period[A-DEC]
df['birthday'].dt.to_pydatetime() # convert to standard-library datetime.datetime objects
array([datetime.datetime(2004, 10, 1, 10, 32, 45, 85),
datetime.datetime(2000, 11, 27, 0, 0),
datetime.datetime(2002, 1, 27, 0, 0),
datetime.datetime(2002, 8, 15, 0, 0),
datetime.datetime(2003, 1, 1, 0, 0),
datetime.datetime(2002, 12, 31, 0, 0)], dtype=object)
df['birthday'].dt.strftime('%Y/%m/%d') # format each timestamp as a string using the given pattern
0 2004/10/01
1 2000/11/27
2 2002/01/27
3 2002/08/15
4 2003/01/01
5 2002/12/31
Name: birthday, dtype: object
df['birthday'].dt.floor('min') # round down to the given frequency; options include 'D', 'H', 'S', 'min'
0 2004-10-01 10:32:00
1 2000-11-27 00:00:00
2 2002-01-27 00:00:00
3 2002-08-15 00:00:00
4 2003-01-01 00:00:00
5 2002-12-31 00:00:00
Name: birthday, dtype: datetime64[ns]
df['birthday'].dt.ceil('H') # round up to the given frequency
0 2004-10-01 11:00:00
1 2000-11-27 00:00:00
2 2002-01-27 00:00:00
3 2002-08-15 00:00:00
4 2003-01-01 00:00:00
5 2002-12-31 00:00:00
Name: birthday, dtype: datetime64[ns]
df['birthday'].dt.round('H') # round to the nearest multiple of the given frequency
0 2004-10-01 11:00:00
1 2000-11-27 00:00:00
2 2002-01-27 00:00:00
3 2002-08-15 00:00:00
4 2003-01-01 00:00:00
5 2002-12-31 00:00:00
Name: birthday, dtype: datetime64[ns]
# Attach the Asia/Shanghai timezone to the naive timestamps; the wall-clock
# values stay the same and only gain a +08:00 offset.
# Note: despite its name, df1 is a Series (the localized birthday column).
df1 = df['birthday'].dt.tz_localize('Asia/Shanghai')
df1
0 2004-10-01 10:32:45.000085+08:00
1 2000-11-27 00:00:00+08:00
2 2002-01-27 00:00:00+08:00
3 2002-08-15 00:00:00+08:00
4 2003-01-01 00:00:00+08:00
5 2002-12-31 00:00:00+08:00
Name: birthday, dtype: datetime64[ns, Asia/Shanghai]
df1.dt.tz # timezone attached to the column
df1.dt.tz_convert(None) # convert timezone; None converts to UTC and then drops the timezone info
0 2004-10-01 02:32:45.000085
1 2000-11-26 16:00:00.000000
2 2002-01-26 16:00:00.000000
3 2002-08-14 16:00:00.000000
4 2002-12-31 16:00:00.000000
5 2002-12-30 16:00:00.000000
Name: birthday, dtype: datetime64[ns]
import datetime
def strplus(val) -> str:
    """Return ``val`` as text, prefixed with '0' when it is one character long.

    Used below to render month numbers as two-character strings
    (e.g. ``1`` -> ``'01'``).

    Args:
        val: Any value; it is converted with ``str()``.

    Returns:
        ``'0' + str(val)`` when ``str(val)`` has exactly one character,
        otherwise ``str(val)`` unchanged.
    """
    text = str(val)  # convert once instead of in every branch
    return '0' + text if len(text) == 1 else text
def gender(val) -> str:
    """Map a boolean gender flag to its text label.

    Args:
        val: Truthy for male, falsy for female. A value that already is one
            of the labels ('male' / 'Female') is returned unchanged.

    Returns:
        'male' or 'Female'.
    """
    # Without this guard, applying the function a second time to an already
    # converted column would turn every row into 'male', because any
    # non-empty string is truthy.
    if isinstance(val, str) and val in ('male', 'Female'):
        return val
    return 'male' if val else 'Female'
# Derive extra columns from the birthday timestamps.
# NOTE: `datetime` refers to the module here (rebound by the plain
# `import datetime` earlier in the file), hence `datetime.datetime.now()`.
birth_year = df['birthday'].dt.year
birth_month = df['birthday'].dt.month

df['year'] = birth_year
# Age is the plain difference of calendar years; it does not check whether
# this year's birthday has already passed.
df['age'] = datetime.datetime.now().year - birth_year
# Year and zero-padded month joined by '年', e.g. '2004年10'.
df['birth'] = birth_year.apply(str).str.cat(birth_month.apply(strplus), sep='年')
# Replace the boolean flags with the labels returned by gender().
df['gender'] = df['gender'].apply(gender)
df
ID | name | gender | birthday | height | year | age | birth | |
---|---|---|---|---|---|---|---|---|
0 | 0001 | aaa | male | 2004-10-01 10:32:45.000085 | 1.1 | 2004 | 16 | 2004年10 |
1 | 0002 | bbb | male | 2000-11-27 00:00:00.000000 | 1.2 | 2000 | 20 | 2000年11 |
2 | 0003 | ccc | Female | 2002-01-27 00:00:00.000000 | 1.3 | 2002 | 18 | 2002年01 |
3 | 0004 | ddd | male | 2002-08-15 00:00:00.000000 | 1.4 | 2002 | 18 | 2002年08 |
4 | 0005 | eee | Female | 2003-01-01 00:00:00.000000 | 1.5 | 2003 | 17 | 2003年01 |
5 | 0006 | fff | male | 2002-12-31 00:00:00.000000 | 1.6 | 2002 | 18 | 2002年12 |
# Count how many names fall into each birth year.
df['name'].groupby(df['birthday'].dt.year).count()
birthday
2000 1
2002 3
2003 1
2004 1
Name: name, dtype: int64