(三篇长文让你玩6Pandas)数据分析入门_PART2常用工具包_CH02数据分析工具:Pandas__Part02(时间序列全面分析)

'''
【课程2.8】  时间模块:datetime

datetime模块,主要掌握:datetime.date(), datetime.datetime(), datetime.timedelta()

日期解析方法:parser.parse

'''
'\n【课程2.8】  时间模块:datetime\n\ndatetime模块,主要掌握:datetime.date(), datetime.datetime(), datetime.timedelta()\n\n日期解析方法:parser.parse\n\n'
# datetime.date: calendar-date objects

import datetime

# datetime.date.today() returns the current local date.
# datetime.date(year, month, day) builds a specific date.
# Both results are instances of datetime.date.
today = datetime.date.today()
print(today, type(today))

t = datetime.date(year=2016, month=1, day=1)
print(t, type(t))


2019-07-01 
2016-01-01 
# datetime.datetime: combined date and time objects (the key type in this lesson)

# datetime.datetime.now() returns the current local date and time.
# datetime.datetime(year, month, day[, hour, minute, second]) requires at
# least year, month and day.
# str() turns a datetime into its string form.
now = datetime.datetime.now()
t1 = datetime.datetime(2019, 6, 26)
t2 = datetime.datetime(2019, 6, 26, 11, 55, 55)
for _shown in (now, str(now), t1, t2):
    print(_shown, type(_shown))

# Subtracting one datetime from another yields a timedelta.
tdelta = t2 - t1
print(tdelta, type(tdelta))
2019-07-01 12:55:12.485330 
2019-07-01 12:55:12.485330 
2019-06-26 00:00:00 
2019-06-26 11:55:55 
11:55:55 
# datetime.timedelta: a duration
# A timedelta is the "difference" value used to add to or subtract from a
# date/time.
today = datetime.datetime.today()  # datetime.datetime has a today() method too
one_day = datetime.timedelta(days=1)
yestoday = today - one_day
print(today)
print(yestoday)

2019-07-01 12:55:12.492313
2019-06-30 12:55:12.492313
# parser.parse: convert a date string into a datetime

from dateutil.parser import parse

date = '12-21-2017'
t = parse(date)
print(t, type(t))
# The str is converted directly into a datetime.datetime.

_samples = (
    parse('2000-1-1'),
    parse('5/1/2014'),
    # When the day is written before the month, pass dayfirst=True.
    parse('5/1/2014', dayfirst=True),
    parse('22/1/2014'),
    parse('Jan 31, 1997 10:45 PM'),
)
print(*_samples, sep=' \n ')
# Many formats are understood; per the original note, Chinese date text is
# not supported.


2017-12-21 00:00:00 
2000-01-01 00:00:00 
 2014-05-01 00:00:00 
 2014-01-05 00:00:00 
 2014-01-22 00:00:00 
 1997-01-31 22:45:00
# Homework answers

import datetime
# Exercise 1: print the current time and two fixed datetimes.
d1 = datetime.datetime.now()
print('请输出当前时间:', d1)

d2 = datetime.datetime(year=2017, month=5, day=1)
print('请输出时间:', d2)

d3 = datetime.datetime(year=2000, month=12, day=1)
print('请输出时间:', d3)
请输出当前时间: 2019-07-01 12:55:12.506274
请输出时间: 2017-05-01 00:00:00
请输出时间: 2000-12-01 00:00:00
# Exercise 2: the date 1000 days after 2000-05-01.
d = datetime.datetime(2000, 5, 1)
print(d + datetime.timedelta(days=1000))
2003-01-26 00:00:00
'''
【课程2.9】  Pandas时刻数据:Timestamp

时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据

pandas.Timestamp()

'''
'\n【课程2.9】  Pandas时刻数据:Timestamp\n\n时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据\n\npandas.Timestamp()\n\n'
# pd.Timestamp()
import numpy as np
import pandas as pd
date1 = datetime.datetime(2016, 12, 1, 12, 12, 12)  # a datetime.datetime object
date2 = '2017 12 21'  # a date string
t1 = pd.Timestamp(date1)
t2 = pd.Timestamp(date2)
print(t1, type(t1))
print(t2, type(t2))
# pd.Timestamp builds a pandas point-in-time value (a timestamp) directly;
# the resulting type is pandas' Timestamp.

# Note 1: pd.Timestamp and datetime.datetime both describe a point in time and
# differ in the module that provides them; the original note calls the pandas
# type the more commonly used one.
# pd.Timestamp(t) accepts date strings (the usual formats) as well as
# datetime objects.
2016-12-01 12:12:12 
2017-12-21 00:00:00 
# pd.to_datetime

from datetime import datetime

date1 = datetime(2016, 12, 1, 12, 45, 30)
date2 = '2017-12-21'
t1 = pd.to_datetime(date1)
t2 = pd.to_datetime(date2)
print(t1, type(t1))
print(t2, type(t2))
# A single value passed to pd.to_datetime() becomes a pandas Timestamp.

lst_date = ['2017-12-21', '2017-12-22', '2017-12-23']
t3 = pd.to_datetime(lst_date)
print(t3, type(t3))
# A list of values becomes a pandas DatetimeIndex.
2016-12-01 12:45:30 
2017-12-21 00:00:00 
DatetimeIndex(['2017-12-21', '2017-12-22', '2017-12-23'], dtype='datetime64[ns]', freq=None) 
# pd.to_datetime -> convert several date values into a timestamp index

date1 = [datetime(2015, 6, 1), datetime(2015, 7, 1), datetime(2015, 8, 1),
         datetime(2015, 9, 1), datetime(2015, 10, 1)]
date2 = ['2017-2-1', '2017-2-2', '2017-2-3', '2017-2-4', '2017-2-5', '2017-2-6']
print(date1)
print(date2)
# t1 converts the list of datetime objects and t2 the list of strings.
# (Previously both were built from date2, so date1 was never converted and the
# two printed indexes were identical.)
t1 = pd.to_datetime(date1)
t2 = pd.to_datetime(date2)
print(t1)
print(t2)
# Either kind of list is converted into a DatetimeIndex.

# A list of date strings with one entry that is not a date.
date3 = ['2017-2-1','2017-2-2','2017-2-3','hello world!','2017-2-5','2017-2-6']
t3 = pd.to_datetime(date3, errors = 'ignore')
print(t3,type(t3))
# When a sequence mixes in values that cannot be parsed, the errors argument
# decides what is returned.
# errors = 'ignore': the original input is returned when parsing fails; here
# that is a plain object-dtype Index.
# NOTE(review): errors='ignore' is deprecated in recent pandas releases -
# confirm against the pandas version this notebook targets.

t4 = pd.to_datetime(date3, errors = 'coerce')
print(t4,type(t4))
# errors = 'coerce': force a DatetimeIndex; values that cannot be parsed
# become NaT (Not a Time).
[datetime.datetime(2015, 6, 1, 0, 0), datetime.datetime(2015, 7, 1, 0, 0), datetime.datetime(2015, 8, 1, 0, 0), datetime.datetime(2015, 9, 1, 0, 0), datetime.datetime(2015, 10, 1, 0, 0)]
['2017-2-1', '2017-2-2', '2017-2-3', '2017-2-4', '2017-2-5', '2017-2-6']
DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',
               '2017-02-05', '2017-02-06'],
              dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',
               '2017-02-05', '2017-02-06'],
              dtype='datetime64[ns]', freq=None)
Index(['2017-2-1', '2017-2-2', '2017-2-3', 'hello world!', '2017-2-5',
       '2017-2-6'],
      dtype='object') 
DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', 'NaT', '2017-02-05',
               '2017-02-06'],
              dtype='datetime64[ns]', freq=None) 
# Homework answers
# Exercise 1: build every date of December 2017 and pick a mid-month date.

import numpy as np
import pandas as pd
lst = ['2017-12-%i' % day for day in range(1, 32)]
print(lst)

d = pd.to_datetime(lst)
print(d)

# NOTE(review): position 16 is 2017-12-17; the exact middle of the 31 entries
# would be position 15 (2017-12-16) - confirm which one the exercise wants.
print('月中日期为:\n', d[16])
['2017-12-1', '2017-12-2', '2017-12-3', '2017-12-4', '2017-12-5', '2017-12-6', '2017-12-7', '2017-12-8', '2017-12-9', '2017-12-10', '2017-12-11', '2017-12-12', '2017-12-13', '2017-12-14', '2017-12-15', '2017-12-16', '2017-12-17', '2017-12-18', '2017-12-19', '2017-12-20', '2017-12-21', '2017-12-22', '2017-12-23', '2017-12-24', '2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28', '2017-12-29', '2017-12-30', '2017-12-31']
DatetimeIndex(['2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',
               '2017-12-05', '2017-12-06', '2017-12-07', '2017-12-08',
               '2017-12-09', '2017-12-10', '2017-12-11', '2017-12-12',
               '2017-12-13', '2017-12-14', '2017-12-15', '2017-12-16',
               '2017-12-17', '2017-12-18', '2017-12-19', '2017-12-20',
               '2017-12-21', '2017-12-22', '2017-12-23', '2017-12-24',
               '2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
               '2017-12-29', '2017-12-30', '2017-12-31'],
              dtype='datetime64[ns]', freq=None)
月中日期为:
 2017-12-17 00:00:00
# Write a comma-separated list of compact dates to a text file, read it back,
# and parse the entries with pd.to_datetime.
# NOTE(review): the path points at the original author's Windows desktop;
# adjust it for the machine running this cell.
timetest_path = r'C:\Users\HASEE\Desktop\pythontest\timetest'

stwrite = '20150101,20150102,20150103,20150104,20150105,20150106,20150107,20150108'
# "with" closes the handle even if the write fails.
with open(timetest_path, 'w', encoding='utf-8') as f:
    f.write(stwrite)

# The read handle is closed as well (it was previously left open).
with open(timetest_path, 'r', encoding='utf-8') as f:
    stread = f.read()
lst = stread.split(',')
print(lst)
print(pd.to_datetime(lst))
['20150101', '20150102', '20150103', '20150104', '20150105', '20150106', '20150107', '20150108']
DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04',
               '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08'],
              dtype='datetime64[ns]', freq=None)
'''
【课程2.10】  Pandas时间戳索引:DatetimeIndex

核心:pd.date_range()

'''
'\n【课程2.10】  Pandas时间戳索引:DatetimeIndex\n\n核心:pd.date_range()\n\n'
# pd.DatetimeIndex(): in effect a list of Timestamps.
# A time series is a Series whose index is such a list of Timestamps.

# A DatetimeIndex can be built directly from str or datetime.datetime values.
# Important: one timestamp is a Timestamp, several form a DatetimeIndex.

rng = pd.DatetimeIndex(
    ['12/1/2017', '12/2/2017', '12/3/2017', '12/4/2017', '12/5/2017']
)
print(rng, type(rng))
_first = rng[0]
print(_first, type(_first))

# Important: a Series indexed by a DatetimeIndex is a time series.
_values = np.random.rand(len(rng))
st = pd.Series(_values, index=rng)
print(st, type(st))
print(st.index)
DatetimeIndex(['2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',
               '2017-12-05'],
              dtype='datetime64[ns]', freq=None) 
2017-12-01 00:00:00 
2017-12-01    0.745300
2017-12-02    0.876081
2017-12-03    0.142995
2017-12-04    0.938318
2017-12-05    0.043834
dtype: float64 
DatetimeIndex(['2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',
               '2017-12-05'],
              dtype='datetime64[ns]', freq=None)
# pd.date_range() - date ranges: the result is a DatetimeIndex (a list of Timestamps)
# Two ways to specify the range: (1) start + end; (2) start or end + periods
# Default frequency: day
rng1=pd.date_range(start='1/1/2017',end='1/10/2017', normalize=True)
rng2=pd.date_range(start='1/1/2017',periods=10)
rng3=pd.date_range(end = '1/30/2017 15:00:00', periods = 10)
 # rng3's end point carries hours, minutes and seconds
 
print(rng1,type(rng1))
print(rng2)
print(rng3)
print('--------------')
# pd.date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs)
# start: first timestamp
# end: last timestamp
# periods: number of timestamps to generate
# freq: frequency; pd.date_range() defaults to calendar days, pd.bdate_range() defaults to business days
# tz: time zone

rng4 = pd.date_range(start = '1/1/2017 15:30', periods = 10, name = 'hello world!', normalize = True)
print(rng4)
print('-------')
# normalize: reset the time part to midnight (the result shows 00:00:00, not 15:30:00)
# name: name of the resulting index object

print(pd.date_range('20170101','20170104'))  # compact 'YYYYMMDD' strings are accepted too
print(pd.date_range('20170101','20170104',closed = 'right'))
print(pd.date_range('20170101','20170104',closed = 'left'))
print('-------')
# closed: None (default) includes both end points; 'left' drops the right end point,
# 'right' drops the left end point
# NOTE(review): newer pandas releases replace `closed` with `inclusive` -
# confirm against the pandas version this notebook targets.

print(pd.bdate_range('20170101','20170107'))
# pd.bdate_range() defaults to business-day frequency


print(list(pd.date_range(start = '1/1/2017', periods = 10)))
# Converting to a list gives a list of Timestamp objects
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10'],
              dtype='datetime64[ns]', freq='D') 
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-21 15:00:00', '2017-01-22 15:00:00',
               '2017-01-23 15:00:00', '2017-01-24 15:00:00',
               '2017-01-25 15:00:00', '2017-01-26 15:00:00',
               '2017-01-27 15:00:00', '2017-01-28 15:00:00',
               '2017-01-29 15:00:00', '2017-01-30 15:00:00'],
              dtype='datetime64[ns]', freq='D')
--------------
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
               '2017-01-09', '2017-01-10'],
              dtype='datetime64[ns]', name='hello world!', freq='D')
-------
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D')
-------
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
               '2017-01-06'],
              dtype='datetime64[ns]', freq='B')
[Timestamp('2017-01-01 00:00:00', freq='D'), Timestamp('2017-01-02 00:00:00', freq='D'), Timestamp('2017-01-03 00:00:00', freq='D'), Timestamp('2017-01-04 00:00:00', freq='D'), Timestamp('2017-01-05 00:00:00', freq='D'), Timestamp('2017-01-06 00:00:00', freq='D'), Timestamp('2017-01-07 00:00:00', freq='D'), Timestamp('2017-01-08 00:00:00', freq='D'), Timestamp('2017-01-09 00:00:00', freq='D'), Timestamp('2017-01-10 00:00:00', freq='D')]
# pd.date_range() - date ranges: frequencies (1)
# NOTE(review): newer pandas releases rename several of these aliases
# (for example 'H' -> 'h', 'T' -> 'min', 'S' -> 's') - confirm against the
# installed pandas version.
print(pd.date_range('2017/1/1','2017/1/4'))  # default freq = 'D': every calendar day
print(pd.date_range('2017/1/1','2017/1/4', freq = 'B'))  # B: every business day
print(pd.date_range('2017/1/1','2017/1/2', freq = 'H'))  # H: every hour
print(pd.date_range('2017/1/1 12:00','2017/1/1 12:10', freq = 'T'))  # T/MIN: every minute
print(pd.date_range('2017/1/1 12:00:00','2017/1/1 12:00:10', freq = 'S'))  # S: every second
print(pd.date_range('2017/1/1 12:00:00','2017/1/1 12:00:10', freq = 'L'))  # L: every millisecond (1/1000 s)
print(pd.date_range('2017/1/1 12:00:00','2017/1/1 12:00:10', freq = 'U'))  # U: every microsecond (1/1,000,000 s); about ten million entries over 10 s

print(pd.date_range('20170101','20170201',freq='W-MON'))
# W-MON: weekly, anchored on the given weekday
# Weekday abbreviations: MON/TUE/WED/THU/FRI/SAT/SUN

print(pd.date_range('2017/1/1','2017/5/1',freq='wom-2mon'))
# WOM-2MON: the n-th given weekday of every month; here the second Monday of each month
# "WOM" stands for week of month
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='B')
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00',
               '2017-01-01 02:00:00', '2017-01-01 03:00:00',
               '2017-01-01 04:00:00', '2017-01-01 05:00:00',
               '2017-01-01 06:00:00', '2017-01-01 07:00:00',
               '2017-01-01 08:00:00', '2017-01-01 09:00:00',
               '2017-01-01 10:00:00', '2017-01-01 11:00:00',
               '2017-01-01 12:00:00', '2017-01-01 13:00:00',
               '2017-01-01 14:00:00', '2017-01-01 15:00:00',
               '2017-01-01 16:00:00', '2017-01-01 17:00:00',
               '2017-01-01 18:00:00', '2017-01-01 19:00:00',
               '2017-01-01 20:00:00', '2017-01-01 21:00:00',
               '2017-01-01 22:00:00', '2017-01-01 23:00:00',
               '2017-01-02 00:00:00'],
              dtype='datetime64[ns]', freq='H')
DatetimeIndex(['2017-01-01 12:00:00', '2017-01-01 12:01:00',
               '2017-01-01 12:02:00', '2017-01-01 12:03:00',
               '2017-01-01 12:04:00', '2017-01-01 12:05:00',
               '2017-01-01 12:06:00', '2017-01-01 12:07:00',
               '2017-01-01 12:08:00', '2017-01-01 12:09:00',
               '2017-01-01 12:10:00'],
              dtype='datetime64[ns]', freq='T')
DatetimeIndex(['2017-01-01 12:00:00', '2017-01-01 12:00:01',
               '2017-01-01 12:00:02', '2017-01-01 12:00:03',
               '2017-01-01 12:00:04', '2017-01-01 12:00:05',
               '2017-01-01 12:00:06', '2017-01-01 12:00:07',
               '2017-01-01 12:00:08', '2017-01-01 12:00:09',
               '2017-01-01 12:00:10'],
              dtype='datetime64[ns]', freq='S')
DatetimeIndex([       '2017-01-01 12:00:00', '2017-01-01 12:00:00.001000',
               '2017-01-01 12:00:00.002000', '2017-01-01 12:00:00.003000',
               '2017-01-01 12:00:00.004000', '2017-01-01 12:00:00.005000',
               '2017-01-01 12:00:00.006000', '2017-01-01 12:00:00.007000',
               '2017-01-01 12:00:00.008000', '2017-01-01 12:00:00.009000',
               ...
               '2017-01-01 12:00:09.991000', '2017-01-01 12:00:09.992000',
               '2017-01-01 12:00:09.993000', '2017-01-01 12:00:09.994000',
               '2017-01-01 12:00:09.995000', '2017-01-01 12:00:09.996000',
               '2017-01-01 12:00:09.997000', '2017-01-01 12:00:09.998000',
               '2017-01-01 12:00:09.999000',        '2017-01-01 12:00:10'],
              dtype='datetime64[ns]', length=10001, freq='L')
DatetimeIndex([       '2017-01-01 12:00:00', '2017-01-01 12:00:00.000001',
               '2017-01-01 12:00:00.000002', '2017-01-01 12:00:00.000003',
               '2017-01-01 12:00:00.000004', '2017-01-01 12:00:00.000005',
               '2017-01-01 12:00:00.000006', '2017-01-01 12:00:00.000007',
               '2017-01-01 12:00:00.000008', '2017-01-01 12:00:00.000009',
               ...
               '2017-01-01 12:00:09.999991', '2017-01-01 12:00:09.999992',
               '2017-01-01 12:00:09.999993', '2017-01-01 12:00:09.999994',
               '2017-01-01 12:00:09.999995', '2017-01-01 12:00:09.999996',
               '2017-01-01 12:00:09.999997', '2017-01-01 12:00:09.999998',
               '2017-01-01 12:00:09.999999',        '2017-01-01 12:00:10'],
              dtype='datetime64[ns]', length=10000001, freq='U')
DatetimeIndex(['2017-01-02', '2017-01-09', '2017-01-16', '2017-01-23',
               '2017-01-30'],
              dtype='datetime64[ns]', freq='W-MON')
DatetimeIndex(['2017-01-09', '2017-02-13', '2017-03-13', '2017-04-10'], dtype='datetime64[ns]', freq='WOM-2MON')
# pd.date_range() - date ranges: frequencies (2)
# NOTE(review): newer pandas releases rename several of these aliases
# (for example 'M' -> 'ME', 'A' -> 'YE') - confirm against the installed
# pandas version.

print(pd.date_range('2017','2018', freq = 'M'))
print(pd.date_range('2017','2020', freq = 'Q-DEC'))
print(pd.date_range('2017','2020', freq = 'A-DEC'))
print('------')
# M: last calendar day of every month

# Q-<month>: quarters ending in the given month; the last calendar day of each quarter's final month
# (quarter-month)
# So Q-<month> has only three distinct layouts: 1-4-7-10, 2-5-8-11, 3-6-9-12

# A-<month>: last calendar day of the given month, every year
# (annual-month)
# Month abbreviations: JAN/FEB/MAR/APR/MAY/JUN/JUL/AUG/SEP/OCT/NOV/DEC

print(pd.date_range('2017','2018', freq = 'BM'))
print(pd.date_range('2017','2020', freq = 'BQ-DEC'))
print(pd.date_range('2017','2020', freq = 'BA-DEC'))
print('------')
# BM: last business day of every month
# BQ-<month>: quarters ending in the given month; the last business day of each quarter's final month
# BA-<month>: last business day of the given month, every year

DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31'],
              dtype='datetime64[ns]', freq='M')
DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-30', '2017-12-31',
               '2018-03-31', '2018-06-30', '2018-09-30', '2018-12-31',
               '2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='Q-DEC')
DatetimeIndex(['2017-12-31', '2018-12-31', '2019-12-31'], dtype='datetime64[ns]', freq='A-DEC')
------
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-28',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-29', '2017-10-31', '2017-11-30', '2017-12-29'],
              dtype='datetime64[ns]', freq='BM')
DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29',
               '2018-03-30', '2018-06-29', '2018-09-28', '2018-12-31',
               '2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='BQ-DEC')
DatetimeIndex(['2017-12-29', '2018-12-31', '2019-12-31'], dtype='datetime64[ns]', freq='BA-DEC')
------
# pd.date_range() - date ranges: compound frequencies (a multiplier in front of an alias)

print(pd.date_range('2017/1/1','2017/2/1', freq = '7D'))  # every 7 days
print(pd.date_range('2017/1/1','2017/1/2', freq = '2h30min'))  # every 2 hours 30 minutes
print(pd.date_range('2017','2018', freq = '2M'))  # every second month, on the last calendar day of the month
DatetimeIndex(['2017-01-01', '2017-01-08', '2017-01-15', '2017-01-22',
               '2017-01-29'],
              dtype='datetime64[ns]', freq='7D')
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 02:30:00',
               '2017-01-01 05:00:00', '2017-01-01 07:30:00',
               '2017-01-01 10:00:00', '2017-01-01 12:30:00',
               '2017-01-01 15:00:00', '2017-01-01 17:30:00',
               '2017-01-01 20:00:00', '2017-01-01 22:30:00'],
              dtype='datetime64[ns]', freq='150T')
DatetimeIndex(['2017-01-31', '2017-03-31', '2017-05-31', '2017-07-31',
               '2017-09-30', '2017-11-30'],
              dtype='datetime64[ns]', freq='2M')
# Operating on a time series - asfreq: convert the frequency
ts = pd.Series(
    np.random.rand(4),
    index=pd.date_range('20170101', '20170104'),
)
print(ts)
_every_4h = ts.asfreq('4h', method='ffill')
print(_every_4h)
# The frequency changes from daily (D) to every 4 hours.
# method selects how the new rows are filled: None leaves them empty,
# 'ffill' (forward fill) repeats the previous value,
# 'bfill' (backward fill) uses the next value.
2017-01-01    0.435837
2017-01-02    0.669417
2017-01-03    0.198081
2017-01-04    0.139088
Freq: D, dtype: float64
2017-01-01 00:00:00    0.435837
2017-01-01 04:00:00    0.435837
2017-01-01 08:00:00    0.435837
2017-01-01 12:00:00    0.435837
2017-01-01 16:00:00    0.435837
2017-01-01 20:00:00    0.435837
2017-01-02 00:00:00    0.669417
2017-01-02 04:00:00    0.669417
2017-01-02 08:00:00    0.669417
2017-01-02 12:00:00    0.669417
2017-01-02 16:00:00    0.669417
2017-01-02 20:00:00    0.669417
2017-01-03 00:00:00    0.198081
2017-01-03 04:00:00    0.198081
2017-01-03 08:00:00    0.198081
2017-01-03 12:00:00    0.198081
2017-01-03 16:00:00    0.198081
2017-01-03 20:00:00    0.198081
2017-01-04 00:00:00    0.139088
Freq: 4H, dtype: float64
# Time series: leading / lagging the data with shift()

ts = pd.Series(np.random.rand(4),
              index = pd.date_range('20170101','20170104'))
print(ts)

print(ts.shift(2))
print(ts.shift(-2))
print('------')
# Positive: values move later (lag); negative: values move earlier (lead)

per = ts/ts.shift(1) - 1
print(per)
print('------')
# Percentage change relative to the previous timestamp
# (ts.shift(1) places yesterday's value under today's date)

print(ts.shift(2, freq = 'D'))# every timestamp moves forward by 2 days
print(ts.shift(2, freq = 'T'))# every timestamp moves forward by 2 minutes
# With freq given, the timestamps are shifted instead of the values
2017-01-01    0.696547
2017-01-02    0.529174
2017-01-03    0.404338
2017-01-04    0.741592
Freq: D, dtype: float64
2017-01-01         NaN
2017-01-02         NaN
2017-01-03    0.696547
2017-01-04    0.529174
Freq: D, dtype: float64
2017-01-01    0.404338
2017-01-02    0.741592
2017-01-03         NaN
2017-01-04         NaN
Freq: D, dtype: float64
------
2017-01-01         NaN
2017-01-02   -0.240289
2017-01-03   -0.235907
2017-01-04    0.834088
Freq: D, dtype: float64
------
2017-01-03    0.696547
2017-01-04    0.529174
2017-01-05    0.404338
2017-01-06    0.741592
Freq: D, dtype: float64
2017-01-01 00:02:00    0.696547
2017-01-02 00:02:00    0.529174
2017-01-03 00:02:00    0.404338
2017-01-04 00:02:00    0.741592
Freq: D, dtype: float64
#总结2.10
#1pd.DatetimeIndex 即listoftimestamp
#2TimeSeries 即index为DatetimeIndex的Series
#3pd.date_range()即DatetimeIndex生成器
#4pd.date_range()常见freq用法
#对TimeSeries的操作 ts.asfreq()以及ts.shift()
# Homework answers

# Answer 1
# Building a time series takes two steps:
# 1. build the date index (usually with date_range)
# 2. build the Series / DataFrame on that index

# 1. build the date index: 5 consecutive days
di1=pd.date_range(start='2017-01',periods=5,freq='D')
# 2. build the time series
ts1=pd.Series(data=np.random.rand(5),index=di1)
print(ts1)


# 5 quarter ends, with quarters anchored on January
di2=pd.date_range(start='2017-01-31',periods=5,freq='Q-Jan')
ts2=pd.Series(data=np.random.rand(5),index=di2)
print(ts2)


# 4 timestamps 10 minutes apart, used as the index of a 4x4 DataFrame
di3=pd.date_range(start='2017-12-01',periods=4,freq='10t')
tdf=pd.DataFrame(data=np.random.rand(4,4),
                 columns=['value1','value2','value3','value4'],
                 index=di3
                )
print(tdf)
2017-01-01    0.324245
2017-01-02    0.115597
2017-01-03    0.407691
2017-01-04    0.996330
2017-01-05    0.495723
Freq: D, dtype: float64
2017-01-31    0.650247
2017-04-30    0.484225
2017-07-31    0.464591
2017-10-31    0.641705
2018-01-31    0.795587
Freq: Q-JAN, dtype: float64
                       value1    value2    value3    value4
2017-12-01 00:00:00  0.494249  0.925107  0.769139  0.950450
2017-12-01 00:10:00  0.170856  0.021507  0.119379  0.391953
2017-12-01 00:20:00  0.848345  0.312652  0.699692  0.931733
2017-12-01 00:30:00  0.559365  0.542697  0.125891  0.055697
# A 10-minute series of 5 points starting at 2017-05-01 12:00.
di1=pd.date_range(start='2017 05 01 12',periods=5,freq='10min')
ts1=pd.Series(data=np.random.rand(5),index=di1)
print(ts1)

# Convert to a 5-minute frequency; each new row repeats the previous value (ffill).
ts2=ts1.asfreq(freq='5t',method='ffill')
print(ts2)
2017-05-01 12:00:00    0.478802
2017-05-01 12:10:00    0.290849
2017-05-01 12:20:00    0.895803
2017-05-01 12:30:00    0.109338
2017-05-01 12:40:00    0.697874
Freq: 10T, dtype: float64
2017-05-01 12:00:00    0.478802
2017-05-01 12:05:00    0.478802
2017-05-01 12:10:00    0.290849
2017-05-01 12:15:00    0.290849
2017-05-01 12:20:00    0.895803
2017-05-01 12:25:00    0.895803
2017-05-01 12:30:00    0.109338
2017-05-01 12:35:00    0.109338
2017-05-01 12:40:00    0.697874
Freq: 5T, dtype: float64
'''
【课程2.11】  Pandas时期:Period 和TimeStamp不同 
Period代表时期 
TimeStamp代表时间点


核心:pd.Period()
    pd.period_range()

'''
'\n【课程2.11】  Pandas时期:Period 和TimeStamp不同 \nPeriod代表时期 \nTimeStamp代表时间点\n\n\n核心:pd.Period()\n    pd.period_range()\n\n'
# pd.Period() creates a single period, the counterpart of pd.Timestamp() for a point in time
p = pd.Period('2017',freq='m')
print(p,type(p))

print(p+1)
print(p-2)
print(pd.Period('2012',freq='a')-1)
# Adding or subtracting an integer moves the whole period by that many units of its freq
2017-01 
2017-02
2016-11
2011
# pd.period_range() creates several periods at once (a list of periods)

prng=pd.period_range('20110101','20120101',freq='m')
print(prng,type(prng))
print(prng[0],type(prng[0]))
# The result is a PeriodIndex; each element is a Period

ts = pd.Series(np.random.rand(len(prng)), index = prng)
print(ts,type(ts))
print(ts.index)
# A time series indexed by periods
PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M') 
2011-01 
2011-01    0.270300
2011-02    0.528558
2011-03    0.626401
2011-04    0.812753
2011-05    0.353819
2011-06    0.150295
2011-07    0.461801
2011-08    0.533077
2011-09    0.054538
2011-10    0.833580
2011-11    0.894351
2011-12    0.383080
2012-01    0.181327
Freq: M, dtype: float64 
PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M')
# asfreq: frequency conversion
# How freq differs between timestamps and periods (original author's notes):

# For timestamps, freq changes the sampling interval, but the unit of each sampled point usually stays the same
# e.g. freq='M': the interval is a month, yet each point is still a single day (the last day of the month)
# Only when freq is finer than D does the unit of the points change
# e.g. freq='t': the points are at minute resolution

# For periods, freq changes the unit itself
# freq='M' means every element is a whole month
# Making a period's freq finer does not create more elements, e.g. 2010-1, 2010-2 with freq='m'
# After converting to freq='d' there is no fill method as with a timestamp-indexed series; you pick start or end, i.e. one day of the month

p = pd.Period('2017','A')
print(p)
print(p.asfreq('M', how = 'start'))  # how = 's' also works
print(p.asfreq('D', how = 'end'))  # how = 'e' also works
print('--------')
# .asfreq(freq, how=...) converts a period to another frequency

prng = pd.period_range('2017','2018',freq = 'M')
ts1 = pd.Series(np.random.rand(len(prng)), index = prng)
ts2 = pd.Series(np.random.rand(len(prng)), index = prng.asfreq('D', how = 'start'))
print(ts1.head(),len(ts1))
print(ts2.head(),len(ts2))
# asfreq can also convert the PeriodIndex used as a time series index; the length is unchanged

2017
2017-01
2017-12-31
--------
2017-01    0.404408
2017-02    0.064027
2017-03    0.821275
2017-04    0.995918
2017-05    0.281146
Freq: M, dtype: float64 13
2017-01-01    0.920413
2017-02-01    0.320414
2017-03-01    0.847685
2017-04-01    0.261885
2017-05-01    0.145836
Freq: D, dtype: float64 13
# Converting between timestamps and periods: to_period() and to_timestamp()
rng = pd.date_range('2017/1/1', periods = 10, freq = 'M')
prng = pd.period_range('2017','2018', freq = 'M')
print(rng)
print(prng)
print('----------')

# The conversion works directly on the index objects (list of timestamps <-> list of periods)
print(rng.to_period(freq='a'))

# ... and also directly on a time series
# Month-end timestamps become monthly periods
ts1 = pd.Series(np.random.rand(len(rng)), index = rng)
print(ts1)
print(ts1.to_period())

ts2 = pd.Series(np.random.rand(len(prng)), index = prng)
print(ts2)
print(ts2.to_timestamp(how='S'))
# Monthly periods become the first day of each month
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-30', '2017-10-31'],
              dtype='datetime64[ns]', freq='M')
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]', freq='M')
----------
PeriodIndex(['2017', '2017', '2017', '2017', '2017', '2017', '2017', '2017',
             '2017', '2017'],
            dtype='period[A-DEC]', freq='A-DEC')
2017-01-31    0.079345
2017-02-28    0.699806
2017-03-31    0.193150
2017-04-30    0.380251
2017-05-31    0.218401
2017-06-30    0.282242
2017-07-31    0.450215
2017-08-31    0.708842
2017-09-30    0.928040
2017-10-31    0.697656
Freq: M, dtype: float64
2017-01    0.079345
2017-02    0.699806
2017-03    0.193150
2017-04    0.380251
2017-05    0.218401
2017-06    0.282242
2017-07    0.450215
2017-08    0.708842
2017-09    0.928040
2017-10    0.697656
Freq: M, dtype: float64
2017-01    0.543350
2017-02    0.787547
2017-03    0.842127
2017-04    0.020524
2017-05    0.981086
2017-06    0.391997
2017-07    0.639751
2017-08    0.654858
2017-09    0.571412
2017-10    0.136572
2017-11    0.537329
2017-12    0.834277
2018-01    0.497881
Freq: M, dtype: float64
2017-01-01    0.543350
2017-02-01    0.787547
2017-03-01    0.842127
2017-04-01    0.020524
2017-05-01    0.981086
2017-06-01    0.391997
2017-07-01    0.639751
2017-08-01    0.654858
2017-09-01    0.571412
2017-10-01    0.136572
2017-11-01    0.537329
2017-12-01    0.834277
2018-01-01    0.497881
Freq: MS, dtype: float64
# Homework answers
# A monthly period series from 2017-01 through 2017-05.
p1=pd.period_range(start='2017-01',end='2017-05',freq='m')
ts1=pd.Series(data=np.random.rand(5),index=p1)
print(ts1)

# Five 2-hour periods starting at 2017-01-01 00:00.
p2=pd.period_range(start='2017-01-1',periods=5,freq='2h')
ts2=pd.Series(data=np.random.rand(5),index=p2)
print(ts2)
2017-01    0.884941
2017-02    0.499193
2017-03    0.157167
2017-04    0.964902
2017-05    0.059249
Freq: M, dtype: float64
2017-01-01 00:00    0.865604
2017-01-01 02:00    0.262509
2017-01-01 04:00    0.087934
2017-01-01 06:00    0.834944
2017-01-01 08:00    0.915583
Freq: 2H, dtype: float64
'''
#总结2.11
1 p = pd.Period('2017',freq='m')
创建单个period

2 prng=pd.period_range('20110101','20120101',freq='m')
创建list of period

3p = pd.Period('2017','A')
print(p)
print(p.asfreq('M', how = 'start')) 
更改period的频率

4pd.to_period()、pd.to_timestamp() 
timestamp和period之间的转换
既可以直接在 listoftimestamp 和 listofperiods 之间转换
也可以直接对ts进行转换


'''

"\n#总结2.11\n1 p = pd.Period('2017',freq='m')\n创建单个period\n\n2 prng=pd.period_range('20110101','20120101',freq='m')\n创建list of period\n\n3p = pd.Period('2017','A')\nprint(p)\nprint(p.asfreq('M', how = 'start')) \n更改period的频率\n\n4pd.to_period()、pd.to_timestamp() \ntimestamp和period之间的转换\n既可以直接在 listoftimestamp 和 listofperiods 之间转换\n也可以直接对ts进行转换\n\n\n"
'''
【课程2.12】  时间序列 - 索引及切片

TimeSeries是Series的一个子类,所以Series索引及数据选取方面的方法基本一样

同时TimeSeries通过时间序列有更便捷的方法做索引和切片
 
'''
'\n【课程2.12】  时间序列 - 索引及切片\n\nTimeSeries是Series的一个子类,所以Series索引及数据选取方面的方法基本一样\n\n同时TimeSeries通过时间序列有更便捷的方法做索引和切片\n \n'
# Indexing
from datetime import datetime

rng=pd.date_range('2017/1','2017/3')
ts=pd.Series(np.random.rand(len(rng)),index=rng)
print(ts.head())

# 1. A time series supports plain positional indexing
# NOTE(review): positional ts[0] on a non-integer index is deprecated in newer
# pandas (ts.iloc[0] is the explicit form) - confirm the target version.
print(ts[0])
print(ts[:2])
print('-----')

# 2. It also accepts date strings in various formats, and datetime.datetime objects
print(ts['2017/1/2'])
print(ts['20170103'])
print(ts['1/10/2017'])
print(ts[datetime(2017,1,20)])
print('-----')

# The series is ordered by time, so ordering is not a concern here
# The same indexing approach applies to a DataFrame
2017-01-01    0.659568
2017-01-02    0.151966
2017-01-03    0.288654
2017-01-04    0.583045
2017-01-05    0.769449
Freq: D, dtype: float64
0.65956785928946
2017-01-01    0.659568
2017-01-02    0.151966
Freq: D, dtype: float64
-----
0.15196600248594228
0.2886538761520884
0.0023193408918270597
0.20779042212900634
-----
# Slicing

rng = pd.date_range('2017/1','2017/3',freq = '12H')
ts = pd.Series(np.random.rand(len(rng)), index = rng)

# Slices accept date strings in various formats; as with label slicing on a
# Series, the end point is included (all of 2017/1/10 is returned)
print(ts['2017/1/5':'2017/1/10'])
print('-----')

# Passing just a month selects that whole month as a slice
print(ts['2017/2'].head())
2017-01-05 00:00:00    0.318971
2017-01-05 12:00:00    0.188438
2017-01-06 00:00:00    0.187238
2017-01-06 12:00:00    0.980087
2017-01-07 00:00:00    0.161376
2017-01-07 12:00:00    0.215822
2017-01-08 00:00:00    0.935772
2017-01-08 12:00:00    0.880521
2017-01-09 00:00:00    0.168200
2017-01-09 12:00:00    0.276908
2017-01-10 00:00:00    0.795692
2017-01-10 12:00:00    0.096272
Freq: 12H, dtype: float64
-----
2017-02-01 00:00:00    0.536472
2017-02-01 12:00:00    0.063015
2017-02-02 00:00:00    0.159657
2017-02-02 12:00:00    0.614637
2017-02-03 00:00:00    0.984499
Freq: 12H, dtype: float64
# Time series whose index contains duplicate timestamps

# is_unique on the Series checks the values; on the index it checks the labels.
# Here the values are unique while the index is not.
dates = pd.DatetimeIndex(['1/1/2015', '1/2/2015', '1/3/2015', '1/4/2015', '1/1/2015', '1/2/2015'])
ts = pd.Series(np.random.rand(6), index=dates)
print(ts)
print(ts.is_unique, ts.index.is_unique)
print('-----')

print(ts['20150101'], type(ts['20150101']))
print(ts['20150104'], type(ts['20150104']))
print('-----')
# A label that occurs more than once selects every matching row, returned as a Series.

# Collapse the duplicates by grouping on the index and averaging each group.
# mean has to be *called*: printing the bare attribute only shows the bound
# method object, not the averaged series.
ts_mean = ts.groupby(level=0).mean()
print(ts_mean)

2015-01-01    0.510849
2015-01-02    0.791319
2015-01-03    0.552804
2015-01-04    0.678201
2015-01-01    0.571113
2015-01-02    0.584924
dtype: float64
True False
-----
2015-01-01    0.510849
2015-01-01    0.571113
dtype: float64 
2015-01-04    0.678201
dtype: float64 
-----
>
# Homework answers
# A 10-row, 3-column DataFrame indexed every 12 hours from 2017-12-01 00:00 to 2017-12-05 12:00.
di=pd.date_range(start='2017/12/1',end='2017/12/5 12:00:00',freq='12H')
ts=pd.DataFrame(data=np.random.rand(10,3),index=di,columns=['value1','value2','value3'])
print(ts)
print('---------------')
# 1. first three rows, selected by position
print(ts.iloc[0:3])
print('---------------')
# 2. the single row at 2017-12-04 12:00:00, selected by label
print(ts.loc['2017-12-4 12:00:00'])
print('---------------')
# 3. every row from 2017-12-04 through the end of 2017-12-05
print(ts.loc['2017-12-4' : '2017-12-5'])
                       value1    value2    value3
2017-12-01 00:00:00  0.134941  0.767928  0.157234
2017-12-01 12:00:00  0.869153  0.842300  0.358291
2017-12-02 00:00:00  0.728543  0.706397  0.737571
2017-12-02 12:00:00  0.433480  0.464147  0.987012
2017-12-03 00:00:00  0.196458  0.474911  0.709055
2017-12-03 12:00:00  0.076818  0.907870  0.886325
2017-12-04 00:00:00  0.784369  0.985650  0.652622
2017-12-04 12:00:00  0.058532  0.784861  0.318300
2017-12-05 00:00:00  0.943825  0.025305  0.848250
2017-12-05 12:00:00  0.636666  0.767742  0.773654
---------------
                       value1    value2    value3
2017-12-01 00:00:00  0.134941  0.767928  0.157234
2017-12-01 12:00:00  0.869153  0.842300  0.358291
2017-12-02 00:00:00  0.728543  0.706397  0.737571
---------------
value1    0.058532
value2    0.784861
value3    0.318300
Name: 2017-12-04 12:00:00, dtype: float64
---------------
                       value1    value2    value3
2017-12-04 00:00:00  0.784369  0.985650  0.652622
2017-12-04 12:00:00  0.058532  0.784861  0.318300
2017-12-05 00:00:00  0.943825  0.025305  0.848250
2017-12-05 12:00:00  0.636666  0.767742  0.773654
'''
#总结2.12
1 ts的索引
2 ts的切片
3 ts判断是否重复(data index)
'''

'\n#总结2.12\n1 ts的索引\n2 ts的切片\n3 ts判断是否重复(data index)\n'
# Resampling with .resample()
# Build a series with one value per day, then regroup it into 5-day bins.

rng = pd.date_range(start='20170101', periods=12)
ts = pd.Series(data=np.arange(12), index=rng)
print(ts)
print('-----')

# resample() on its own only describes the binning: it returns a Resampler
# object, and no values are combined until an aggregation is applied to it.
ts_re = ts.resample('5D')
# Applying .sum() to the Resampler yields a new Series with one row per bin.
ts_re2 = ts_re.sum()

print(ts_re, type(ts_re))
print('-----')

print(ts_re2, type(ts_re2))
print('-----')
# Recap:
#   '5D'    -> the resampling frequency passed to resample()
#   .sum()  -> the aggregation applied to the values inside each bin

2017-01-01     0
2017-01-02     1
2017-01-03     2
2017-01-04     3
2017-01-05     4
2017-01-06     5
2017-01-07     6
2017-01-08     7
2017-01-09     8
2017-01-10     9
2017-01-11    10
2017-01-12    11
Freq: D, dtype: int32
-----
DatetimeIndexResampler [freq=<5 * Days>, axis=0, closed=left, label=left, convention=start, base=0] 
-----
2017-01-01    10
2017-01-06    35
2017-01-11    21
Freq: 5D, dtype: int32 
-----
# Downsampling: many rows are reduced to fewer rows covering longer periods

rng = pd.date_range(start='20170101', periods=12)
ts = pd.Series(data=np.arange(1, 13), index=rng)
print(ts)
print('---------')

# closed: which edge of each bin is inclusive. For a day-based frequency such
# as '5D' the default is 'left', i.e. bins are [start, end).
# The values here are 1..12 on 2017-01-01..2017-01-12, so with 5-day bins:
#   closed='left'  -> [01-01, 01-06), [01-06, 01-11), [01-11, 01-16)
#                     sums 15, 40, 23 (the last bin holds only 11 and 12)
#   closed='right' -> (12-27, 01-01], (01-01, 01-06], (01-06, 01-11], (01-11, 01-16]
#                     sums 1, 20, 45, 12 (an extra leading bin is needed to
#                     hold the very first timestamp)
for _closed_kwargs, _tag in (
    ({}, '→ 默认\n'),
    ({'closed': 'left'}, '→ left\n'),
    ({'closed': 'right'}, '→ right\n'),
):
    print(ts.resample('5D', **_closed_kwargs).sum(), _tag)
print('-----')

# label: which bin edge is used as the index of the aggregated row; the
# default for this frequency is the left edge. closed is left at its default
# here, so only the index labels differ between the two results.
for _side in ('left', 'right'):
    print(ts.resample('5D', label=_side).sum(), '→ ' + _side + 'label\n')
2017-01-01     1
2017-01-02     2
2017-01-03     3
2017-01-04     4
2017-01-05     5
2017-01-06     6
2017-01-07     7
2017-01-08     8
2017-01-09     9
2017-01-10    10
2017-01-11    11
2017-01-12    12
Freq: D, dtype: int32
---------
2017-01-01    15
2017-01-06    40
2017-01-11    23
Freq: 5D, dtype: int32 → 默认

2017-01-01    15
2017-01-06    40
2017-01-11    23
Freq: 5D, dtype: int32 → left

2016-12-27     1
2017-01-01    20
2017-01-06    45
2017-01-11    12
Freq: 5D, dtype: int32 → right

-----
2017-01-01    15
2017-01-06    40
2017-01-11    23
Freq: 5D, dtype: int32 → leftlabel

2017-01-06    15
2017-01-11    40
2017-01-16    23
Freq: 5D, dtype: int32 → rightlabel
# Upsampling and filling the newly created rows

# 'h' and '15min' replace the 'H' and '15T' aliases, which pandas 2.2
# deprecated; both spellings are accepted by older releases as well.
rng = pd.date_range('2017/1/1 0:0:0', periods = 5, freq = 'h')
ts = pd.DataFrame(np.arange(15).reshape(5,3),
                  index = rng,
                  columns = ['a','b','c'])
print(ts)
print('---------')

# Going from hourly to 15-minute data creates rows that have no source value;
# the method chosen decides how those rows are populated.
print(ts.resample('15min').asfreq())
print(ts.resample('15min').ffill())
print(ts.resample('15min').bfill())
# .asfreq(): no filling, the new rows are NaN
# .ffill():  forward fill, each new row copies the previous known row
# .bfill():  backward fill, each new row copies the next known row
                      a   b   c
2017-01-01 00:00:00   0   1   2
2017-01-01 01:00:00   3   4   5
2017-01-01 02:00:00   6   7   8
2017-01-01 03:00:00   9  10  11
2017-01-01 04:00:00  12  13  14
---------
                        a     b     c
2017-01-01 00:00:00   0.0   1.0   2.0
2017-01-01 00:15:00   NaN   NaN   NaN
2017-01-01 00:30:00   NaN   NaN   NaN
2017-01-01 00:45:00   NaN   NaN   NaN
2017-01-01 01:00:00   3.0   4.0   5.0
2017-01-01 01:15:00   NaN   NaN   NaN
2017-01-01 01:30:00   NaN   NaN   NaN
2017-01-01 01:45:00   NaN   NaN   NaN
2017-01-01 02:00:00   6.0   7.0   8.0
2017-01-01 02:15:00   NaN   NaN   NaN
2017-01-01 02:30:00   NaN   NaN   NaN
2017-01-01 02:45:00   NaN   NaN   NaN
2017-01-01 03:00:00   9.0  10.0  11.0
2017-01-01 03:15:00   NaN   NaN   NaN
2017-01-01 03:30:00   NaN   NaN   NaN
2017-01-01 03:45:00   NaN   NaN   NaN
2017-01-01 04:00:00  12.0  13.0  14.0
                      a   b   c
2017-01-01 00:00:00   0   1   2
2017-01-01 00:15:00   0   1   2
2017-01-01 00:30:00   0   1   2
2017-01-01 00:45:00   0   1   2
2017-01-01 01:00:00   3   4   5
2017-01-01 01:15:00   3   4   5
2017-01-01 01:30:00   3   4   5
2017-01-01 01:45:00   3   4   5
2017-01-01 02:00:00   6   7   8
2017-01-01 02:15:00   6   7   8
2017-01-01 02:30:00   6   7   8
2017-01-01 02:45:00   6   7   8
2017-01-01 03:00:00   9  10  11
2017-01-01 03:15:00   9  10  11
2017-01-01 03:30:00   9  10  11
2017-01-01 03:45:00   9  10  11
2017-01-01 04:00:00  12  13  14
                      a   b   c
2017-01-01 00:00:00   0   1   2
2017-01-01 00:15:00   3   4   5
2017-01-01 00:30:00   3   4   5
2017-01-01 00:45:00   3   4   5
2017-01-01 01:00:00   3   4   5
2017-01-01 01:15:00   6   7   8
2017-01-01 01:30:00   6   7   8
2017-01-01 01:45:00   6   7   8
2017-01-01 02:00:00   6   7   8
2017-01-01 02:15:00   9  10  11
2017-01-01 02:30:00   9  10  11
2017-01-01 02:45:00   9  10  11
2017-01-01 03:00:00   9  10  11
2017-01-01 03:15:00  12  13  14
2017-01-01 03:30:00  12  13  14
2017-01-01 03:45:00  12  13  14
2017-01-01 04:00:00  12  13  14
# Homework answer: downsample and upsample a daily series

di=pd.date_range(start='20170101',end='20170110')
# Size the random data from the index so the two can never get out of sync.
ts1=pd.Series(data=np.random.rand(len(di)),index=di)
print(ts1)
print('---------')

# Downsample to 3-day bins that are closed on, and labelled by, their right
# edge, taking the mean of each bin.
ts2=ts1.resample('3D',label='right',closed='right').mean()
print(ts2)
print('---------')

# Upsample to 12-hour steps, forward-filling the new rows.
# '12h' replaces the upper-case '12H' alias deprecated in pandas 2.2.
ts3=ts1.resample('12h').ffill()
print(ts3)
2017-01-01    0.528825
2017-01-02    0.595066
2017-01-03    0.721156
2017-01-04    0.637974
2017-01-05    0.449776
2017-01-06    0.430048
2017-01-07    0.256290
2017-01-08    0.493641
2017-01-09    0.272481
2017-01-10    0.878957
Freq: D, dtype: float64
---------
2017-01-01    0.528825
2017-01-04    0.651399
2017-01-07    0.378704
2017-01-10    0.548360
Freq: 3D, dtype: float64
---------
2017-01-01 00:00:00    0.528825
2017-01-01 12:00:00    0.528825
2017-01-02 00:00:00    0.595066
2017-01-02 12:00:00    0.595066
2017-01-03 00:00:00    0.721156
2017-01-03 12:00:00    0.721156
2017-01-04 00:00:00    0.637974
2017-01-04 12:00:00    0.637974
2017-01-05 00:00:00    0.449776
2017-01-05 12:00:00    0.449776
2017-01-06 00:00:00    0.430048
2017-01-06 12:00:00    0.430048
2017-01-07 00:00:00    0.256290
2017-01-07 12:00:00    0.256290
2017-01-08 00:00:00    0.493641
2017-01-08 12:00:00    0.493641
2017-01-09 00:00:00    0.272481
2017-01-09 12:00:00    0.272481
2017-01-10 00:00:00    0.878957
Freq: 12H, dtype: float64
'''
总结
1 ts降采样
2 ts升采样
'''
'\n总结\n1 ts降采样\n2 ts升采样\n'

你可能感兴趣的:((三篇长文让你玩6Pandas)数据分析入门_PART2常用工具包_CH02数据分析工具:Pandas__Part02(时间序列全面分析))