以ARIMA模型为例,介绍时间序列算法在Python中是如何实现的。以下是应用Python语言建模的步骤:
"""
Created on Mon Apr 2 16:45:36 2018
@author: houy
"""
import pandas as pd

# Load the series from the Excel workbook; the first column is used as the
# index. Later code slices this index with date strings, so it is presumably
# a date column -- confirm against the workbook.
discfile = 'arima_data.xlsx'
data = pd.read_excel(discfile, index_col=0)

# Quick sanity check: first rows and the dtype of every column.
print(data.head())
print('\n Data Types:')
print(data.dtypes)
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

# Time-series plot of the raw data.
data.plot()
plt.show()

from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation plot of the raw series (original author's observation:
# it shows strong autocorrelation).
plot_acf(data).show()

from statsmodels.tsa.stattools import adfuller as ADF

# Augmented Dickey-Fuller unit-root test on the raw sales column.
print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
# First-order difference of the series; the leading NaN row is dropped and the
# column is renamed so plots and tests are labelled as the differenced series.
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']

# Time-series plot of the differenced data.
D_data.plot()
plt.show()

# Autocorrelation plot. Original author's observation: after differencing the
# values quickly fall inside the confidence band and approach 0, i.e. no
# autocorrelation remains.
plot_acf(D_data).show()

from statsmodels.graphics.tsaplots import plot_pacf

# Partial autocorrelation plot. Original author's observation: the differenced
# series shows no partial autocorrelation.
plot_pacf(D_data).show()

# ADF test on the differenced series (original author's note: p-value < 0.05).
print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))

from statsmodels.stats.diagnostic import acorr_ljungbox

# Ljung-Box white-noise test at lag 1; reports the test statistic and p-value.
print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))
# Plot the lag-1 difference on its own figure.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(111)
diff1 = data.diff(1)
diff1.plot(ax=ax1)

# Plot the lag-2 difference on a second, separate figure.
# The original requested subplot 112 (index 2 in a 1x1 grid), which matplotlib
# rejects with a ValueError; a fresh figure only has position 111.
# NOTE(review): data.diff(2) is x[t] - x[t-2], not a second-order difference
# (data.diff().diff()) -- confirm which one was intended.
fig = plt.figure(figsize=(12, 8))
ax2 = fig.add_subplot(111)
diff2 = data.diff(2)
diff2.plot(ax=ax2)
import statsmodels.api as sm

# First-order difference with the leading NaN row sliced off; unlike D_data,
# the column keeps its original name.
dta = data.diff(1)[1:]

# ACF (top panel) and PACF (bottom panel) of the differenced sales, 10 lags.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig1 = sm.graphics.tsa.plot_acf(dta[u'销量'], lags=10, ax=ax1)
ax2 = fig.add_subplot(212)
fig2 = sm.graphics.tsa.plot_pacf(dta[u'销量'], lags=10, ax=ax2)
# Model selection by information criteria: prefer the model with the smaller
# AIC value (L is the likelihood, k the number of parameters, n the number of
# observations).
#   AIC = -2 ln(L) + 2 k            Akaike information criterion
#   BIC = -2 ln(L) + ln(n) * k      Bayesian information criterion
#   HQ  = -2 ln(L) + 2 ln(ln(n)) * k  Hannan-Quinn criterion
#
# sm.tsa.ARMA was removed in statsmodels 0.13. ARIMA with d = 0 is the same
# model family, and the order= keyword is accepted by both the old and the
# new ARIMA classes.
# NOTE(review): the models are fit on the undifferenced `data`, while the
# forecast code later in this file cumulatively sums the predictions as if
# they were first differences -- confirm whether the differenced series was
# the intended input.
arma_mod20 = sm.tsa.ARIMA(data, order=(2, 0, 0)).fit()
print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)

arma_mod01 = sm.tsa.ARIMA(data, order=(0, 0, 1)).fit()
print(arma_mod01.aic, arma_mod01.bic, arma_mod01.hqic)

arma_mod10 = sm.tsa.ARIMA(data, order=(1, 0, 0)).fit()
print(arma_mod10.aic, arma_mod10.bic, arma_mod10.hqic)
from statsmodels.graphics.api import qqplot

# Residuals of the MA(1) candidate, used for the diagnostics below.
resid = arma_mod01.resid

# Q-Q plot of the residuals against a fitted normal distribution, with a
# reference line through the quartiles.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)

# ACF (top panel) and PACF (bottom panel) of the residuals, 10 lags.
# The original drew the PACF in both panels; the top panel now uses plot_acf,
# matching the ACF/PACF layout used earlier for the differenced series.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(arma_mod01.resid.values.squeeze(), lags=10, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(arma_mod01.resid, lags=10, ax=ax2)
# Durbin-Watson statistic on the residuals. It only tests first-order
# autocorrelation. Because the autocorrelation coefficient rho lies in
# [-1, 1], 0 <= DW <= 4, and:
#   DW = 0  <=>  rho = 1    positive autocorrelation
#   DW = 4  <=>  rho = -1   negative autocorrelation
#   DW = 2  <=>  rho = 0    no (first-order) autocorrelation
# So a DW value close to 0 or 4 indicates autocorrelation, while a value
# close to 2 indicates no first-order autocorrelation.
print(sm.stats.durbin_watson(arma_mod01.resid.values))

import numpy as np

# Disabled by the original author ("the output is a bit off, not solved yet"):
# r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
# datap = np.c_[range(1, 36), r[1:], q, p]
# table = pd.DataFrame(datap, columns=['lag', "AC", "Q", "Prob(>Q)"])
# print(table.set_index('lag'))
# NOTE(review): range(1, 36) hard-codes 35 lags, but np.c_ needs every column
# to have the same length, so this only works when acf() returns exactly 35
# lags. Deriving the lag column from len(q) would likely resolve it -- confirm
# before re-enabling.
# Dynamic (multi-step) prediction over the requested date range.
predict_sunspots = arma_mod01.predict('2015-2-07', '2015-2-15', dynamic=True)
fig, ax = plt.subplots(figsize=(12, 8))
print(predict_sunspots)

# Rebuild levels from the predictions by treating them as first differences:
# start from the first observation on or after 2015-02-06 and accumulate.
# The original added a whole Series to a single element and used a loop whose
# body had lost its indentation; a scalar seed plus cumsum() computes the same
# running total.
last_observed = data.loc['2015-02-06':, u'销量'].iloc[0]
predict_sunspots.iloc[0] += last_observed
predict_sunspots = predict_sunspots.cumsum()
print(predict_sunspots)

# Plot the observations from 2015 onwards with the forecast on the same axes.
# DataFrame.ix was removed in pandas 1.0; .loc does the same label slicing.
ax = data.loc['2015':].plot(ax=ax)
predict_sunspots.plot(ax=ax)
plt.show()