# python-ARIMA

import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
import itertools
import numpy as np
import seaborn as sn

# Load the data.
# A raw string is used for the Windows path: '\D' and '\B' are not valid
# escape sequences, so a plain literal triggers an invalid-escape warning.
df = pd.read_csv(r'D:\Download\BitCoin.csv', nrows=40000)
# 'Timestamp' is parsed as a count of seconds (unit='s').
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')
pd.set_option('display.max_rows', 100)
# Keep only the rows that have an 'Open' value and index them by time.
a = df[df['Open'].notna()].set_index('Timestamp')

# Plot the 'Open' series.
plt.plot(a['Open'])
plt.xticks(rotation=70)

# Stationarity check
def testStationarity(timeSer):
    """Report whether the series passes the ADF-based stationarity check.

    Runs ``adfuller`` on ``timeSer`` and collects the first four result
    fields plus the critical values into a labelled Series.

    Returns:
        True only when the test statistic is below the 5% critical value
        and the p-value is below 0.1; False otherwise.
    """
    adf_result = adfuller(timeSer)

    summary = pd.Series(adf_result[:4],
                        index=['Test Statistic', 'p-value', 'lags', 'nobs'])
    # adf_result[4] maps a significance-level label to its critical value.
    for level, threshold in adf_result[4].items():
        summary['Critical values (%s)' % level] = threshold

    if not summary['Test Statistic'] < summary['Critical values (5%)']:
        return False
    return bool(summary['p-value'] < 0.1)

print('是否为平稳序列', testStationarity(a['Open']))

# Pure-randomness (white-noise) test: white noise cannot be forecast because
# all of its autocorrelations are close to zero.
# A p-value below 0.05 means the series is not white noise.
lb_result = acorr_ljungbox(a['Open'], lags=1)
if isinstance(lb_result, pd.DataFrame):
    # Newer statsmodels returns a DataFrame; the p-value column is 'lb_pvalue'.
    lb_pvalue = lb_result['lb_pvalue']
else:
    # Older statsmodels returns a (statistic, p-value) tuple.
    lb_pvalue = lb_result[1]
# Print explicitly: a bare expression shows nothing when run as a script.
print('Ljung-Box p-value:', lb_pvalue)


# Choose p and q
def _first_lag_inside_band(values, band, default):
    """Return the first lag whose value lies strictly inside its band.

    ``band`` holds one (low, high) pair per lag. ``default`` is returned
    when no value falls inside its band.
    """
    for lag, (value, (low, high)) in enumerate(zip(values, band)):
        if low < value < high:
            return lag
    return default


def p_q_choice(timeSer, nlags=40, alpha=.05):
    """Suggest AR order p and MA order q from PACF/ACF confidence bands.

    Args:
        timeSer: series passed to ``acf`` and ``pacf``.
        nlags: number of lags requested from ``acf`` and ``pacf``.
        alpha: significance level for their confidence intervals.

    Returns:
        Tuple ``(p, q)``: p is the first lag whose partial autocorrelation
        falls strictly inside its zero-centred confidence band, q is the
        same for the autocorrelation. Either is 999 when no lag qualifies.
    """
    kwargs = {'nlags': nlags, 'alpha': alpha}
    acf_x, confint = acf(timeSer, **kwargs)
    acf_px, confint2 = pacf(timeSer, **kwargs)

    # Subtract each interval's midpoint so the bands are centred on zero.
    confint = confint - confint.mean(axis=1)[:, None]
    confint2 = confint2 - confint2.mean(axis=1)[:, None]

    # Scan every returned lag. The results cover lags 0..nlags, so pairing
    # them with range(nlags) would silently skip the final lag.
    p = _first_lag_inside_band(acf_px, confint2, 999)
    q = _first_lag_inside_band(acf_x, confint, 999)

    return p, q

open_prices = a['Open']

# Print the (p, q) pair suggested by p_q_choice.
orders = p_q_choice(open_prices, nlags=10, alpha=0.05)
print(orders)
# The partial autocorrelation cuts off, so an AR model is used.

# Plot the autocorrelation and partial autocorrelation for inspection.
plot_acf(open_prices, lags=40, alpha=0.05)
plot_pacf(open_prices, lags=40, alpha=0.05)

# Evaluate a baseline model of order (1, 0, 1).
mod = ARIMA(a['Open'], order=(1, 0, 1)).fit()
res = mod.predict(start='2012-01-25 18:41:00', end='2012-01-27 16:14:00')[1:]
# In-sample fitted values of the baseline, from position 400 onwards.
pre = mod.fittedvalues.iloc[400:]
res_bic = mod.bic

# Search for the best p and q: the pair with the smallest BIC.
p_min = 0
q_min = 0
p_max = 5
q_max = 5

results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min, p_max + 1)],
                           columns=['MA{}'.format(i) for i in range(q_min, q_max + 1)])
best_bic = np.inf
best_mod = None
for p, q in itertools.product(range(p_min, p_max + 1), range(q_min, q_max + 1)):
    # Order (0, 0) is not fitted; its cell stays NaN.
    if p == 0 and q == 0:
        results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = np.nan
        continue
    # Fit into a separate name so the loop does not clobber `mod`.
    candidate = ARIMA(a['Open'], order=(p, 0, q)).fit()
    results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = candidate.bic
    if candidate.bic < best_bic:
        best_bic = candidate.bic
        best_mod = candidate

# Continue with the lowest-BIC fit instead of whichever was fitted last.
if best_mod is not None:
    mod = best_mod
res_bic = mod.bic

# Draw the BIC grid on its own figure so it does not land on the previous axes.
# NaN cells are left unfilled: a 0 placeholder would look like a real BIC
# value and distort the colour scale.
plt.figure()
sn.heatmap(results_bic.astype(float), annot=True, cmap="YlGnBu")


# Plot the observed series against the baseline fitted values on a new figure.
plt.figure()
plt.xticks(rotation=70)
plt.xlabel('time')
plt.ylabel('price')
plt.plot(a['Open'])
plt.plot(pre)

# Forecast the next 5 steps with the selected model.
# Printed explicitly: a bare expression shows nothing when run as a script.
print(mod.forecast(5))

# Related topic: python