ARMA模型就是这两者的联合,它的主要缺点是忽略了在金融市场时序数据中经常可见的波动聚簇现象(Volatility Clustering),模型公式如下:
$$
\begin{aligned}
y_t &= \alpha_1 y_{t-1} + \alpha_2 y_{t-2} + \dots + \alpha_p y_{t-p} + \omega_t + \beta_1 \omega_{t-1} + \dots + \beta_q \omega_{t-q} \\
    &= \sum_{i=1}^{p} \alpha_i y_{t-i} + \omega_t + \sum_{i=1}^{q} \beta_i \omega_{t-i}
\end{aligned}
$$
# Simulate an ARMA(2, 2) process with alphas=[0.5, -0.25] and betas=[0.5, -0.3],
# plot the simulated series, then fit an ARMA(2, 2) model to it.
max_lag = 30
n = 5000        # lots of samples to help the estimates
burn = n // 10  # number of samples to discard before fit

alphas = np.array([0.5, -0.25])
betas = np.array([0.5, -0.3])
# Coefficient vectors passed to the generator: a leading 1 followed by the
# negated AR coefficients / the MA coefficients as-is.
ar = np.concatenate(([1.0], -alphas))
ma = np.concatenate(([1.0], betas))

arma22 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=burn)
_ = tsplot(arma22, lags=max_lag)

mdl = smt.ARMA(arma22, order=(2, 2)).fit(
    maxlag=max_lag,
    method='mle',
    trend='nc',
    burnin=burn,
)
下面,我们模拟一个ARMA(3,2)过程,然后用ARMA模型对其建模:在候选阶数中进行搜索,并给出使赤池信息量准则(Akaike Information Criterion,AIC)取值最低的阶数(p,q)。
# Simulate an ARMA(3, 2) process with alphas=[0.5, -0.25, 0.4] and
# betas=[0.5, -0.3], then plot the simulated series.
max_lag, n, burn = 30, 5000, 2000

alphas = np.array([0.5, -0.25, 0.4])
betas = np.array([0.5, -0.3])
# Coefficient vectors passed to the generator: a leading 1 followed by the
# negated AR coefficients / the MA coefficients as-is.
ar = np.concatenate(([1.0], -alphas))
ma = np.concatenate(([1.0], betas))

arma32 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=burn)
_ = tsplot(arma32, lags=max_lag)
# Pick the best ARMA order for the simulated series by AIC: try every
# (p, q) with p, q in 0..4 and keep the fit with the smallest AIC.
best_aic = np.inf
best_order = None
best_mdl = None

rng = range(5)
for i in rng:
    for j in rng:
        try:
            tmp_mdl = smt.ARMA(arma32, order=(i, j)).fit(method='mle', trend='nc')
            tmp_aic = tmp_mdl.aic
        except Exception:
            # A candidate order whose fit raises is skipped so the search can
            # go on. Exception (rather than a bare except) still lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if tmp_aic < best_aic:
            best_aic = tmp_aic
            best_order = (i, j)
            best_mdl = tmp_mdl

# best_order stays None (and best_aic stays inf) if no candidate could be fitted.
print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
aic: 14266.72269 | order: (3, 2)
# Pass the residuals of the currently selected best model to tsplot.
_ = tsplot(best_mdl.resid, lags=max_lag)

# AIC grid search over ARMA orders (p, q), p, q in 0..4, for the
# '国内股票' column of indexs_logret.
best_aic = np.inf
best_order = None
best_mdl = None
Y = indexs_logret['国内股票']

rng = range(5)
for i in rng:
    for j in rng:
        try:
            tmp_mdl = smt.ARMA(Y, order=(i, j)).fit(method='mle', trend='nc')
            tmp_aic = tmp_mdl.aic
        except Exception:
            # A candidate order whose fit raises is skipped so the search can
            # go on. Exception (rather than a bare except) still lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if tmp_aic < best_aic:
            best_aic = tmp_aic
            best_order = (i, j)
            best_mdl = tmp_mdl

# best_order stays None (and best_aic stays inf) if no candidate could be fitted.
print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
aic: -6601.86081 | order: (3, 2)
# Pass the residuals of the currently selected best model to tsplot.
_ = tsplot(best_mdl.resid, lags=max_lag)

# AIC grid search over ARMA orders (p, q), p, q in 0..4, for the
# '香港股票' column of indexs_logret.
# Reset the running best first: otherwise a result left over from an earlier
# search would be reported whenever no fit on this series beats its AIC.
best_aic = np.inf
best_order = None
best_mdl = None
Y = indexs_logret['香港股票']

rng = range(5)
for i in rng:
    for j in rng:
        try:
            tmp_mdl = smt.ARMA(Y, order=(i, j)).fit(method='mle', trend='nc')
            tmp_aic = tmp_mdl.aic
        except Exception:
            # A candidate order whose fit raises is skipped so the search can
            # go on. Exception (rather than a bare except) still lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if tmp_aic < best_aic:
            best_aic = tmp_aic
            best_order = (i, j)
            best_mdl = tmp_mdl

# best_order stays None (and best_aic stays inf) if no candidate could be fitted.
print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
aic: -7640.79631 | order: (2, 2)
# AIC grid search over ARMA orders (p, q), p, q in 0..4, for the
# '国内债券' column of indexs_logret.
# Reset the running best first: otherwise a result left over from an earlier
# search would be reported whenever no fit on this series beats its AIC.
best_aic = np.inf
best_order = None
best_mdl = None
Y = indexs_logret['国内债券']

rng = range(5)
for i in rng:
    for j in rng:
        try:
            tmp_mdl = smt.ARMA(Y, order=(i, j)).fit(method='mle', trend='nc')
            tmp_aic = tmp_mdl.aic
        except Exception:
            # A candidate order whose fit raises is skipped so the search can
            # go on. Exception (rather than a bare except) still lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if tmp_aic < best_aic:
            best_aic = tmp_aic
            best_order = (i, j)
            best_mdl = tmp_mdl

# best_order stays None (and best_aic stays inf) if no candidate could be fitted.
print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
aic: -14105.28291 | order: (3, 1)
# AIC grid search over ARMA orders (p, q), p, q in 0..4, for the
# '国内货币' column of indexs_logret.
# Reset the running best first: otherwise a result left over from an earlier
# search would be reported whenever no fit on this series beats its AIC.
best_aic = np.inf
best_order = None
best_mdl = None
Y = indexs_logret['国内货币']

rng = range(5)
for i in rng:
    for j in rng:
        try:
            tmp_mdl = smt.ARMA(Y, order=(i, j)).fit(method='mle', trend='nc')
            tmp_aic = tmp_mdl.aic
        except Exception:
            # A candidate order whose fit raises is skipped so the search can
            # go on. Exception (rather than a bare except) still lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if tmp_aic < best_aic:
            best_aic = tmp_aic
            best_order = (i, j)
            best_mdl = tmp_mdl

# best_order stays None (and best_aic stays inf) if no candidate could be fitted.
print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
aic: -19062.72857 | order: (3, 4)