DS-GA 3001.001 课程时间序列建模
时间序列 { X t } \{X_t\} {Xt}
均值 μ X ( t ) = E ( X t ) \mu_X(t)=\mathbb{E}(X_t) μX(t)=E(Xt)
协方差 R X ( t , u ) = cov ( X t , X u ) R_X(t,u)=\text{cov}(X_t,X_u) RX(t,u)=cov(Xt,Xu)
自相关函数(Autocorrelation Function) ρ X ( t , u ) = R X ( t , u ) R X ( t , t ) R X ( u , u ) \rho_X(t,u)=\frac{R_X(t,u)}{\sqrt{R_X(t,t)R_X(u,u)}} ρX(t,u)=RX(t,t)RX(u,u)RX(t,u)
交叉协方差(cross-covariance) R X , Y ( t , u ) = cov ( X t , Y u ) R_{X,Y}(t,u)=\text{cov}(X_t,Y_u) RX,Y(t,u)=cov(Xt,Yu)
交叉相关系数 ρ X , Y ( t , u ) = R X , Y ( t , u ) R X , Y ( t , u ) R X , Y ( u , u ) \rho_{X,Y}(t,u)=\frac{R_{X,Y}(t,u)}{\sqrt{R_{X,Y}(t,u)R_{X,Y}(u,u)}} ρX,Y(t,u)=RX,Y(t,u)RX,Y(u,u)RX,Y(t,u)
def acf_impl(x, nlags):
"""
Your implementation for the Autocorrelation Function.
Your implementation will be checked against statsmodels.tsa.stattools.acf.
@param x: a 1-d numpy array
@param nlags: an integer
@return a 1-d numpy array with (nlags+1) elements.
The first element denotes the acf at lag = 0 (1.0 by definition).
"""
assert len(x) > nlags
x_bar = np.mean(x)
output = []
for lag in range(nlags+1):
top_len = len(x) - lag
gamma = np.sum((x[lag:]-x_bar)*(x[:top_len]-x_bar))/len(x)
output.append(gamma)
return np.array(output)/output[0]
w t ∼ N ( 0 , σ 2 ) w_t \sim N(0,\sigma^2) wt∼N(0,σ2)
# Question 1.20
n = 500 # 50
mean = 0
std = 1
lag = 20
# create white noise
w_t = np.random.normal(mean, std, size=n)
# plot white noise
plt.plot(w_t)
plt.title("white noise w_t")
plt.show()
# calculate acf
acf_val = acf(x=w_t, nlags=lag)
plot_acf(x=w_t, lags=lag, title="ACF w_t")
plt.show()
采样点为50的自相关函数与理论自相关函数差异较大
v t = 1 3 ( w t , w t + 1 , w t + 2 ) v_t=\frac{1}{3}(w_t,w_{t+1},w_{t+2}) vt=31(wt,wt+1,wt+2)
# Question 1.21
n = 500
mean = 0
std = 1
lag = 20
# create white noise
w_t = np.random.normal(mean, std, size=n+2)
# create moving average
v_t = np.mean(np.array([w_t[:n], w_t[1:n+1], w_t[2:n+2]]), 0)
# plot white noise
plt.figure(1)
plt.subplot(211)
plt.plot(w_t)
plt.title("w_t")
# plot moving average
plt.subplot(212)
plt.plot(v_t)
plt.title("v_t")
# calculate acf
acf_val = acf(x=v_t, nlags=lag, fft=False)
plot_acf(x=v_t, lags=lag)
plt.show()
v t = 2 cos ( 2 π t 50 + 0.6 π ) + w t v_t = 2 \text{cos}(\frac{2\pi t}{50} + 0.6\pi) + w_t vt=2cos(502πt+0.6π)+wt
# Question 1.23
n = 500
mean = 0
std = 1
lag = 100
# create white noise
w_t = np.random.normal(mean, std, size=n)
# create signal w. noise
v_t = 2 * np.cos(2 * np.pi * np.linspace(1,n, num=n) / 50.0 + 0.6 * np.pi) + w_t
# plot white noise
plt.figure(1)
plt.subplot(211)
plt.plot(w_t)
plt.title("w_t")
# plot signal with noise
plt.subplot(212)
plt.plot(v_t)
plt.title("v_t")
# plot acf
acf_val = acf(x=v_t, nlags=lag, fft=False)
plot_acf(x=v_t, lags=lag, title="ACF v_t")
plt.show()
模式是具有周期性,信号的周期性导致了这种模式
x t ∼ N ( 0 , σ x 2 ) x_t \sim N(0, \sigma_x^2) xt∼N(0,σx2) y t = 2 x t − 5 + w t , w t ∼ N ( 0 , σ x 2 ) y_t = 2x_{t-5} + w_t, w_t \sim N(0, \sigma_x^2) yt=2xt−5+wt,wt∼N(0,σx2)
# statsmodels.graphics.tsaplots doesn't have plotting function for CCF so I have to write my own.
def plot_ccf(x, y, ax=None, lags=None, alpha=.05, use_vlines=True, unbiased=False,
fft=False, vlines_kwargs={}, title='Cross-correlation', zero=True, **kwargs):
fig, ax = utils.create_mpl_ax(ax)
lags, nlags, irregular = statsmodels.graphics.tsaplots._prepare_data_corr_plot(x, lags, zero)
confint = None
ccf_val = ccf(x, y)
if lags is not None:
ccf_val = ccf_val[:nlags+1]
statsmodels.graphics.tsaplots._plot_corr(ax, title, ccf_val, confint, lags, irregular, use_vlines, vlines_kwargs, **kwargs)
return fig
# Cross-correlation synthetic Example
n = 50
mean = 0
std = 1
lag=40
true_h = 5
x_t = np.random.normal(mean, std, size=n+5)
y_t = 2.0 * x_t[5:] + np.random.normal(mean, std, size=n)
x_t = x_t[:n]
# plot the original data
plt.plot(x_t)
plt.title("x_t")
plt.show()
plt.plot(y_t)
plt.title("y_t")
plt.show()
# plot acf
plot_acf(x=x_t, lags=lag, title="ACF x_t")
plot_acf(x=y_t, lags=lag, title="ACF y_t")
plt.show()
# plot ccf
ccf_val = ccf(y_t, x_t)
plot_ccf(x_t, y_t, title="CCF", lags=10)
plt.show()
交叉相关系数最大值滞后时段一致
soi = np.array(pd.read_csv("https://raw.githubusercontent.com/charlieblue17/timeseries2018/master/data/soi.csv")["x"])
rec = np.array(pd.read_csv("https://raw.githubusercontent.com/charlieblue17/timeseries2018/master/data/rec.csv")["x"])
# plot data
plt.plot(soi)
plt.title("soi")
plt.show()
plt.plot(rec)
plt.title("rec")
plt.show()
# plot acf
plot_acf(x=soi, lags=lag, title="ACF soi")
plot_acf(x=rec, lags=lag, title="ACF rec")
plt.show()
# plot ccf
ccf_val = ccf(soi, rec)
plot_ccf(soi, rec, title="CCF", lags = 40)
plt.show()