2022-04-02

####2019####

library(tseries)

library(forecast)

library(TSA)

setwd('//Users//wangxinran//Desktop//4064')

beer <- ts(OzBeer,start = c(1956,1), frequency = 4)

lbeer <- log(beer)

#(a)

plot(stl(beer, s.window = 20, t.window = 20))

plot(stl(lbeer, s.window = 20, t.window = 20))

# after the log transform, the seasonal variance becomes smaller and more stable

#(b)

lbeer.d1 = diff(lbeer) # removing trend δ

lbeer.D1 = diff(lbeer, lag = 4) # removing seasonal δ^4

lbeer.d1D1 = diff(lbeer.d1, lag = 4) # removing trend and removing seasonal, then the series appears stationary

#δδ^4

plot(lbeer.d1D1) # looks fairly stationary

layout(mat= matrix(c(1,2,3,4,5,6), nrow =2, byrow=TRUE))

acf(lbeer.d1); acf(lbeer.D1); acf(lbeer.d1D1)

pacf(lbeer.d1); pacf(lbeer.D1); pacf(lbeer.d1D1)

# after removing only the trend, a seasonal pattern is still visible

# seems stationary, ready to fit a model

#(c)

## How to choose the order (from the third plots in (b)): SARIMA(2,1,1)x(0,1,1)[4]

## d, D: once the series is stationary, count how many regular (d) and seasonal (D) differences were taken

## p, P: read from the PACF of the stationary series

## q, Q: read from the ACF of the stationary series

## once the lags fall inside the blue bands, treat them as 0; the order is the last lag before they do; if no lag is marked, counting starts from lag 1

## p=2: in the PACF, 2 lags are outside the interval

## d=1: differenced once

## q=1: the first lag of the ACF is outside the interval

## P=0: in the PACF, the 4th lag is inside the interval, and likewise the 8th etc.

## D=1: seasonal differencing once

## Q=1: in the ACF, the 4th lag is outside, but the others are inside

fit1=arima(lbeer,order=c(2,1,1),seasonal = c(0,1,1))

fit1

## AIC=-513.44

#(d)

tsdiag(fit1)

## The standardized residuals are plotted in the first panel.

## The residual pattern looks irregular and moves around 0.

## The second panel is the ACF of the model residuals.

## The ACF tails off; apart from lag 0, no lag lies outside the interval, so the fitted model looks reasonable.

## Ljung-Box test, H0: the residuals are independent.

## All p-values are well above 0.05, so we cannot reject H0; the residuals are independent.
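
## A minimal hedged sketch (not part of the original assignment) of running the Ljung-Box test on the residuals directly;

## the choice of lag = 8 and the fitdf = 4 adjustment (for the p+q+P+Q = 4 estimated ARMA coefficients) are assumptions

Box.test(residuals(fit1), lag = 8, type = "Ljung-Box", fitdf = 4)

## a large p-value (> 0.05) again means we cannot reject H0 that the residuals are independent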

#(e)

anew <- window(beer, start = c(1975,1))

plot(decompose(anew))

layout(mat= matrix(c(1,2,3), nrow =3, byrow=TRUE))

plot(anew);acf(anew);pacf(anew)

#Reduction to Stationary

anew.d1 = diff(anew) # removing trend δ

anew.D1 = diff(anew, lag = 4) # removing seasonal δ^4

anew.d1D1 = diff(anew.d1, lag = 4) # removing trend and removing seasonal, then the series appears stationary

layout(mat= matrix(c(1,2,3,4,5,6), nrow =2, byrow=TRUE))

acf(anew.d1); acf(anew.D1); acf(anew.d1D1)

pacf(anew.d1); pacf(anew.D1); pacf(anew.d1D1)

plot(anew.d1D1)

mod1=arima(anew,order=c(2,1,1),seasonal = c(0,1,1)) # fitted to the original (undifferenced) series

AIC(mod1) # 645.6816; the smaller the AIC/BIC, the better

tsdiag(mod1)

cpgram(residuals(mod1), main= "SARIMA(2,1,1)x(0,1,1)[4]")

mod2=auto.arima(anew,trace=F)

#ARIMA(1,0,2)(0,1,1)[4]

AIC(mod2)#645.2691

tsdiag(mod2)

cpgram(residuals(mod2), main= "SARIMA(1,0,2)x(0,1,1)[4]")

plot(armasubsets(anew.d1D1,5,5))

mod3=arima(anew.d1D1, order=c(2,1,4))

AIC(mod3)#655.7698

tsdiag(mod3)

cpgram(residuals(mod3), main= "ARIMA(2,1,4)")

#Q2

Dubuque.ts=ts(Dubuque,start=c(1964,1),frequency=12)

#(1)

auto.arima(Dubuque.ts)

#ARIMA(0,0,0)(2,1,0)[12]

mod1=arima(Dubuque.ts,order=c(0,0,0),seasonal = c(2,1,0))

tsdiag(mod1)

#(2)

Dubuque1975 <- c(Dubuque1975)#vector

data.test=ts(Dubuque1975,start=c(1975,1),frequency=12)

# plot forecasts:

plot(Dubuque.ts,xlim=c(1964,1976),ylim=c(10,80))

preds = predict(mod1, n.ahead = length(data.test))

lines(preds$pred,col="red")

lines(preds$pred+2*preds$se,col="red")

lines(preds$pred-2*preds$se,col="red")

lines(data.test, col="blue")

legend("topleft", legend=c("predice","se","ture"),lty=c(1,3,6),

      col=c("blue","red","green"))

####Which model is better?####

# the smaller the AIC/BIC, the better

#cpgram(residuals(mod)) cumulative periodogram.

#tsdiag(fit1)

## The standardized residuals are plotted in the first panel.

## The residual pattern looks irregular and moves around 0.

## The second panel is the ACF of the model residuals.

## The ACF tails off; apart from lag 0, no lag lies outside the interval, so the fitted model looks reasonable.

## Ljung-Box test, H0: the residuals are independent.

## All p-values are well above 0.05, so we cannot reject H0; the residuals are independent.
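
## A minimal hedged sketch of such a comparison on the built-in lh series (the two candidate orders are arbitrary illustrations):

m1 = arima(lh, order = c(1,0,0))

m2 = arima(lh, order = c(3,0,0))

AIC(m1); AIC(m2) # smaller is better

BIC(m1); BIC(m2)

cpgram(residuals(m1)) # residuals of a good model should stay inside the cumulative periodogram bands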

####report####

# Total value of house sales in Cork, monthly

library(tseries)

library(forecast)

setwd('//Users//wangxinran//Desktop//4064')

####Dataset####

data=read.csv("118102037.csv")

data.sub=subset(data,Statistic=='Value of Sales')

data.ts=ts(data.sub$VALUE,frequency = 12,start = c(2010,1))

# take a first look at the data

plot(decompose(data.ts))

tsdisplay(data.ts)

####Reduction to Stationarity####

#select train data

# hold out the last 30 observations as the test set

data.train = window(data.ts, start = c(2010,1), end = c(2018,12))

#train time series plot

plot(log(data.train))

plot(decompose(data.train))

acf(data.train)

pacf(data.train)

#Ljung-Box test

#judge if d=0

#when m=1

r1 = cor(data.train[-1], data.train[-length(data.train)]) # lag-1 sample autocorrelation (not the covariance)

Q = length(data.train)*(length(data.train)+2)*(r1)^2/(length(data.train)-1)

Box.test(data.train, lag = 1, type = "Ljung-Box")

#Q>X-squared

#reject H0

#the model exhibits lack-of-fit

#d is not equal to 0
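
## Hedged sketch: the same Ljung-Box statistic for a general number of lags m, using sample autocorrelations;

## m = 10 is an arbitrary choice here: Q = n(n+2) * sum_{k=1..m} r_k^2 / (n-k)

n = length(data.train)

m = 10

r = acf(data.train, lag.max = m, plot = FALSE)$acf[-1] # drop lag 0

Q = n * (n + 2) * sum(r^2 / (n - seq_len(m)))

Q

Box.test(data.train, lag = m, type = "Ljung-Box") # should reproduce Q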

#Dickey-Fuller Test

adf.test(data.train)

#p-value = 0.2095>0.05

#the time series is not stationary

#Taking lag-1 difference to remove trend.

data.train.d1 = diff(data.train, lag = 1)

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(data.train.d1)

acf(data.train.d1)

pacf(data.train.d1)

#ARIMA(1,1,0)(1,1,0)[12]

#Dickey-Fuller Test again

adf.test(data.train.d1)

#p-value = 0.01<0.05

#the time series is stationary

####Model fitting and Model Criticism####

# method 1: choose the orders by eye from the ACF and PACF

fit1 <- arima(data.train, order = c(1,1,0), list(order = c(1,1,0), period = 12))

AIC(fit1)

tsdiag(fit1)

cpgram(residuals(fit1), main= "SARIMA(1,1,0)x(1,1,0)[12]")

#good

#method 2

#find arima(p,d,q)

auto.arima(data.train,trace=F)

#ARIMA(0,1,1)(0,1,1)[12]

fit2 <- arima(data.train, order = c(0,1,1), list(order = c(0,1,1), period = 12))

AIC(fit2)

tsdiag(fit2)

cpgram(residuals(fit2), main= "SARIMA(0,1,1)(0,1,1)[12]")

#better

#method 3

#install.packages('TSA')

library(TSA)

plot(armasubsets(data.train.d1,10,10))

#ar=8  ma=6

fit3 <- arima(data.train, order = c(8,1,6))

AIC(fit3)

tsdiag(fit3)

cpgram(residuals(fit3), main= "ARIMA(8,1,6)")

#good

####forecast and discussion####

data.test = window(data.ts, start = c(2019,1))

plot(data.train,xlim=c(2010,2022),ylim=c(0,180))

pred = predict(fit2, n.ahead = length(data.test))

lines(pred$pred,col="red")

lines(pred$pred+2*pred$se,col="green")

lines(pred$pred-2*pred$se,col="green")

lines(data.test, col="blue")

#test if residuals obey normal distribution

mean(residuals(fit2))

sd(residuals(fit2))

1/sqrt(length(data.train))

shapiro.test(residuals(fit2))

#p-value = 0.001969 < 0.05

#residuals not a normal distribution
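
## Hedged sketch: a visual check of residual normality to complement the Shapiro-Wilk test

qqnorm(residuals(fit2)); qqline(residuals(fit2))

hist(residuals(fit2), breaks = 20, main = "Residuals of fit2")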

# with the fewest ARMA parameters, (4,6) is better than (5,5)

####Condensed code####

#install.packages("tseries")

#install.packages("forecast")

#install.packages("TSA")

library(tseries)

library(forecast)

library(TSA)

####Inspect basic properties of the data####

frequency(co2)

cycle(co2)

time(co2)

####Extract subsets with window()####

window(co2, start = c(1959,1), end = c(1962,12))

window(co2, frequency = 4) # re-sample at frequency 4: split each cycle into 4 parts and keep the first value of each

####Smoothing method 1####

co2.yr = window(co2, frequency = 1)

co2.y = c(co2.yr)

co2.x = c(time(co2.yr))

lines(co2.x, co2.y, type = "o", lwd = 3, col = "red")

## Smoothing method 2) lowess fit

co2.lowess = lowess(co2, f=1/3)

lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "blue")

## Smoothing method 3) moving average using filter()

plot(co2)

lines(filter(co2, filter = rep(1,6)/6), lwd = 2, col = "red")

## Smoothing method 4 (x below is a placeholder for the time series)

lines(ksmooth(time(x), x, "normal", bandwidth=5/52)) # larger bandwidth gives a smoother curve

## Smoothing method 5

lines(supsmu(time(x), x, span=.01)) # larger span gives a smoother curve

## Smoothing method 6

lines(smooth.spline(time(x), x))

####Decomposing a time series####

####method1)decompose()####

plot(decompose(co2))

co2.decomp = decompose(co2)

names(co2.decomp)

plot(co2.decomp)

####method2) stl(): decompose the series into seasonal, trend and remainder components using loess####

fit <- stl(log(co2), s.window = 20, t.window = 20)

plot(fit)

# larger s.window and t.window make the seasonal pattern more regular and the trend smoother, at the cost of a larger remainder

####Plot the subseries of a time series (monthplot)####

op <- par(mfrow = c(2,2))

fit <- stl(log(co2), s.window = 20, t.window = 20)

monthplot(co2, ylab = "data", cex.axis = 0.8)

monthplot(fit, choice = "seasonal", cex.axis = 0.8)

monthplot(fit, choice = "trend", cex.axis = 0.8)

monthplot(fit, choice = "remainder", type = "h", cex.axis = 0.8)

par(op)

####Plotting tips####

abline(h = , col = "", lty=2) # dashed horizontal line

abline(v = , col = "", lty=1) # solid vertical line

text(12,12,"xxx",lwd = 2, cex = 1)

title(main = "")

####Drawing solid and dashed line plots####

# method 1: plot the full data as a dashed line and fit the smoother, then overlay the first part with a solid line using lines()

# method 2: plot the first part as a solid line, overlay the second part as a dashed line with lines(), and finally fit the smoother on the full data (a minimal sketch follows)
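
## Minimal sketch of method 2 on the co2 data (the split year 1981 is an arbitrary assumption):

co2.a = window(co2, end = c(1980,12)) # first part: solid line

co2.b = window(co2, start = c(1981,1)) # second part: dashed line

plot(co2.a, xlim = range(time(co2)), ylim = range(co2))

lines(co2.b, lty = 2)

lines(lowess(co2, f = 1/3), col = "red", lwd = 2) # smoother fitted on the full data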

#### lag 1 correlation (start 1 end 48) ####

cor(lh[-1],lh[-48])

####lag 2 correlation ####

cor(lh[-(1:2)],lh[-(47:48)])

####Set the layout of the following plots####

layout(mat= matrix(c(1,2,3), ncol=3, byrow=TRUE)) # fill c(...) with the desired panel order

####acf,pacf####

acf(lh, lag.max = 24)

pacf(lh, lag.max = 24)

####Mark the first n points on the ACF plot####

for (i in 1:n) {

  r = round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3)

  points(i, r, pch=16, col="red")

}

####Remove trend and seasonality####

co2.d1 = diff(co2) # remove trend

co2.D1 = diff(co2, lag = 12) # remove seasonality

co2.d1D1 = diff(co2.d1, lag = 12) # remove both trend and seasonality; the series then appears stationary

####Gaussian Noise####

g = as.ts(rnorm(100))

#### White Noise ####

w = as.ts(rt(100, df=5))

####AIC####

num = length(x) # sample size of the data the model was fitted to (x is a placeholder)

AIC(fit)/num - log(2*pi)

BIC(fit)/num - log(2*pi)

####Backward selection using AIC as the criterion for dropping variables; of the three directions, backward works best####

mod <- lm( fmla , data = dat)

mod.aic = step(mod, direction = "backward", k = 2)

mod.bic = step(mod, direction = "backward", k = log(nrow(dat)))

plot(fitted(mod.bic), residuals(mod.bic), pch = ".")

####Simulate from an ARIMA model; an empty list gives an ARIMA(0,0,0) model, i.e. white noise (the default)####

Yt = arima.sim(model = list(ar = 0.7), n = 101)

#simulates an AR(2) time series process Yt=0.7*Yt-1-0.4*Yt-2+et, with sigma(e)^2=1

Yt = arima.sim(list(ar = c(0.7, -0.4)), n=101)

####Simulate an ARIMA(1,2,1) with specified ar and ma coefficients####

ts.sim = arima.sim(list(order = c(1,2,1), ar = 0.7, ma=0.5), n = 500)

?arima.sim

####AR(1) with coefficient 0.7: its theoretical ACF and PACF####

AR1.acf = ARMAacf(ar = 0.7, lag.max = 25)

AR1.pacf = ARMAacf(ar = 0.7, lag.max = 25, pacf = TRUE)

####Compare the simulated series with the theoretical model####

layout(mat= matrix(c(1,1,2:5), byrow=TRUE, ncol=2))

plot(Yt, main= "Simulated AR(1) Process")

acf(Yt, lag.max = 25, main= "Estimated ACF")

pacf(Yt, lag.max = 25, main= "Estimated PACF")

barplot(AR1.acf, col = "blue", main= "Theoretical ACF")

barplot(AR1.pacf, col = "red", main= "Theoretical PACF")

####Fit an AR model: how many phi coefficients there are and their values####

ar(lh)

####Find the roots of the characteristic polynomial in B####

polyroot(z = c(1,-1.7,0.7) ) # p(x) = z1 + z2 * x + ... + z[n] * x^(n-1)

# coefficients must be supplied in ascending powers
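
## Hedged sketch: checking stationarity of an AR(2) via the roots of 1 - phi1*B - phi2*B^2

## (the coefficients 0.7 and -0.4 are taken from the AR(2) simulation example above)

Mod(polyroot(c(1, -0.7, 0.4))) # both moduli > 1, so the process is stationary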

####Full code####

#### Lab class 1 ####

# 1. Look up the data source

?co2

co2

# 2. Check the data type

class(co2) #time series

# 3. Plot the time series

plot(co2, ylab = expression("Atmospheric concentration of CO"[2]), las = 1) # plot the time series

title(main = "co2 data set")

# 4. Inspect basic properties of the data

frequency(co2)

cycle(co2)

time(co2)

# 5. Extract subsets with window()

# window() extracts the subset of x observed between the given start and end times; if a frequency is specified, the series is re-sampled at the new frequency

window(co2, start = c(1959,1), end = c(1962,12))

window(co2, frequency = 4) # re-sample at frequency 4: split each cycle into 4 parts and keep the first value of each

# 6. Draw smoothed curves

## method (1): use a "typical" value to represent the whole cycle

# take a fairly typical point from the series and use its values to represent the overall trend

# (for example, when tracking the heights of children aged 0-12, some children are tall and some are short; taking a fairly typical (middle) child and using his height each year to represent that year avoids the influence of extreme values and makes the series smoother)

co2.yr = window(co2, frequency = 1) # a TS of one value per year; window() returns a time series (the old frequency must be an integer multiple of the new one)

#co2.yr = window(co2, frequency = 1, start=c(1959,4)) # starting from the 4th month, take one value per year

co2.y = c(co2.yr) # the nesting c( ) command returns a regular vector of the y values

co2.x = c(time(co2.yr)) # c() converts the x values (the times) into a regular vector

lines(co2.x, co2.y, type = "o", lwd = 3, col = "red")

## method (2): lowess fit

co2.lowess = lowess(co2, f=1/3)        # the classic LOWESS smoother

lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "blue")

# f: the lowess smoother span; the larger it is, the smoother the curve

## method (3): moving average using filter()

plot(co2)

lines(filter(co2, filter = rep(1,6)/6), lwd = 2, col = "red") #MA(6)

lines(filter(co2, filter = c(1,rep(2,10),1)/22),  lwd = 2, col = "blue")#MA(11),MA(2)

# applying two moving averages in succession

# eg:MA(4),MA(2)

# generate 20 random numbers

x = rnorm(20)

# method 1: use the combined filter directly: (1/8)[1,2,2,2,1]

xx = filter(x, filter = c(1,2,2,2,1)/8)

# method 2

# first MA(2)

y = filter(x, filter = c(1,1)/2)

# then MA(4)

yy = filter(c(NA,y), filter = c(1,1,1,1)/4) # note: an NA has to be prepended

# finally, differencing (and rounding) shows the two methods give identical results

round(xx - yy, 6) # 6 is just the rounding precision, not important

# 7. Decomposing a time series

## method (1): decompose()

?decompose # decomposes a series into seasonal, trend and irregular components using moving averages; handles additive or multiplicative seasonal components

plot(decompose(co2))

### saves and plots

co2.decomp = decompose(co2)

names(co2.decomp)

plot(co2.decomp)

## method (2): stl() decomposes the series into seasonal, trend and irregular components using loess

fit <- stl(log(co2), s.window = 20, t.window = 20)

plot(fit)

# larger s.window and t.window make the seasonal pattern more regular and the trend smoother, at the cost of a larger remainder (shown in the fourth panel)

# 8. Linear filtering

# filter() applies a linear filter to a univariate time series, or separately to each series of a multivariate time series

plot(co2)

lines(filter(co2, filter = rep(1,6)/6), lwd = 2, col = "red") #MA(6)

lines(filter(co2, filter = c(1,rep(2,10),1)/22),  lwd = 2, col = "blue")#MA(11),MA(2)

# 9. Plot the subseries of a time series with monthplot()

op <- par(mfrow = c(2,2))

monthplot(co2, ylab = "data", cex.axis = 0.8)

monthplot(fit, choice = "seasonal", cex.axis = 0.8)

monthplot(fit, choice = "trend", cex.axis = 0.8)

monthplot(fit, choice = "remainder", type = "h", cex.axis = 0.8)

par(op)

# 10. Other data sets similar to co2; try them yourself after class

nottem

AirPassengers

# 11. Multiple time series R objects

EuStockMarkets

plot(EuStockMarkets)

plot(log(EuStockMarkets))

plot(diff(log(EuStockMarkets)))

UKDriverDeaths

plot(Seatbelts)

plot(UKDriverDeaths)

plot(stl(UKDriverDeaths, s.window=12))

#### exercise1 ####

# method 1: plot the full data as a dashed line and fit the smoother, then overlay the first part with a solid line using lines()

co2

plot(co2, ylab = expression("CO"[2]), las = 1, lty=2)

title(main = "Xinran Wang 118102037")

co2.lowess = lowess(co2, f=1/3)        # the classic LOWESS smoother

lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "red")

abline(h = 350, col = "light grey", lty=1)

abline(v = 1988, col = "light grey", lty=1)

text(1961,352,"350 ppm",lwd = 2, cex = 1.3)

text(1990,314,"1988",lwd = 2, cex=1.3)

(co2.yr = window(co2, start = c(1959,1), end = c(1981,12)))        # a TS of yearly totals

co2.y = c(co2.yr)    # the nesting c( ) command returns a regular vector

co2.x = c(time(co2.yr))

lines(co2.x, co2.y, lwd = 3, col = "black")

# method 2: plot the first part as a solid line, overlay the second part as a dashed line with lines(), and finally fit the smoother on the full data

co2

(co2.1 = window(co2, start = c(1959,1), end = c(1981,12))) 

plot(co2.1, ylab = expression("CO"[2]), las = 1, xlim=c(1959,1992), ylim=c(313,360))

title(main = "Xinran Wang 118102037")

(co2.yr = window(co2, start = c(1981,1), end = c(1997,12)))        # a TS of yearly totals

co2.y = c(co2.yr)    # the nesting c( ) command returns a regular vector

co2.x = c(time(co2.yr))

lines(co2.x, co2.y, lwd = 1, lty=2)

co2.lowess = lowess(co2, f=1/3)        # the classic LOWESS smoother

lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "red")

abline(h = 350, col = "light grey", lty=1)

abline(v = 1988, col = "light grey", lty=1)

text(1961,352,"350 ppm",lwd = 2, cex = 1.3)

text(1990,314,"1988",lwd = 2, cex=1.3)

#### Lab class 2 ####

###correlation###

lh

#First, plot the data.

plot(lh, type="o")

#Look at the correlation between current values and previous values (lh at lag 1).

plot( x= lag(lh,k=1), y= lh, xy.lines= FALSE, xy.labels= FALSE)

title(main= paste("r =", round(cor(lh[-1],lh[-48]), digits=3) ))

#lag 1 correlation (start 1 end 48)

cor(lh[-1],lh[-48])

#Look at the correlation between current values and values before last  (lh at lag 2).

plot( x= lag(lh,k=2), y= lh, xy.lines= FALSE, xy.labels= FALSE)

title(main= paste("r =", round(cor(lh[-(1:2)],lh[-(47:48)]), digits=3) ))

#lag 2 correlation

cor(lh[-(1:2)],lh[-(47:48)])

# loop over several lags to see the correlation plots at different lags

layout(mat= matrix(seq(6), ncol=3, byrow=TRUE)) # set the layout of the following plots

for (i in 1:6) {


  plot( x= lag(lh,k=i), y= lh, xy.lines= FALSE, xy.labels= FALSE,


        ylab= paste("lh Series at lag ", i))


  title(main= paste("r =", round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3) ))


}

###acf()###

acf(lh)

#The acf plot is consistent with the previous calculations, as can be verified as follows:

for (i in 1:6) {

  r = round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3)

  points(i, r, pch=16, col="blue")

}

# Comment: for this series, the sample autocorrelation at lag 1 is large, but the dependence dies out immediately at higher lags.

###Partial Autocorrelation ###

# Partial autocorrelation: given Y[t-k], Y[t-k+1], ..., Y[t-1], Y[t], it is the correlation between Y[t] and Y[t-k] after the effects of Y[t-k+1], ..., Y[t-1] have been removed
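
## Hedged sketch: the lag-2 partial autocorrelation of lh obtained by regressing out Y[t-1];

## it should be close to (though not identical to) the second value from pacf(), which uses the Durbin-Levinson recursion on the sample ACF

yv = as.numeric(lh)

nn = length(yv)

e1 = residuals(lm(yv[3:nn] ~ yv[2:(nn-1)])) # Y[t] with the effect of Y[t-1] removed

e2 = residuals(lm(yv[1:(nn-2)] ~ yv[2:(nn-1)])) # Y[t-2] with the effect of Y[t-1] removed

cor(e1, e2)

pacf(lh, plot = FALSE)$acf[2]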

acf(lh, lag.max = 24)

pacf(lh, lag.max = 24)

acf(BJsales)

pacf(BJsales)

ldax = log( EuStockMarkets[,"DAX"] )

acf(ldax)

pacf(ldax)

ldax.d1 = diff(ldax)

acf(ldax.d1)

pacf(ldax.d1)

###Differencing###

co2.d1 = diff(co2) # removing trend

co2.D1 = diff(co2, lag = 12) # removing seasonal

co2.d1D1 = diff(co2.d1, lag = 12) # removing trend and removing seasonal, then the series appears stationary

co2.D1d1 = diff(co2.D1,lag = 1)

co2.d1D1-co2.D1d1

par(mfrow=c(2,2))

plot(co2); plot(co2.d1); plot(co2.D1); plot(co2.d1D1)

acf(co2); acf(co2.d1); acf(co2.D1); acf(co2.d1D1)

###Other things to try###

acf(BJsales)

pacf(BJsales)

#Gaussian Noise

g = as.ts(rnorm(100))

plot(g)

acf(g)

pacf(g)

# White Noise

w = as.ts(rt(100, df=5))

plot(w)

acf(w)

pacf(w)

####exercise2####

layout(mat= matrix(c(1,1,2,3), ncol=2, byrow=TRUE)) # c(1,1,2,3): positions 1 and 2 hold the first plot, position 3 the second, and position 4 the third

plot(lh, type="b",main="Luteinizing Hormone 118102047",ylab="Hormone level",xlab="Time(10 miniutes interval)")

acf(lh)

for (i in 1:6) {

  r = round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3)

  points(i, r, pch=16, col="blue")

}

pacf(lh)

#### Lab class 3 ####

install.packages("astsa")

library(astsa)

? lap

# # kill plot before moving on

par(mfrow=c(3,1))

plot(cmort, main="Cardiovascular Mortality", xlab="", ylab="")

plot(tempr, main="Temperature", xlab="", ylab="")

plot(part, main="Particulates", xlab="", ylab="")

dev.new()

pairs(cbind(Mortality=cmort, Temperature=tempr, Particulates=part)) # shows the pairwise relationships between the three series

# Regression

temp = tempr-mean(tempr) # center temperature

temp2 = temp^2 # square it

trend = time(cmort) # time

# fit the specified regression model

fit = lm(cmort~ trend + temp + temp2 + part, na.action=NULL)

summary(fit)                        # regression results

summary(aov(fit))                  # ANOVA table (compare to next line)

summary(aov(lm(cmort~cbind(trend, temp, temp2, part)))) # Table 2.1

#AIC and BIC are both criteria for assessing model quality

#AIC(fit) and BIC(fit) give the values directly, but here we compute them with the formulas below

num = length(cmort)                                    # sample size

AIC(fit)/num - log(2*pi)                                # AIC

BIC(fit)/num - log(2*pi)                                # BIC

# other ways to compute them

#BIC(fit, k=log(num))/num - log(2*pi)                  # BIC (alt method)

#(AICc = log(sum(resid(fit)^2)/num) + (num+5)/(num-5-2)) # AICc

#moving average smoother

ma3 = filter(cmort, sides=2, rep(1,2)/2)#MA(2)

ma2.2 = filter(cmort, sides=2, c(1,2,1)/4)#MA(2), after MA(2)

plot(cmort, type="p", ylab="mortality", main="Cardiovascular Mortality")

lines(ma3, col="red")

lines(ma2.2, col="blue")

#Example 2.11

wk = time(cmort) - mean(time(cmort)) # wk is essentially t/52 centered at zero

wk2 = wk^2

wk3 = wk^3

cs = cos(2*pi*wk)

sn = sin(2*pi*wk)

reg1 = lm(cmort~wk + wk2 + wk3, na.action=NULL)

reg2 = lm(cmort~wk + wk2 + wk3 + cs + sn, na.action=NULL)

plot(cmort, type="p", ylab="mortality")

lines(fitted(reg1))

lines(fitted(reg2))

#Example 2.12

plot(cmort, type="p", ylab="mortality")

lines(ksmooth(time(cmort), cmort, "normal", bandwidth=5/52))

lines(ksmooth(time(cmort), cmort, "normal", bandwidth=2))

#Example 2.13

par(mfrow=c(2,1))

plot(cmort, type="p", ylab="mortality", main="nearest neighbor")

lines(supsmu(time(cmort), cmort, span=.5))

lines(supsmu(time(cmort), cmort, span=.01))

plot(cmort, type="p", ylab="mortality", main="lowess")

lines(lowess(cmort, f=.02))

lines(lowess(cmort, f=2/3))

#Example 2.14

plot(cmort, type="p", ylab="mortality")

lines(smooth.spline(time(cmort), cmort))

lines(smooth.spline(time(cmort), cmort, spar=1))

#Example 2.15

par(mfrow=c(2,1), mar=c(3,2,1,0)+.5, mgp=c(1.6,.6,0))

plot(tempr, cmort, main="lowess", xlab="Temperature", ylab="Mortality")

lines(lowess(tempr,cmort))

plot(tempr, cmort, main="smoothing splines", xlab="Temperature", ylab="Morta")

lines(smooth.spline(tempr, cmort))

####exercise3####

setwd('//Users//wangxinran//Desktop//4064')

dat = matrix( scan(file="nineXvars.txt") , ncol = 10 , byrow = TRUE )

dat = as.data.frame(dat)

names(dat) = c("y", paste("X",1:9, sep=""))

dim(dat)

names(dat)

pairs(dat)

## overview of linear model

fmla = as.formula( paste("y ~ .^2 +", paste("I(X",1:9,"^2)", sep="", collapse=" + ") ))

mod <- lm( fmla , data = dat)

summary(mod)

mod.aic = step(mod, direction = "backward", k = 2)

summary(mod.aic)

mod.bic = step(mod, direction = "backward", k = log(nrow(dat)))

summary(mod.bic)

plot(fitted(mod.bic), residuals(mod.bic), pch = ".")

#### Lab class 4 ####

#simulates an AR(1) time series process Yt=phi*Yt-1+et, where phi=0.7, sigma(e)^2=1, n=101

alpha = 0.7

Yt = arima.sim(model = list(ar = alpha), n = 101)

?arima.sim # Simulate from an ARIMA model; by default an empty list gives an ARIMA(0,0,0) model, i.e. white noise

AR1.acf = ARMAacf(ar = alpha, lag.max = 25)

AR1.pacf = ARMAacf(ar = alpha, lag.max = 25, pacf = TRUE)

names(AR1.pacf) = names(AR1.acf)[-1]

layout(mat= matrix(c(1,1,2:5), byrow=TRUE, ncol=2))

plot(Yt, main= "Simulated AR(1) Process")

acf(Yt, lag.max = 25, main= "Estimated ACF")

pacf(Yt, lag.max = 25, main= "Estimated PACF")

barplot(AR1.acf, col = "blue", main= "Theoretical ACF")

barplot(AR1.pacf, col = "red", main= "Theoretical PACF")

# change phi and compare the plots: 0.7, -0.7, 0.99, -0.99, with n=101 or 1001

#simulates an AR(2) time series process Yt=0.7*Yt-1-0.4*Yt-2+et, with sigma(e)^2=1

Yt = arima.sim(list(ar = c(0.7, -0.4)), n=1001)

#Repeat the previous exercise using a shorter time series.

Yt = arima.sim(list(ar = c(0.7, 0.4)), n=1001)

#This is wrong, because 'ar' part of model is not stationary.
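
## Hedged sketch of why: one root of 1 - 0.7B - 0.4B^2 lies inside the unit circle,

## which is why arima.sim() refuses to simulate from ar = c(0.7, 0.4)

Mod(polyroot(c(1, -0.7, -0.4))) # one modulus < 1 => not stationary

Mod(polyroot(c(1, -0.7, 0.4))) # both moduli > 1 => the earlier ar = c(0.7, -0.4) model is stationary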

####Plotting the AR(2) stationarity region: start####

#Stationarity of the AR(2) Process

n = 5000

a = runif(n, min= -1, max = 1)

b = runif(n, min= -1, max = 1)

Phi.RootsReal = cbind( a+b , -a*b )

x = runif(n, min= -1, max = 1)

y = numeric()

for(i in 1:n) y[i] = runif(1, -1*sqrt(1-(x[i])^2), sqrt(1-(x[i])^2))

Phi.RootsComplex = cbind( 2*x , -(x^2 +y^2) )

plot(Phi.RootsReal, col="blue", xlab= expression(phi[1]), ylab= expression(phi[2]))

points(Phi.RootsComplex, col="magenta")

curve(1-x, add = TRUE, lwd = 3)        # condition phi1 + phi2 < 1

curve(1+x, add = TRUE, lwd = 3)        # condition phi2 - phi1 < 1

curve(-x^2/4, add = TRUE, lwd = 3)    # condition (phi1)^2 + 4phi2 < 0

CheckComplexRoots = (Phi.RootsComplex[,1])^2 + 4* Phi.RootsComplex[,2]

all(CheckComplexRoots < 0)

#ARMA(p,q) Process

Yt = arima.sim(n = 100, list(ar = c(0.9, -0.5), ma = c(-0.2, 0.25)), sd = sqrt(2))

# mildly long-tailed

Xt = arima.sim(n = 100, list(ar = c(0.9, -0.5), ma = c(-0.2, 0.25)),

              sd = sqrt(2), rand.gen = function(n, ...) sqrt(0.2) * rt(n, df = 5))

Zt = arima.sim(list(ar = 0.7, ma = -0.7), n = 500)

####Plotting the AR(2) stationarity region: end####

###

ar(lh)

lh.acf = acf(lh, lag.max=3)

lh.acf

names(lh.acf)

gamma.vec = matrix(data = lh.acf$acf[-1], ncol=1)

Gamma.mtx = diag(3)

diag(Gamma.mtx) = lh.acf$acf[1]

Gamma.mtx[abs(row(Gamma.mtx)-col(Gamma.mtx)) == 1] = lh.acf$acf[2]

Gamma.mtx[abs(row(Gamma.mtx)-col(Gamma.mtx)) == 2] = lh.acf$acf[3]

gamma.vec ; Gamma.mtx

solve(Gamma.mtx, gamma.vec)
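
## Hedged check: the Yule-Walker solution above should agree (up to rounding) with the coefficients from ar()

ar(lh, order.max = 3, aic = FALSE, method = "yule-walker")$ar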

(sunspot.ar <- ar(sunspot.year))

sunspot.lead = predict(sunspot.ar, n.ahead = 25)

ts.plot(sunspot.year, sunspot.lead$pred, col=c(5,2))

#ARIMA(p,d,q) Models

####exercise4####

layout(mat= matrix(c(1,1,2,3), nrow=2, byrow=TRUE))

set.seed(118102037)

ts.sim = arima.sim(list(order = c(1,2,1), ar = 0.7, ma=0.5), n = 500)

polyroot(z = c(1,-1.7,0.7) )#要按升序排列

ts.plot(ts.sim)

acf(ts.sim)

pacf(ts.sim)

####机房5####

#Case study 1

options(digits = 5, show.signif.stars = FALSE) # print to 5 significant digits and suppress significance stars

?options() # lets the user set and inspect global options that affect how R computes and displays results

######################################

# code 01: look at the data

lh

######################################

# code 02: plot the data. Think: AR or MA(1)? Why?

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(lh, type = "o", xlab = "Time (10 minutes intervals)", ylab= "Hormone Level")

acf(lh) ; pacf(lh)

######################################

# code 03: fit an AR(1) model

lh.ar1 = ar(lh, aic = FALSE, order.max = 1)

lh.ar1

names(lh.ar1)

lh.ar1$method

######################################

# code 04: exploiting this, we see that for

# an AR(1) phi.hat is just:

acf(lh)$acf

round(acf(lh)$acf[2], 4)

######################################

# code 05: criticize fitted AR(1)

# Think: are you satisfied with the behavior of the residuals?

resids = c(lh.ar1$resid)

acf(resids) #doesn't work, why not?

par(mfrow=c(2,2))

acf(resids[-1])

pacf(resids[-1])

qqnorm(resids)

cpgram(resids)

######################################

# code 06: criticise fitted AR(1)

# Using the arima( ) function in R...

tsdiag(lh.ar1) # doesn't work!  TRY...

lh.ar1 = arima(lh, order = c(1,0,0))

lh.ar1 # how does the output differ???

tsdiag(lh.ar1)

cpgram(residuals(lh.ar1))

######################################

# code 07: use arima function to investigate

# AR(2), AR(3), and AR(4) models. e.g.

lh.ar2 = arima(lh, order = c(2,0,0))

lh.ar2 # how does the output differ???

tsdiag(lh.ar2)

cpgram(residuals(lh.ar2))

######################################

# code 08: explore AR(p) p=1,...,8

# The "aic" option in ar( ) is a logical flag.

# If TRUE then the Akaike Information Criterion

# is used to choose the order of the AR model.

# If FALSE, the model of order "order.max" is fitted.

# For example, "order.max = 8" fits an AR(8) model.

# Also collects the AICs for AR(1),AR(2),...,AR(8) models.

# Think: which model? (think very carefully!)

lh.ARmods = ar(lh, order.max=8, aic=FALSE)

plot(seq(0,8), lh.ARmods$aic, type="b", pch=16, col="blue",


    main = "AIC for LH", xlab="Order of AR model",


    ylab="log[ AIC / AIC of AR(3) ]")

######################################

# code 09: Summary of AR(3) model..

# Compare the output...

lh.ar3 = ar(lh, order.max=3)

lh.ar3

#  y[t] = 0.6534*y[t-1] -0.0636*y[t-2] -0.2269*y[t-3] + e[t]

#  with var{ e[t] } = sigma^2 = 0.1959

lh.ar3 = arima(lh, order = c(3,0,0))

lh.ar3 #  what has changed?

lh.ar3 = arima(lh, order = c(3,0,0), method = "CSS-ML")

lh.ar3 #  what has changed?

lh.ar3 = arima(lh, order = c(3,0,0), method = "ML")

lh.ar3 #  what has changed?

lh.ar3 = arima(lh, order = c(3,0,0), method = "CSS")

lh.ar3 # what has changed?

#

######################################

# code 10: Best ARMA subset regression

# Try this at home. You will need the TSA package.

# Think: which model? (think carefully!)

library(TSA)

lh.mods = armasubsets(y = lh, nar=4, nma=4)

plot(lh.mods)

# PREAMBLE

options(digits = 5, show.signif.stars = FALSE)

######################################

# code 01: look at the data

# Think: AR or MA? What order?

LakeHuron

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(LakeHuron, type = "o", ylab = "level (feet)")

title(main= "Annual measurements of the level of Lake Huron 1875-1972.")

acf(LakeHuron); pacf(LakeHuron)

######################################

# code 02: fit some starting models!

# Think: Which model? Why??

lake1 = arima(LakeHuron, order = c(1,0,0))

tsdiag(lake1)

lake2 = arima(LakeHuron, order = c(2,0,0))

tsdiag(lake2)

lake3 = arima(LakeHuron, order = c(3,0,0))

tsdiag(lake3)

lake4 = arima(LakeHuron, order = c(1,0,1))

tsdiag(lake4)

lake1$aic; lake2$aic; lake3$aic; lake4$aic

par(mfrow=c(2,2))

cpgram(residuals(lake1)); cpgram(residuals(lake2))

cpgram(residuals(lake3)); cpgram(residuals(lake4))

######################################

# code 01: look at the data

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(Nile, type = "o")

lines(lowess(Nile, f=1/3), col="red", lwd=2)

acf(Nile); pacf(Nile)

######################################

# code 02: look through possible AR models

ar.aic = ar(Nile)

ar.aic # selects order 2

#how did it decide on AR(2)? try...

plot(seq(ar.aic$aic), ar.aic$aic,


    type = "b", ylab = "AIC", xlab = "order")

######################################

# code 03: fit a preliminary model

myfit = arima(Nile, c(2, 0, 0))

# Can you explain why the pvalues of the Ljung-Box

# Portmanteau statistic are approaching statistical

# significance at higher lags?

# Examine the data further. Can you do better??

#

######################################

# code 99: Best ARMA subset regression

# Try this at home. You will need the TSA package.

# Think: which model? (think carefully!)

library(TSA)

lh.mods = armasubsets(y = Nile, nar=4, nma=4)

plot(lh.mods)

######################################

# code 03: Select a working model

lake.fit = lake4

######################################

# code 04: produce forecasts...

lake.pred = predict(lake.fit, n.ahead = 12)

names(lake.pred)

lake.pred

# Here we have fitted an ARIMA(1,0,1) model to the

# Lake Huron data. We have predicted the levels of

# Lake Huron for the next 12 years. In this case,

# "Lake.pred" is a list containing two entries, the

# predicted values "Lake.pred$pred" and the standard

# errors of the prediction "Lake.pred$se"}. Using a

# rule of thumb ``prediction +/- 2*standard error,"

# an approximate 95% prediction interval can be

# calculated for these future values.

######################################

# code 05: plot forecasts...

plot(LakeHuron, xlim = c(1875, 2000), ylim = c(575,584),


    main="Lake Hurron levels and predicted values")

# First the levels of Lake Huron are plotted.

# To leave space for adding the predicted values,

# the x-axis is set from 1875 to 2000 with

# "xlim=c(1875,2000)"; the use of "ylim" is purely

# for cosmetic purposes here:

# Execute the following code line-by-line...

lake.pred = predict(lake.fit, n.ahead = 28)

lines(lake.pred$pred,col="red")

lines(lake.pred$pred+2*lake.pred$se,col="red",lty=3)

lines(lake.pred$pred-2*lake.pred$se,col="red",lty=3)

abline(h = mean(LakeHuron), col="grey", lty = 2)

# The final command draws a horizontal line at

# the mean of the Lake Huron values. What does

# this lead you to conclude you about the

# potential for long term forecasting?

######################################

# code 06: plot forecasts using one line of R code :) but less colour :(

plot(lake.fit, n.ahead = 12)

######################################

# code 01: read in the data

setwd("... wherever you keep your files")

recife = ts(data = scan("recifetemps.txt"), start = 1953, frequency = 12)

######################################

# code 02: look at the data

# Think: in words, describe the trend and seasonal patterns in the data.

recife

plot(recife, ylab="air temperature")

# and tag the months...

points(y= recife, x=time(recife), pch = as.vector(tag(recife)), cex=0.7) 

######################################

# code 03: look at the data again

plot(stl(recife, s.window=11))

# Rethink: in words, describe the trend and seasonal patterns in the data.

######################################

# code 04: differencing

y = diff(recife)

y = diff(y)

y = diff(y, lag = 12)

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(y, main= expression((1-B)^{2}*(1-B^{12})*X[t]))

points(y, x=time(y), pch = as.vector(tag(y)), cex=0.7) 

acf(y, lag.max = 30); pacf(y, lag.max = 30)

######################################

# code 05: fit an ARIMA(p,d,q)(P,D,Q)

myfit = arima(recife, order=c(3,2,0), seasonal = c(2,1,0))

myfit

resids = residuals(myfit)

par(mfrow=c(1,2))

acf(resids, lag.max = 36)

pacf(resids, lag.max = 36)

# kill plot before proceeding

cpgram(resids)

######################################

# code 06: model criticism

# Think: are you satisfied with the behaviour of the residuals?

# Think: can you produce a better model?

#

######################################

# code 01: load the data

yield = ts(start=1950, frequency = 12, data = c(

  2.22,2.23,2.22,2.20,2.09,1.97,2.03,1.98,1.94,1.79,1.74,1.86,

  1.78,1.72,1.79,1.82,1.89,1.99,1.89,1.83,1.71,1.70,1.97,2.21,

  2.36,2.41,2.92,3.15,3.26,3.51,3.48,3.16,3.01,2.97,2.88,2.91,

  3.45,3.29,3.17,3.09,3.02,2.99,2.97,2.94,2.84,2.85,2.86,2.89,

  2.93,2.93,2.87,2.82,2.63,2.33,2.22,2.15,2.28,2.28,2.06,2.54,

  2.29,2.66,3.03,3.17,3.83,3.99,4.11,4.51,4.66,4.37,4.45,4.58,

  4.58,4.76,4.89,4.65,4.51,4.65,4.52,4.52,4.57,4.65,4.74,5.10,

  5.00,4.74,4.79,4.83,4.80,4.83,4.77,4.80,5.38,6.18,6.02,5.91,

  5.66,5.42,5.06,4.70,4.73,4.64,4.62,4.48,4.43,4.33,4.32,4.30,

  4.26,4.02,4.06,4.08,4.09,4.14,4.15,4.20,4.30,4.26,4.15,4.27,

  4.69,4.72,4.92,5.10,5.20,5.56,6.08,6.13,6.09,5.99,5.58,5.59,

  5.42,5.30,5.44,5.32,5.21,5.47,5.96,6.50,6.48,6.00,5.83,5.91,

  5.98,5.91,5.64,5.49,5.43,5.33,5.22,5.03,4.74,4.55,4.68,4.53,

  4.67,4.81,4.98,5.00,4.94,4.84,4.76,4.67,4.51,4.42,4.53,4.70,

  4.75,4.90,5.06,4.99,4.96,5.03,5.22,5.47,5.45,5.48,5.57,6.33,

  6.67,6.52,6.60,6.78,6.79,6.83,6.91,6.93,6.65,6.53,6.50,6.69,

  6.58,6.42,6.79,6.82,6.76,6.88,7.22,7.41,7.27,7.03,7.09,7.18,

  6.69,6.50,6.46,6.35,6.31,6.41,6.60,6.57,6.59,6.80,7.16,7.51,

  7.52,7.40,7.48,7.42,7.53,7.75,7.80,7.63,7.51,7.49,7.64,7.92,

  8.10,8.18,8.52,8.56,9.00,9.34,9.04,9.08,9.14,8.99,8.96,8.86,

  8.79,8.62,8.29,8.05,8.00,7.89,7.48,7.31,7.42,7.51,7.71,7.99))

######################################

# code 02: plot the data

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(yield, type = "o", ylab= "Yield")

acf(yield) ; pacf(yield)

# [1] There is no discernible seasonal variation.

# [2] There is a marked trend, from about 2% at

# the start of the series to about 7% at the end.

# [3] The trend is by no means regular, and to

# assume and to fit a straight line would be a gross

# over simplification. The use of a trend-and-season

# model, or linear regression seems inappropriate.

# [4] These observations are confirmed by the

# spectrum, ACF, and PACF. Kill the last plot. Then...

cpgram(yield)

# suggests a Random Walk Model.

# The non-stationarity indicates that

# some sort of differencing is required.

# Kill the last plot. Then...

######################################

# code 03: difference the data, and plot...

yield.diff = diff(yield)

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(yield.diff, type = "o", ylab = "Difference",


    main= "Month-to-Month Changes in Yield")

acf(yield.diff) ; pacf(yield.diff)

# Kill the last plot. Then...

cpgram(yield.diff)

# These plots tell us quite a lot.

# First, the differenced series is

# now stationary, and no additional

# differencng is required. The fact

# that there is only one significant

# coefficient at lag 1 indicates an

# ARIMA(0,1,1) model.

######################################

# code 04: Fit and contrast some ARIMA fits...

fit1 = arima(yield,order=c(0,1,1))

tsdiag(fit1)

cpgram(residuals(fit1))

fit2 = arima(yield,order=c(0,1,2))

tsdiag(fit2)

cpgram(residuals(fit2))

fit3 = arima(yield,order=c(0,1,3))

tsdiag(fit3)

cpgram(residuals(fit3))

######################################

# code 05: plot forecasts...

plot(yield, xlim = c(1950, 1972), ylim = c(0,10),


    main="Yield (%) on British short term government securities")

yield.pred = predict(fit3, n.ahead = 20)

lines(yield.pred$pred,col="red")

lines(yield.pred$pred+2* yield.pred$se,col="red",lty=3)

lines(yield.pred$pred-2* yield.pred$se,col="red",lty=3)

# What do you conclude about the potential for long term forecasting here?

######################################

# code 01: look at the data

WWWusage

######################################

# code 02: plot the data. Think: AR or MA(1)? Why?

layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))

plot(WWWusage, type = "o", main = "Internet Usage per Minute",


    xlab = "Time (minute intervals)", ylab= "Number of Users")

acf(WWWusage) ; pacf(WWWusage)

######################################

# code 03: Dickey-Fuller Test

# If "tseries" package is available, then try:

library(tseries)

adf.test(WWWusage)

######################################

# code 04: take differences

work <- diff(WWWusage)

par(mfrow = c(2, 1)); plot(WWWusage); plot(work)

######################################

# code 05: Model Search

aics <- matrix(, 6, 6, dimnames = list(p = 0:5, q = 0:5))

for(q in 1:5) aics[1, 1+q] <- arima(WWWusage, c(0, 1, q),


                                    optim.control = list(maxit = 500))$aic

for(p in 1:5)


  for(q in 0:5) aics[1+p, 1+q] <- arima(WWWusage, c(p, 1, q),


                                        optim.control = list(maxit = 500))$aic

round(aics - min(aics, na.rm = TRUE), 2)
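
## Hedged sketch: pick out the (p, q) pair with the smallest AIC from the grid

best = which(aics == min(aics, na.rm = TRUE), arr.ind = TRUE)

best - 1 # the corresponding orders p and q (subtract 1 because row/column 1 corresponds to order 0)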

#

######################################

# code 06: Investigate the following models.

# ARIMA(1,1,0),  ARIMA(2,1,0), ARIMA(3,1,0), ARIMA(1,1,1),

##  Code 1

##  plot the data...

plot(nottem, main = "nottem data",ylab =  "Average monthly temperature at Nottingham Castle (deg. F)")

##  Code 2

##  a few plots to visualise the seasonal components... 

Temperature = matrix(c(nottem), nrow = 12, byrow = FALSE)

matplot(Temperature, type = "l")

Temperature = c(nottem)

Month = c(cycle(nottem))

boxplot(Temperature ~ Month)

cpgram(nottem)

plot(stl(nottem, s.window = 4))

##  Code 3

##  Differencing

nottem.D1 = diff(nottem, lag = 12)

cpgram(nottem.D1)

par(mfrow=c(1,2))

acf(nottem.D1, lag.max=40)

pacf(nottem.D1, lag.max=40)

# (kill plot before proceeding)

##  Code 4

##  Fit S-ARIMA(p,d,q)(P,D,Q)

fit <- arima(nottem, order = c(1,0,0), list(order = c(2,1,0), period = 12))

tsdiag(fit)

cpgram(residuals(fit))

##  Code 5

##  Forecasting...

nott.fore <- predict(fit, n.ahead = 36)

ts.plot(nottem, nott.fore$pred, nott.fore$pred+2*nott.fore$se,


        nott.fore$pred-2*nott.fore$se, gpars = list(col = c(1,1,4,4)))

#### Lab class 6 ####

######################################

# code 01: look at the data

plot(AirPassengers, ylab="Air Passengers (1000's)")

segments(x0=1951, y0=100, x1=1961, y1=360, col = 2, lty = 2, lwd = 2 )

segments(x0=1950, y0=170, x1=1960.5, y1=620, col = 2, lty = 2, lwd = 2 )

######################################

# code 02: apply a transformation

APs = log(AirPassengers)

plot(APs)

######################################

# code 03: autocorrelation dominated by trend:

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))

plot(APs, ylab ="", main=expression(X[i]))

acf(APs, main = "")

pacf(APs, main = "")

######################################

# code 04: difference the data

AP1dif = diff(APs)

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))

plot(AP1dif, ylab = "", main=expression((1-B)*X[i]))

acf(AP1dif, lag.max = 48, main = "")

pacf(AP1dif, lag.max = 48, main = "")

######################################

# code 05: difference the data, at lag 12

AP12dif = diff(APs,lag=12)

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))

plot(AP12dif, ylab="", main=expression((1-B^{12})*X[i]))

acf(AP12dif, lag.max = 48, main = "")

pacf(AP12dif, lag.max = 48, main = "")

######################################

# code 06: difference the data at lag 12, and difference again at lag 1

Ys = diff(AP12dif)

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))

plot(Ys, ylab="",main=expression(Y[i]==(1-B)*(1-B^{12})*X[i]))

acf(Ys, lag.max = 48, main = "")

pacf(Ys, lag.max = 48, main = "")

# Kill graph before moving on

######################################

# code 07: select an ARIMA model

APfit =  arima(APs, order=c(0, 1, 0), seasonal = list(order=c(0, 1 ,0), period=12))

tsdiag(APfit)

APfit =  arima(APs, order=c(0, 1, 1), seasonal = list(order=c(0, 1 ,1), period=12))

tsdiag(APfit)

APfit

######################################

# code 08: produce forecasts...

AP.pred = predict(APfit, n.ahead = 30)

AP.pred

######################################

# code 09: plot forecasts...

plot(APs, xlim = c(1949, 1963), ylim = c(4.6,6.8),


    main="log[ Air Passengers (1000's) ]")

lines(AP.pred$pred,col="red")

lines(AP.pred$pred+2*AP.pred$se,col="red",lty=3)

lines(AP.pred$pred-2*AP.pred$se,col="red",lty=3)

# Homework: transform the prediction back to the

# original scale and display forecasts with prediction

# bands where Y axis is in 1000's airline passengers
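
# Hedged sketch of the homework (not the official solution): back-transform the

# log-scale forecasts with exp() and plot them on the original passenger scale

plot(AirPassengers, xlim = c(1949, 1963), ylim = c(100, 900), main = "Air Passengers (1000's) with forecasts")

lines(exp(AP.pred$pred), col = "red")

lines(exp(AP.pred$pred + 2*AP.pred$se), col = "red", lty = 3)

lines(exp(AP.pred$pred - 2*AP.pred$se), col = "red", lty = 3)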

######################################

# code 10: Periodograms

cpgram(AirPassengers)

cpgram(APs)

cpgram(AP1dif)

cpgram(AP12dif)

cpgram(Ys)

cpgram(residuals(APfit))
