## Econometric Analysis ## install.packages("wooldridge") library(wooldridge); library(glm2); library(mlogit);library(MASS) # 2. Basic method ##### ## 2.1 logistic regression #### data("mroz") ?mroz mod = glm2(inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6, family=binomial(link = "logit"), data =mroz); summary(mod) coef = summary(mod)$coefficients ## 2.2 multinomial model #### data("Fishing", package = "mlogit") Fish <- mlogit.data(Fishing, varying = c(2:9), shape = "wide", choice = "mode") ## a pure "conditional" model summary(mlogit(mode ~ price + catch, data = Fish)) ## a pure "multinomial model" summary(mlogit(mode ~ 0 | income, data = Fish)) ## which can also be estimated using multinom (package nnet) library("nnet") summary(multinom(mode ~ income, data = Fishing)) ## a "mixed" model m <- mlogit(mode ~ price+ catch | income, data = Fish) summary(m) ## same model with charter as the reference level m <- mlogit(mode ~ price+ catch | income, data = Fish, reflevel = "charter") ## same model with a subset of alternatives : charter, pier, beach m <- mlogit(mode ~ price+ catch | income, data = Fish, alt.subset = c("charter", "pier", "beach")) ## 2.3 poisson regresson data(crabs) satellites <- crabs$Satellites width.shifted <- crabs$Width - min(crabs$Width) dark <- crabs$Dark goodspine <- crabs$GoodSpine fit1 <- glm(satellites ~ width.shifted + factor(dark) + factor(goodspine),family = poisson(link="identity"), start = rep(1,4)) fit2 <- glm2(satellites ~ width.shifted + factor(dark) + factor(goodspine), family = poisson(link="identity"), start = rep(1,4)) fit1.eq <- glm2(satellites ~ width.shifted + factor(dark) + factor(goodspine),family = poisson(link="identity"), start = rep(1,4), method = "glm.fit") ## 2.4 negative regression fit3 <- glm.nb(satellites ~ width.shifted + factor(dark) + factor(goodspine)) coef = summary(fit3)$coefficients ## 2.5 Tobit regression library(survival) # Economists fit a model called `tobit regression', which is a standard # linear regression with Gaussian errors, and left censored data. tobinfit <- survreg(Surv(durable, durable>0, type='left') ~ age + quant,data=tobin, dist='gaussian') tt = summary(tobinfit) coef = summary(tobinfit)$coefficients ## 2.6 two stage model library(sampleSelection) ## Estimate a simple female wage model taking into account the labour ## force participation data(Mroz87) a <- heckit(lfp ~ huswage + kids5 + mtr + fatheduc + educ + city, log(wage) ~ educ + city, data=Mroz87) ## extract all coefficients of the model: coef( a ) ## now extract the coefficients of the outcome model only: coef( a, part="outcome") ## extract all coefficients, standard errors, t-values ## and p-values of the model: coef( summary( a ) ) ## now extract the coefficients, standard errors, t-values ## and p-values of the outcome model only: coef( summary( a ), part="outcome") ## 2.7 Multilevel Model ## linear mixed models - reference values from older code library(lme4) library(mlmRev) data(Exam);names(Exam) ### null-model mod = lmer(normexam ~ 1 + (1 | school), data=Exam) summary(mod) ### random intercept, fixed predictor in individual level mod = lmer(normexam ~ schavg + (1 | school), data=Exam) summary(mod) ### random intercept, random slope mod = lmer(normexam ~ standLRT + (standLRT | school), data=Exam, method="ML") summary(mod) ### random intercept, individual and group level predictor mod = lmer(normexam ~ standLRT + schavg + (1 + standLRT | school), data=Exam) summary(mod) ### random intercept, cross-level interaction mod = lmer(normexam ~ standLRT * schavg + (1 + standLRT | school), data=Exam) summary(mod) ## 2.7 survival regression # Fit an exponential model: the two fits are the same library(survival) mod = survreg(Surv(futime, fustat) ~ ecog.ps + rx, ovarian, dist='weibull', scale=1) mod = survreg(Surv(futime, fustat) ~ ecog.ps + rx, ovarian, dist="exponential") # A model with different baseline survival shapes for two groups, i.e., # two different scale parameters data(lung) mod = survreg(Surv(time, status) ~ ph.ecog + age + strata(sex), dist="exponential",data= lung) summary(mod) #### cox model # Create the simplest test data set test1 <- list(time=c(4,3,1,1,2,2,3), status=c(1,1,1,0,1,1,0), x=c(0,2,1,1,1,0,0), sex=c(0,0,0,0,1,1,1)) # Fit a stratified model test2 = coxph(Surv(time, status) ~ x + strata(sex), test1) ### frailty model library(frailtypack) ###--- COX proportional hazard model (SHARED without frailties) ---### ###--- estimated with penalized likelihood ---### data(kidney) frailtyPenal(Surv(time,status)~sex+age, n.knots=12,kappa=10000,data=kidney) ###--- Shared Frailty model ---### frailtyPenal(Surv(time,status)~cluster(id)+sex+age, n.knots=12,kappa=10000,data=kidney) #-- stratified analysis data(readmission) frailtyPenal(Surv(time,event)~cluster(id)+dukes+strata(sex), n.knots=10,kappa=c(10000,10000),data=readmission) ### competing risk library(cmprsk) # simulated data to test set.seed(10) ftime <- rexp(200) fstatus <- sample(0:2,200,replace=TRUE) cov <- matrix(runif(600),nrow=200) dimnames(cov)[[2]] <- c('x1','x2','x3') z <- crr(ftime,fstatus,cov, failcode=2, cencode=0) summary(z) ### mutliple state model library(mstate) data("ebmt4") ebmt = ebmt4 tmat <- transMat(x = list(c(2, 3, 5, 6), c(4, 5, 6), c(4, 5, 6), c(5, 6), c(), c()), names = c("Tx", "Rec", "AE", "Rec+AE", "Rel", "Death")) tmat msebmt <- msprep(data = ebmt, trans = tmat, time = c(NA, "rec", "ae", "recae", "rel", "srv"), status = c(NA, "rec.s", "ae.s", "recae.s", "rel.s", "srv.s"), keep = c("match", "proph", "year", "agecl")) events(msebmt) covs <- c("match", "proph", "year", "agecl") msebmt <- expand.covs(msebmt, covs, longnames = FALSE) msebmt[msebmt$id == 1, -c(9, 10, 12:48, 61:84)] msebmt[, c("Tstart", "Tstop", "time")] <- msebmt[, c("Tstart", "Tstop", "time")]/365.25 c0 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), data = msebmt, method = "breslow") msf0 <- msfit(object = c0, vartype = "greenwood", trans = tmat) pt0 <- probtrans(msf0, predt = 0, method = "greenwood") summary(pt0, from = 1) # 4. Pandel Data ## 4.1 # Several models can be estimated with plm by filling the model argument: # the fixed effects model ("within"), # the pooling model ("pooling"), # the first-difference model ("fd"), # the between model ("between"), # the error components model ("random"). library("plm") ## https://cran.r-project.org/web/packages/plm/vignettes/plmPackage.html data("Grunfeld", package="plm") head(Grunfeld) grun.fe <- plm(inv~value+capital, data = Grunfeld, model = "within"); ## fixed effect summary(grun.fe) fixef(grun.fe, type = "dmean") summary(fixef(grun.fe, type = "dmean")) grun.re <- plm(inv~value+capital, data = Grunfeld, model = "random"); ## random effect summary(grun.re) ## Hausman test gw <- plm(inv~value+capital, data=Grunfeld, model="within") gr <- plm(inv~value+capital, data=Grunfeld, model="random") phtest(gw, gr) grun.twfe <- plm(inv~value+capital, data=Grunfeld, model="within", effect="twoways") tt = fixef(grun.twfe, type = "dmean"); ## effect = "time" summary(tt) ### Variable coefficients model data("Gasoline", package = "plm") form <- lgaspcar ~ lincomep + lrpmg + lcarpcap gasw <- plm(form, data = Gasoline, model = "within") gasp <- plm(form, data = Gasoline, model = "pooling") gasnp <- pvcm(form, data = Gasoline, model = "within") pooltest(gasw, gasnp) pooltest(gasp, gasnp) pooltest(form, data = Gasoline, effect = "individual", model = "within") pooltest(form, data = Gasoline, effect = "individual", model = "pooling")