之前有小伙伴说希望公开之前变量筛选文章(一些变量筛选方法——5、真实数据与总结)的代码,这里时隔好多个月,将之前的代码整理出来了。
当时由于时间有限,只有几天的时间将论文和代码赶出来,所以写的不是很好,全程for循环,还请见谅!
# --------------------- simulation --------------------- #
library(glmnet)
library(ISLR)
library(leaps)
library(BeSS)
library(energy)
library(ncvreg)
gen_y <- function(x1, x2, x3, x4) {
c_vec <- c(2, 0.5, 3, 2)
a <- 4 * log(n) / sqrt(n)
U <- rbinom(n = 4, size = 1, prob = 0.4)
Z <- rnorm(4)
beta <- (-1) ^ U * (a + abs(Z))
b <- runif(3, -1, 1)
eps <- rnorm(n) * 0.01
y1 <- c_vec[1] * beta[1] * x1 + c_vec[2] * beta[2] * x2 +
c_vec[3] * beta[3] * ifelse(x3 < 0, 1, 0) +
c_vec[4] * beta[4] * x4 + eps
y2 <- c_vec[1] * beta[1] * x1 * x2 +
c_vec[2] * beta[2] * ifelse(x3 < 0, 1, 0) +
c_vec[4] * beta[4] * x4 + eps
y3 <- c_vec[1] * beta[1] * x1 + c_vec[2] * beta[2] * x2 +
c_vec[3] * beta[3] * ifelse(x3 < 0, 1, 0) +
exp(c_vec[4] * abs(x4)) + eps
y4 <- b[1] * abs(x1) + b[2] * x2 / (2 - x2) + b[3] * exp(x3) / 3 + eps
y5 <- b[1] * sin(x1) + b[2] * tanh(x2) + b[3] * x1 / x2 + eps
list(y1, y2, y3, y4, y5)
}
gen_x <- function(p) {
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
X
}
# k = 1, 2, 3, 4, 5, 为所选择的公式
gen_dat <- function(p, k) {
X <- gen_x(p)
dat <- cbind(X, gen_y(X[, 1], X[, 2], X[, 3], X[, 4])[[k]])
colnames(dat) <- c(paste0('x', 1:p), 'y')
dat
}
# ------------------ 1
# 后面将选用最优子集选择,向前逐步回归,向后逐步回归,LASSO,SCAD,PDAS
n <- 200
# p_vec <- c(20, 30, 50, 100, 200, 500)
p_vec <- c(10, 20, 30)
p_ture_vec <- c(4, 4, 4, 3, 2)
m.t.leaps = sd.t.leaps = m.TP.leaps = sd.TP.leaps = m.FP.leaps = sd.FP.leaps = matrix(nrow = 5, ncol = length(p_vec))
m.t.forward = sd.t.forward = m.TP.forward = sd.TP.forward = m.FP.forward = sd.FP.forward = matrix(nrow = 5, ncol = length(p_vec))
m.t.backward = sd.t.backward = m.TP.backward = sd.TP.backward = m.FP.backward = sd.FP.backward = matrix(nrow = 5, ncol = length(p_vec))
m.t.glmnet = sd.t.glmnet = m.TP.glmnet = sd.TP.glmnet = m.FP.glmnet = sd.FP.glmnet = matrix(nrow = 5, ncol = length(p_vec))
m.t.ncvreg = sd.t.ncvreg = m.TP.ncvreg = sd.TP.ncvreg = m.FP.ncvreg = sd.FP.ncvreg = matrix(nrow = 5, ncol = length(p_vec))
m.t.spdas = sd.t.spdas = m.TP.spdas = sd.TP.spdas = m.FP.spdas = sd.FP.spdas = matrix(nrow = 5, ncol = length(p_vec))
for(m in 1:length(p_vec)) {
p <- p_vec[m]
for(k in 1:5) {
t.leaps = TP.leaps = FP.leaps = vector()
t.forward = TP.forward = FP.forward = vector()
t.backward = TP.backward = FP.backward = vector()
t.glmnet = TP.glmnet = FP.glmnet = vector()
t.ncvreg = TP.ncvreg = FP.ncvreg = vector()
t.spdas = TP.spdas = FP.spdas = vector()
for(i in 1:100) {
set.seed(2018 + i)
dat_temp <- gen_dat(p, k)
dat <- list()
dat$x <- dat_temp[, 1:p]
dat$y <- dat_temp[, p + 1]
p_ture <- p_ture_vec[k]
# Best subset selection with "leaps" package
t.leaps[i] <- system.time(fit.leaps <- regsubsets(dat$x ,dat$y, really.big = TRUE, nvmax = p, all.best = TRUE))[3]
mark <- which.min(log(fit.leaps$bound[-1] / n) * n + 2 * (1:p)) # location of minimum of AIC
beta.leaps <- rep(0, p)
names(beta.leaps) <- colnames(dat$x)
beta.leaps[names(coef(fit.leaps, mark))[-1]] <- coef(fit.leaps, mark)[-1]
df.leaps = sum(beta.leaps != 0)
TP.leaps[i] = length(intersect(which(beta.leaps != 0), 1:p_ture))
FP.leaps[i] = df.leaps - TP.leaps[i]
# Forward stepwise with "leaps" package
t.forward[i] <- system.time(fit.forward <- regsubsets(dat$x ,dat$y, really.big = TRUE, nvmax = p, all.best = TRUE, method = 'forward'))[3]
mark <- which.min(log(fit.forward$bound[-1] / n) * n + 2 * (1:p)) # location of minimum of AIC
beta.forward <- rep(0, p)
names(beta.forward) <- colnames(dat$x)
beta.forward[names(coef(fit.forward, mark))[-1]] <- coef(fit.forward, mark)[-1]
df.forward = sum(beta.forward != 0)
TP.forward[i] = length(intersect(which(beta.forward != 0), 1:p_ture))
FP.forward[i] = df.forward - TP.forward[i]
# Backward stepwise with "leaps" package
t.backward[i] <- system.time(fit.backward <- regsubsets(dat$x ,dat$y, really.big = TRUE, nvmax = p, all.best = TRUE, method = 'backward'))[3]
mark <- which.min(log(fit.backward$bound[-1] / n) * n + 2 * (1:p)) # location of minimum of AIC
beta.backward <- rep(0, p)
names(beta.backward) <- colnames(dat$x)
beta.backward[names(coef(fit.backward, mark))[-1]] <- coef(fit.backward, mark)[-1]
df.backward = sum(beta.backward != 0)
TP.backward[i] = length(intersect(which(beta.backward != 0), 1:p_ture))
FP.backward[i] = df.backward - TP.backward[i]
# fit a glmnet with method CV
t.glmnet[i] <- system.time(fit.cv.glmnet <- cv.glmnet(dat$x, dat$y))[3]
fit.glmnet <- glmnet(dat$x, dat$y, lambda = fit.cv.glmnet$lambda.1se)
beta.glmnet <- fit.glmnet$beta
beta0.glmnet <- fit.glmnet$a0
df.glmnet <- sum(beta.glmnet != 0)
TP.glmnet[i] <- length(intersect(which(beta.glmnet != 0), 1:p_ture))
FP.glmnet[i] <- df.glmnet - TP.glmnet[i]
# SCAD with "ncvreg" package
t.ncvreg[i] <- system.time(fit.cv.ncvreg <- cv.ncvreg(dat$x, dat$y, penalty = 'SCAD'))[3]
fit.ncvreg <- ncvreg(dat$x, dat$y, lambda = fit.cv.ncvreg$lambda.min, penalty = 'SCAD')
beta.ncvreg <- fit.ncvreg$beta
beta0.ncvreg <- fit.ncvreg$a0
df.ncvreg <- sum(beta.ncvreg != 0)
TP.ncvreg[i] <- length(intersect(which(beta.ncvreg != 0), 1:p_ture))
FP.ncvreg[i] <- df.ncvreg - TP.ncvreg[i]
# fit a BeSS model with method = "sequential"
t.spdas[i] <- system.time(fit.spdas <- bess(dat$x, dat$y,
method = "sequential", epsilon = 0))[3]
beta.spdas <- coef(fit.spdas, sparse = F, type = "AIC")
df.spdas <- sum(beta.spdas[-1] != 0)
TP.spdas[i] <- length(intersect(which(beta.spdas[-1] != 0), 1:p_ture))
FP.spdas[i] <- df.spdas - TP.spdas[i]
print(paste(i, k ,m))
}
m.t.leaps[k, m] <- mean(t.leaps)
sd.t.leaps[k, m] <- sd(t.leaps)
m.TP.leaps[k, m] <- mean(TP.leaps)
sd.TP.leaps[k, m] <- sd(TP.leaps)
m.FP.leaps[k, m] <- mean(FP.leaps)
sd.FP.leaps[k, m] <- sd(FP.leaps)
m.t.forward[k, m] <- mean(t.forward)
sd.t.forward[k, m] <- sd(t.forward)
m.TP.forward[k, m] <- mean(TP.forward)
sd.TP.forward[k, m] <- sd(TP.forward)
m.FP.forward[k, m] <- mean(FP.forward)
sd.FP.forward[k, m] <- sd(FP.forward)
m.t.backward[k, m] <- mean(t.backward)
sd.t.backward[k, m] <- sd(t.backward)
m.TP.backward[k, m] <- mean(TP.backward)
sd.TP.backward[k, m] <- sd(TP.backward)
m.FP.backward[k, m] <- mean(FP.backward)
sd.FP.backward[k, m] <- sd(FP.backward)
m.t.glmnet[k, m] <- mean(t.glmnet)
sd.t.glmnet[k, m] <- sd(t.glmnet)
m.TP.glmnet[k, m] <- mean(TP.glmnet)
sd.TP.glmnet[k, m] <- sd(TP.glmnet)
m.FP.glmnet[k, m] <- mean(FP.glmnet)
sd.FP.glmnet[k, m] <- sd(FP.glmnet)
m.t.ncvreg[k, m] <- mean(t.ncvreg)
sd.t.ncvreg[k, m] <- sd(t.ncvreg)
m.TP.ncvreg[k, m] <- mean(TP.ncvreg)
sd.TP.ncvreg[k, m] <- sd(TP.ncvreg)
m.FP.ncvreg[k, m] <- mean(FP.ncvreg)
sd.FP.ncvreg[k, m] <- sd(FP.ncvreg)
m.t.spdas[k, m] <- mean(t.spdas)
sd.t.spdas[k, m] <- sd(t.spdas)
m.TP.spdas[k, m] <- mean(TP.spdas)
sd.TP.spdas[k, m] <- sd(TP.spdas)
m.FP.spdas[k, m] <- mean(FP.spdas)
sd.FP.spdas[k, m] <- sd(FP.spdas)
}
}
# save(m.t.leaps, sd.t.leaps, m.TP.leaps, sd.TP.leaps, m.FP.leaps, sd.FP.leaps,
# m.t.forward, sd.t.forward, m.TP.forward, sd.TP.forward, m.FP.forward, sd.FP.forward,
# m.t.backward, sd.t.backward, m.TP.backward, sd.TP.backward, m.FP.backward, sd.FP.backward,
# m.t.glmnet, sd.t.glmnet, m.TP.glmnet, sd.TP.glmnet, m.FP.glmnet, sd.FP.glmnet,
# m.t.ncvreg, sd.t.ncvreg, m.TP.ncvreg, sd.TP.ncvreg, m.FP.ncvreg, sd.FP.ncvreg,
# m.t.spdas, sd.t.spdas, m.TP.spdas, sd.TP.spdas, m.FP.spdas, sd.FP.spdas,
# file = 'D:\\Kanny\\simulation1.rda')
t.leaps = TP.leaps = FP.leaps = matrix(nrow = 5, ncol = 3)
t.forward = TP.forward = FP.forward = matrix(nrow = 5, ncol = 3)
t.backward = TP.backward = FP.backward = matrix(nrow = 5, ncol = 3)
t.glmnet = TP.glmnet = FP.glmnet = matrix(nrow = 5, ncol = 3)
t.ncvreg = TP.ncvreg = FP.ncvreg = matrix(nrow = 5, ncol = 3)
t.spdas = TP.spdas = FP.spdas = matrix(nrow = 5, ncol = 3)
options(scipen = 200)
for(i in 1:5) {
for(j in 1:3) {
t.leaps[i, j] <- paste(round(m.t.leaps[i, j], 4))
t.forward[i, j] <- paste(round(m.t.forward[i, j], 4))
t.backward[i, j] <- paste(round(m.t.backward[i, j], 4))
t.glmnet[i, j] <- paste(round(m.t.glmnet[i, j], 4))
t.ncvreg[i, j] <- paste(round(m.t.ncvreg[i, j], 4))
t.spdas[i, j] <- paste(round(m.t.spdas[i, j], 4))
TP.leaps[i, j] <- paste(round(m.TP.leaps[i, j], 2), '±', round(sd.TP.leaps[i, j], 2))
TP.forward[i, j] <- paste(round(m.TP.forward[i, j], 2), '±', round(sd.TP.forward[i, j], 2))
TP.backward[i, j] <- paste(round(m.TP.backward[i, j], 2), '±', round(sd.TP.backward[i, j], 2))
TP.glmnet[i, j] <- paste(round(m.TP.glmnet[i, j], 2), '±', round(sd.TP.glmnet[i, j], 2))
TP.ncvreg[i, j] <- paste(round(m.TP.ncvreg[i, j], 2), '±', round(sd.TP.ncvreg[i, j], 2))
TP.spdas[i, j] <- paste(round(m.TP.spdas[i, j], 2), '±', round(sd.TP.spdas[i, j], 2))
FP.leaps[i, j] <- paste(round(m.FP.leaps[i, j], 2), '±', round(sd.FP.leaps[i, j], 2))
FP.forward[i, j] <- paste(round(m.FP.forward[i, j], 2), '±', round(sd.FP.forward[i, j], 2))
FP.backward[i, j] <- paste(round(m.FP.backward[i, j], 2), '±', round(sd.FP.backward[i, j], 2))
FP.glmnet[i, j] <- paste(round(m.FP.glmnet[i, j], 2), '±', round(sd.FP.glmnet[i, j], 2))
FP.ncvreg[i, j] <- paste(round(m.FP.ncvreg[i, j], 2), '±', round(sd.FP.ncvreg[i, j], 2))
FP.spdas[i, j] <- paste(round(m.FP.spdas[i, j], 2), '±', round(sd.FP.spdas[i, j], 2))
}
}
library(xtable)
#### Time
# p = 10
l <- 1
table_out <- cbind(t.leaps[, l], t.forward[, l], t.backward[, l], t.glmnet[, l], t.ncvreg[, l], t.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = '方法耗时,p = 10')
# p = 20
l <- 2
table_out <- cbind(t.leaps[, l], t.forward[, l], t.backward[, l], t.glmnet[, l], t.ncvreg[, l], t.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = '方法耗时,p = 20')
# p = 30
l <- 3
table_out <- cbind(t.leaps[, l], t.forward[, l], t.backward[, l], t.glmnet[, l], t.ncvreg[, l], t.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = '方法耗时,p = 30')
#### True Positive
# p = 10
l <- 1
table_out <- cbind(TP.leaps[, l], TP.forward[, l], TP.backward[, l], TP.glmnet[, l], TP.ncvreg[, l], TP.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = 'True Positive,p = 10')
# p = 20
l <- 2
table_out <- cbind(TP.leaps[, l], TP.forward[, l], TP.backward[, l], TP.glmnet[, l], TP.ncvreg[, l], TP.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = 'True Positive,p = 20')
# p = 30
l <- 3
table_out <- cbind(TP.leaps[, l], TP.forward[, l], TP.backward[, l], TP.glmnet[, l], TP.ncvreg[, l], TP.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = 'True Positive,p = 30')
#### False Positive
# p = 10
l <- 1
table_out <- cbind(FP.leaps[, l], FP.forward[, l], FP.backward[, l], FP.glmnet[, l], FP.ncvreg[, l], FP.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = 'False Positive,p = 10')
# p = 20
l <- 2
table_out <- cbind(FP.leaps[, l], FP.forward[, l], FP.backward[, l], FP.glmnet[, l], FP.ncvreg[, l], FP.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = 'False Positive,p = 20')
# p = 30
l <- 3
table_out <- cbind(FP.leaps[, l], FP.forward[, l], FP.backward[, l], FP.glmnet[, l], FP.ncvreg[, l], FP.spdas[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Best SS', 'Forward', 'Backward', 'LASSO', 'SCAD', 'PDAS')
xtable(table_out, caption = 'False Positive,p = 30')
# ------------------ 2
n <- 200
# p_vec <- c(20, 30, 50, 100, 200, 500)
p_vec <- c(100, 200)
p_ture_vec <- c(4, 4, 4, 3, 2)
m.t.cor = sd.t.cor = m.cor_out = sd.cor_out = matrix(nrow = 5, ncol = length(p_vec))
m.t.dcor = sd.t.dcor = m.dcor_out = sd.dcor_out = matrix(nrow = 5, ncol = length(p_vec))
m.t.rf = sd.t.rf = m.rf_out = sd.rf_out = matrix(nrow = 5, ncol = length(p_vec))
for(m in 1:length(p_vec)) {
p <- p_vec[m]
for(k in 1:5) {
t.cor = cor_out = vector()
t.dcor = dcor_out = vector()
t.rf = rf_out = vector()
for(i in 1:100) {
set.seed(2018 + i)
dat_temp <- gen_dat(p, k)
dat <- list()
dat$x <- dat_temp[, 1:p]
dat$y <- dat_temp[, p + 1]
p_ture <- p_ture_vec[k]
t.cor[i] <- system.time(cor_result <- sapply(1:p, function(j) cor(dat$x[, j], dat$y)))
cor_out[i] <- max(order(cor_result, decreasing = T)[1:p_ture])
t.dcor[i] <- system.time(dcor_result <- sapply(1:p, function(j) dcor(dat$x[, j], dat$y)))
dcor_out[i] <- max(order(dcor_result, decreasing = T)[1:p_ture])
t.rf[i] <- system.time(rf_result <- ranger(y ~ ., data = as.data.frame(dat_temp), importance = "impurity"))
rf_out[i] <- max(order(rf_result$variable.importance, decreasing = T)[1:p_ture])
print(paste(i, k ,m))
}
m.t.cor[k, m] <- mean(t.cor)
sd.t.cor[k, m] <- sd(t.cor)
m.cor_out[k, m] <- mean(cor_out)
sd.cor_out[k, m] <- sd(cor_out)
m.t.dcor[k, m] <- mean(t.dcor)
sd.t.dcor[k, m] <- sd(t.dcor)
m.dcor_out[k, m] <- mean(dcor_out)
sd.dcor_out[k, m] <- sd(dcor_out)
m.t.rf[k, m] <- mean(t.rf)
sd.t.rf[k, m] <- sd(t.rf)
m.rf_out[k, m] <- mean(rf_out)
sd.rf_out[k, m] <- sd(rf_out)
}
}
t.cor = cor_out = matrix(nrow = 5, ncol = 2)
t.dcor = dcor_out = matrix(nrow = 5, ncol = 2)
t.rf = rf_out = matrix(nrow = 5, ncol = 2)
options(scipen = 200)
for(i in 1:5) {
for(j in 1:2) {
t.cor[i, j] <- paste(round(m.t.cor[i, j], 4))
t.dcor[i, j] <- paste(round(m.t.dcor[i, j], 4))
t.rf[i, j] <- paste(round(m.t.rf[i, j], 4))
cor_out[i, j] <- paste(round(m.cor_out[i, j], 2), '±', round(sd.cor_out[i, j], 2))
dcor_out[i, j] <- paste(round(m.dcor_out[i, j], 2), '±', round(sd.dcor_out[i, j], 2))
rf_out[i, j] <- paste(round(m.rf_out[i, j], 2), '±', round(sd.rf_out[i, j], 2))
}
}
# save(t.cor, t.dcor, t.rf, cor_out, dcor_out, rf_out,
# file = 'D:\\Kanny\\simulation2.rda')
## 为了转化到latex里面可输出的表格,所以使用了下面的代码
library(xtable)
#### Time
# p = 100
l <- 1
table_out <- cbind(t.cor[, l], t.dcor[, l], t.rf[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Cor-SIS', 'DCor-SIS', 'RF')
xtable(table_out, caption = 'Time,p = 100')
# p = 200
l <- 2
table_out <- cbind(t.cor[, l], t.dcor[, l], t.rf[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Cor-SIS', 'DCor-SIS', 'RF')
xtable(table_out, caption = 'Time,p = 200')
#### 个数
# p = 100
l <- 1
table_out <- cbind(cor_out[, l], dcor_out[, l], rf_out[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Cor-SIS', 'DCor-SIS', 'RF')
xtable(t(table_out), caption = '个数,p = 100')
# p = 200
l <- 2
table_out <- cbind(cor_out[, l], dcor_out[, l], rf_out[, l])
table_out <- t(table_out)
colnames(table_out) <- paste0('公式', 1:5)
row.names(table_out) <- c('Cor-SIS', 'DCor-SIS', 'RF')
xtable(t(table_out), caption = '个数,p = 200')
# --------------------- example ---------------------
### 1
library(ISLR)
library(leaps)
library(glmnet)
names(Hitters)
regfit.full <- regsubsets(Salary ~ ., Hitters)
summary(regfit.full)
regfit.fwd <- regsubsets(Salary ~ ., data = Hitters, method = "forward")
summary(regfit.fwd)
regfit.bwd <- regsubsets(Salary~ ., data = Hitters, method = "backward")
summary(regfit.bwd)
dat <- na.omit(Hitters)
x <- model.matrix(Salary ~ ., dat)[, -1]
y <- dat$Salary
fit.cv.glmnet <- cv.glmnet(x, y)
plot(fit.cv.glmnet)
fit.glmnet <- glmnet(x, y, lambda = fit.cv.glmnet$lambda.1se)
fit.glmnet <- glmnet(x, y)
plot(fit.glmnet, label = T)
beta.glmnet <- fit.glmnet$beta
### 2
##### example2
library(data.table)
dat_ex2 <- fread('D:\\Kanny\\FPS-5.csv', data.table = F)
y <- dat_ex2$feature3208
# 转为哑变量
y.matrix <- model.matrix(~ y, data.frame(1:3600, y))
library(snowfall)
sfInit(parallel = TRUE, cpus = 4)
sfExport('dat_ex2', 'y.matrix')
sfLibrary(energy)
result <- sfSapply(1:(ncol(dat_ex2) - 1), function(i) {
dcor(y.matrix, dat_ex2[, i])
print(i)
})
sfStop()
# save(result, file = 'D:\\Kanny\\ex2_result1.rda')
load('D:\\Kanny\\ex2_result1.rda')
n <- nrow(dat_ex2)
rest_num <- round(n / log(n))
or_result <- order(result, decreasing = T)
ind_rest <- which(or_result <= rest_num)
dat_new_x <- dat_ex2[, ind_rest]
fit.glmnet1 <- glmnet(x = as.matrix(dat_new_x), y = y, family = 'multinomial', type.multinomial = 'grouped')
plot(fit.glmnet1)
# require(doMC)
# registerDoMC(cores = 4)
fit.cv.glmnet <- cv.glmnet(x = as.matrix(dat_new_x), y = y, family = 'multinomial', grouped = T, nfolds = 5)
plot(fit.cv.glmnet)
fit.glmnet <- glmnet(x = as.matrix(dat_new_x), y = y, family = 'multinomial', type.multinomial = 'grouped', lambda = fit.cv.glmnet$lambda.1se)
beta.glmnet <- fit.glmnet$beta
beta0.glmnet <- fit.glmnet$a0
sum(beta.glmnet$saglik != 0)