[R] Proximal Gradient Descent for Lasso

This is a short script for studying the proximal gradient descent algorithm. We minimize the Lasso objective f(x) = 1/2*||A x - b||^2 + lambda*||x||_1 by splitting it into a smooth part g(x) = 1/2*||A x - b||^2 and a non-smooth part h(x) = lambda*||x||_1; each iteration takes a gradient step on g followed by the proximal operator of h (soft-thresholding): x_new = prox_{t*lambda}(x - t*grad_g(x)).

#--------------------------- functions to be used ----------------------

## the full objective f = g + h
f <- function(x, A, b, lambda){
        1/2*norm(A %*% x - b, "2")^2 + lambda*sum(abs(x))
}
## smooth function g
g <- function(x, A, b){
        1/2*norm(A %*% x - b, "2")^2
}
## gradient of the smooth part g (g is differentiable, so this is a true gradient)
Subg <- function(x, A, b){
        t(A) %*% (A %*% x - b)
}
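
As a quick sanity check (a sketch added here, not part of the original script), the analytic gradient can be compared with a central finite difference; the helper name check_grad is introduced for illustration, and it should be called only after the data below are generated, e.g. check_grad(rnorm(p), A, b).

## sanity check: compare the analytic gradient with a central finite difference
check_grad <- function(x, A, b, eps = 1e-6){
        grad <- as.numeric(Subg(x, A, b))
        fd <- sapply(seq_along(x), function(j){
                e <- rep(0, length(x)); e[j] <- eps
                (g(x + e, A, b) - g(x - e, A, b)) / (2*eps)
        })
        max(abs(fd - grad) / (1 + abs(grad)))   ## small relative error expected
}
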
## proximal operator of the non-smooth part h (element-wise soft-thresholding)
prox_L1 <- function(x, lambda){
        sign(x) * pmax(abs(x) - lambda, 0)
}
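
For intuition, soft-thresholding shrinks every coordinate toward zero by lambda and zeroes out anything with magnitude below lambda; a one-line example:

## with lambda = 1, entries inside [-1, 1] become 0, the rest move 1 toward zero
prox_L1(c(-3, -0.5, 0, 0.2, 2), 1)      ## -2  0  0  0  1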

#------------------------------------- generate data --------------------------------------

# dimensions
n <- 500                ## number of samples
p <- 1000               ## number of features
s <- 100                ## number of important features
set.seed(1)             ## for reproducibility
## coefficients
beta <- c(runif(s, -2, 2), rep(0, p - s))
A <- matrix(rnorm(n*p), n, p)
A <- scale(A, TRUE, TRUE)
b <- A %*% beta + 0.1*rnorm(n)
# tuning parameter: lambda >= max|t(A) %*% b| yields the all-zero solution, so take 10% of that
lambda <- 0.1*max(abs(t(A) %*% b))

#-------------------------------- proximal gradient descent -------------------------------
# initialization
t <- 1                   ## initial step size
beta0 <- 0.5             ## backtracking line-search shrinkage factor
x <- rep(10, p)          ## initialization of x
xprev <- x
maxiter <- 200           ## maximum number of iterations

# objective function value at each iteration
opt_f <- rep(0, maxiter)

for(i in 1:maxiter){

        cat(i, fill = TRUE)

        # gradient of the smooth part g
        grad_x <- Subg(x, A, b)
        # proximal step with backtracking line search:
        # accept z once g(z) <= g(x) + <grad_x, z - x> + ||z - x||^2/(2t)
        while(TRUE){

                z <- prox_L1(x - t*grad_x, t*lambda)
                ## sum(grad_x * (z - x)) is the inner product, written without t()
                ## to avoid confusion with the step-size variable t
                if(g(z, A, b) <= g(x, A, b) + sum(grad_x * (z - x)) + 1/(2*t)*norm(z - x, "2")^2) break
                t <- beta0*t
        }

        # keep the previous iterate
        xprev <- x
        x <- z

        # record the objective value and stop when progress stalls
        opt_f[i] <- f(x, A, b, lambda)
        if(i > 1 && abs(opt_f[i] - opt_f[i-1]) < 1e-8) break
}
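
Backtracking is not the only option: grad g is Lipschitz with constant L equal to the largest eigenvalue of t(A) %*% A, so the fixed step size t = 1/L also converges. A minimal sketch (L_const and t_fixed are names introduced here for illustration):

## fixed step size: L is the largest eigenvalue of t(A) %*% A,
## i.e. the squared spectral norm of A
L_const <- norm(A, "2")^2
t_fixed <- 1/L_const
## with this, the while(TRUE) line search reduces to a single step:
## z <- prox_L1(x - t_fixed*grad_x, t_fixed*lambda)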

#----------------------------------accelerated proximal gradient descent------------------------

# initialization
t <- 1                   ## initial step size
beta0 <- 0.5             ## backtracking line-search shrinkage factor
x <- rep(10, p)          ## initialization of x
xprev <- x
maxiter <- 200           ## maximum number of iterations

# objective function value at each iteration
opt_f_fast <- rep(0, maxiter)

for(i in 1:maxiter){

        cat(i, fill = TRUE)
        # acceleration (momentum) step: extrapolate from the previous two iterates
        y <- x + (i-2)/(i+1)*(x - xprev)

        # gradient of the smooth part g at the extrapolated point
        grad_y <- Subg(y, A, b)
        # proximal step with backtracking line search
        while(TRUE){

                z <- prox_L1(y - t*grad_y, t*lambda)
                if(g(z, A, b) <= g(y, A, b) + sum(grad_y * (z - y)) + 1/(2*t)*norm(z - y, "2")^2) break
                t <- beta0*t
        }

        # keep the previous iterate
        xprev <- x
        x <- z

        # record the objective value and stop when progress stalls
        opt_f_fast[i] <- f(x, A, b, lambda)
        if(i > 1 && abs(opt_f_fast[i] - opt_f_fast[i-1]) < 1e-8) break
}
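
As an optional cross-check (assuming the glmnet package is installed): glmnet minimizes (1/(2n))*||b - A x||^2 + lambda*||x||_1 for gaussian responses, so our lambda corresponds to lambda/n on glmnet's scale; small numerical differences are expected given glmnet's default tolerance.

## optional cross-check against glmnet (note its objective is scaled by 1/n)
library(glmnet)
fit <- glmnet(A, b, lambda = lambda/n, standardize = FALSE, intercept = FALSE)
x_glmnet <- as.numeric(coef(fit))[-1]   ## drop the (zero) intercept
max(abs(x_glmnet - as.numeric(x)))      ## should be small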

#---------------------------plot-----------------------------------------------------------
library(ggplot2)
# iterations after early stopping were never filled in; pad them with the last recorded value
opt_f[opt_f == 0] <- tail(opt_f[opt_f != 0], 1)
opt_f_fast[opt_f_fast == 0] <- tail(opt_f_fast[opt_f_fast != 0], 1)
data <- data.frame(iter = seq(maxiter), opt_f = opt_f, opt_f_fast = opt_f_fast)
# reshape to long format with tidyr::gather
library(tidyr)
test_data <- gather(data, variable, value, -iter)
ggplot(test_data, aes(x = iter, y = value, color = variable)) +
        geom_line()

# equivalently, reshape with reshape2::melt
library(reshape2)
test_data <- melt(data, id = "iter")
ggplot(test_data, aes(x = iter, y = value, color = variable)) +
        geom_line()
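
The O(1/k) vs. O(1/k^2) rate difference is easier to see on a log scale of the suboptimality f(x_k) - f*; as a sketch, the best value found across both runs can stand in for f* (the 1e-12 offset just keeps log10 finite at the minimum):

## suboptimality on a log scale; min over both runs is a proxy for f*
f_star <- min(c(opt_f, opt_f_fast))
ggplot(test_data, aes(x = iter, y = value - f_star + 1e-12, color = variable)) +
        geom_line() +
        scale_y_log10()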

(Figure: objective function value versus iteration for proximal gradient descent and its accelerated variant.)
