nnls.R-20170908

cdata=condata2
cdata=as.matrix(cdata)
colnames(cdata)=c('y','x1','x2','x7^2','x7','x17',paste("x3",0:1,sep="="),paste("x4",0:6,sep="="),
                     paste("x5","=",0:1,sep=""),paste("x6","=",0:1,sep=""),#paste("x7","=",0:8,sep=""),
                     paste("x8","=",0:1,sep=""),paste("x9","=",0:1,sep=""),paste("x10","=",0:1,sep=""),
                     paste("x11","=",0:1,sep=""),paste("x12","=",0:1,sep=""),paste("x13","=",0:2,sep=""),
                     paste("x14","=",0:3,sep=""),paste("x15","=",0:1,sep=""),paste("x16","=",0:1,sep=""))

#带约束的最小二乘回归
library(pracma)
lsqlincon(as.matrix(cdata[,2:40]),as.matrix(cdata[,1]),A = -diag(1,39), b = rep(0,39),
          Aeq = NULL, beq = NULL, lb =NULL,  ub = NULL)
lsqlincon(as.matrix(cdata[,2:40]),as.matrix(cdata[,1]),A = NULL, b = NULL,
          Aeq = NULL, beq = NULL, lb =rep(0,39),  ub = NULL)
bl=data.matrix(lsqlincon(cdata[,2:40],cdata[,1],A = NULL, b = NULL,
                         Aeq = NULL, beq = NULL, lb =rep(0,39),  ub = NULL))
bl=data.matrix(nnls(cdata[,2:40],condata[,1])$x)
rownames(bl)=c('x1','x2','x7^2','x7','x17',paste("x3",0:1,sep="="),paste("x4",0:6,sep="="),
               paste("x5","=",0:1,sep=""),paste("x6","=",0:1,sep=""),#paste("x7","=",0:8,sep=""),
               paste("x8","=",0:1,sep=""),paste("x9","=",0:1,sep=""),paste("x10","=",0:1,sep=""),
               paste("x11","=",0:1,sep=""),paste("x12","=",0:1,sep=""),paste("x13","=",0:2,sep=""),
               paste("x14","=",0:3,sep=""),paste("x15","=",0:1,sep=""),paste("x16","=",0:1,sep=""))
ind=rownames(bl)[which(bl!= 0)] #没删去的变量
ypre=cdata[,2:40]%*%bl

c=as.matrix(c)
ypre=cdata[,2:40]%*%c

#拟合优度
sum((ypre-mean(cdata[,1]))^2)/sum((cdata[,1]-mean(cdata[,1]))^2)
sum((ypre-mean(cdata[,1]))^2)
sum((ypre-cdata[,1])^2)
sum((cdata[,1]-mean(cdata[,1]))^2)
1-sum((ypre-cdata[,1])^2)/sum((cdata[,1]-mean(cdata[,1]))^2)
library(MASS)
C=ginv(t(cdata[,2:40])%*%cdata[,2:40])
cc=as.matrix(diag(C))
sigma=(sum((cdata[,1]-ypre)^2)/(nrow(bl)-1))^(1/2)
t=bl/sigma*cc
dt(abs(t),nrow(cdata)-nrow(bl)-1,0.975)

#模拟用户
user=as.matrix(user)
colnames(user)=c('x1','x2','x7^2','x7','x17',paste("x3",0:1,sep="="),paste("x4",0:6,sep="="),
                 paste("x5","=",0:1,sep=""),paste("x6","=",0:1,sep=""),#paste("x7","=",0:8,sep=""),
                 paste("x8","=",0:1,sep=""),paste("x9","=",0:1,sep=""),paste("x10","=",0:1,sep=""),
                 paste("x11","=",0:1,sep=""),paste("x12","=",0:1,sep=""),paste("x13","=",0:2,sep=""),
                 paste("x14","=",0:3,sep=""),paste("x15","=",0:1,sep=""),paste("x16","=",0:1,sep=""))
yuser.pre=user%*%bl
yuser.pre=user%*%c
write.table(yuser.pre, file = "/Users/vicky/Documents/code/R/yuserpre.csv")

#计算误差
library(caret)
c=10 #10折交叉验证
set.seed(1234)
res=matrix(0,c,1)
inde=matrix(0,c,1)

for (k in 1:c) {
  index=createFolds(cdata[,1], k=c, list = TRUE, returnTrain = FALSE) #分成10份
  train=cdata[-index[[k]], ] 
  test=cdata[index[[k]], ]
  colnames(train)=c('y','x1','x2','x7^2','x7','x17',paste("x3",0:1,sep="="),paste("x4",0:6,sep="="),
                    paste("x5","=",0:1,sep=""),paste("x6","=",0:1,sep=""),#paste("x7","=",0:8,sep=""),
                    paste("x8","=",0:1,sep=""),paste("x9","=",0:1,sep=""),paste("x10","=",0:1,sep=""),
                    paste("x11","=",0:1,sep=""),paste("x12","=",0:1,sep=""),paste("x13","=",0:2,sep=""),
                    paste("x14","=",0:3,sep=""),paste("x15","=",0:1,sep=""),paste("x16","=",0:1,sep=""))
  colnames(test)=c('y','x1','x2','x7^2','x7','x17',paste("x3",0:1,sep="="),paste("x4",0:6,sep="="),
                   paste("x5","=",0:1,sep=""),paste("x6","=",0:1,sep=""),#paste("x7","=",0:8,sep=""),
                   paste("x8","=",0:1,sep=""),paste("x9","=",0:1,sep=""),paste("x10","=",0:1,sep=""),
                   paste("x11","=",0:1,sep=""),paste("x12","=",0:1,sep=""),paste("x13","=",0:2,sep=""),
                   paste("x14","=",0:3,sep=""),paste("x15","=",0:1,sep=""),paste("x16","=",0:1,sep=""))
  
  bl1=data.matrix(nnls(train[,2:40],train[,1])$x)
  rownames(bl1)=c('x1','x2','x7^2','x7','x17',paste("x3",0:1,sep="="),paste("x4",0:6,sep="="),
                 paste("x5","=",0:1,sep=""),paste("x6","=",0:1,sep=""),#paste("x7","=",0:8,sep=""),
                 paste("x8","=",0:1,sep=""),paste("x9","=",0:1,sep=""),paste("x10","=",0:1,sep=""),
                 paste("x11","=",0:1,sep=""),paste("x12","=",0:1,sep=""),paste("x13","=",0:2,sep=""),
                 paste("x14","=",0:3,sep=""),paste("x15","=",0:1,sep=""),paste("x16","=",0:1,sep=""))
  print(rownames(bl1)[which(bl1!= 0)])
  ytest=test[,2:40]%*%bl1
  res[k]=mean(abs(ytest-test[,1])/test[,1])
}
mean(res)

 

你可能感兴趣的:(R)