# 随机森林重要性排序-R (random forest variable-importance ranking in R)

# Random forest classification of `Yield`: fit on a 70% training split, report
# variable importance, then evaluate on the remaining 30% with a confusion
# matrix and a ROC curve.

library(openxlsx)
library(pROC)          # ROC curve / AUC
library(randomForest)

# Fix the RNG seed so the train/test split and the fitted forest are
# reproducible from run to run.
set.seed(123)

wine <- read.xlsx("E:/时间趋势/winequality-red-2.xlsx")

# Split the data set into training and test sets at a 7:3 ratio.
train_sub <- sample(nrow(wine), floor(7 / 10 * nrow(wine)))
train_data <- wine[train_sub, ]
test_data <- wine[-train_sub, ]

# Data preprocessing: turn the response into a factor. The levels are derived
# once from the full data so both subsets share the same level set even when
# a class happens to be missing from one of them.
yield_levels <- sort(unique(wine$Yield))
train_data$Yield <- factor(train_data$Yield, levels = yield_levels)
test_data$Yield <- factor(test_data$Yield, levels = yield_levels)

wine_randomforest <- randomForest(
  Yield ~ Tmax + Tmin + AVRH + AVGT + SD + CWDI + P,
  data = train_data,
  ntree = 500,
  mtry = 3,
  importance = TRUE,
  proximity = TRUE
)

# Inspect variable importance (raw importance matrix, then the dot chart).
wine_randomforest$importance
varImpPlot(
  wine_randomforest,
  main = "variable importance",
  font = 2,
  lwd = 5,
  col = "red",
  col.axis = "blue",
  font.axis = 2,
  cex = 1,
  cex.axis = 5,
  pch = 16,
  cex.lab = 0.7,
  font.lab = 2
)

# Predict on the test set: hard class labels for the confusion matrix, class
# probabilities for the ROC curve.
pre_ran <- predict(wine_randomforest, newdata = test_data)
pre_ran_prob <- predict(wine_randomforest, newdata = test_data, type = "prob")

# Combine predicted and observed classes. NOTE: the `prob` column holds the
# predicted class label (kept as in the original), not a probability.
obs_p_ran <- data.frame(prob = pre_ran, obs = test_data$Yield)

# Print the confusion matrix (rows: observed, columns: predicted).
table(test_data$Yield, pre_ran, dnn = c("真实值", "预测值"))

# Draw the ROC curve from the predicted probability of the second factor
# level, which pROC treats as the "case" class by default. Scoring with hard
# labels would collapse the curve to a single threshold and understate AUC.
# NOTE(review): assumes Yield is binary -- confirm against the data.
positive_class <- levels(test_data$Yield)[2]
ran_roc <- roc(
  test_data$Yield,
  pre_ran_prob[, positive_class],
  direction = "<"  # controls score lower than cases on P(case)
)
plot(
  ran_roc,
  print.auc = TRUE,
  auc.polygon = TRUE,
  grid = c(0.1, 0.2),
  grid.col = c("green", "red"),
  max.auc.polygon = TRUE,
  auc.polygon.col = "skyblue",
  print.thres = TRUE,
  main = "随机森林模型ROC曲线,mtry=3,ntree=500"
)


# 你可能感兴趣的:(r语言)