# 2019-05-31

# https://www.cnblogs.com/karlpearson/p/6224148.html

# https://blog.csdn.net/weixin_36372879/article/details/80493968

# https://blog.csdn.net/glodon_mr_chen/article/details/79834517

# Import the data ----
# The CSV is looked up relative to the working directory set here.
setwd("D:/R")

wine <- utils::read.csv(file = "winequality.csv", header = TRUE)

# Data cleaning ----
# Keep only the rows that have no missing value in any column.
wine <- wine[stats::complete.cases(wine), ]

# PCA ----

library(stringr)

library(FactoMineR)

# Predictors only: column 12 (quality) is the response and is kept out of
# every principal-component computation below.
wine_x <- wine[, -12]

# FactoMineR PCA; graph = TRUE asks PCA() to draw its plots.
res.pca <- PCA(wine_x, graph = TRUE)

# Second look with prcomp() on the standardised predictors. The response is
# excluded here as well, so both PCA runs describe the same variables.
sdc <- scale(wine_x)

pca.d <- prcomp(sdc)

summary(pca.d)

# Dimension reduction: drop columns 9 to 11 from the working data set.
# NOTE(review): this removes original columns by position rather than
# projecting onto principal components - confirm this is the intended
# reduction.
wine <- wine[, -(9:11)]

# Look at the distribution of the response to settle on the class labels ----
hist(wine$quality)

table(wine$quality)

# Classes ----
# One data frame per quality score, 3 through 8, numbered wine0 .. wine5.
wine0 <- wine[wine$quality == 3, ]
wine1 <- wine[wine$quality == 4, ]
wine2 <- wine[wine$quality == 5, ]
wine3 <- wine[wine$quality == 6, ]
wine4 <- wine[wine$quality == 7, ]
wine5 <- wine[wine$quality == 8, ]

# Stratified sampling ----
# Each quality class gets its own vector of random labels drawn from 1..10,
# one label per row. Rows labelled 1-5 go to the training set and rows
# labelled 6-10 to the test set, so every class is split roughly in half.
#
# The sample size is nrow(wineK). The earlier dim(wineK[1]) took the dimensions
# of a one-column data frame and therefore passed a two-element vector
# (rows, 1) where a single count is expected.
label0 <- sample(1:10, nrow(wine0), replace = TRUE)
label1 <- sample(1:10, nrow(wine1), replace = TRUE)
label2 <- sample(1:10, nrow(wine2), replace = TRUE)
label3 <- sample(1:10, nrow(wine3), replace = TRUE)
label4 <- sample(1:10, nrow(wine4), replace = TRUE)
label5 <- sample(1:10, nrow(wine5), replace = TRUE)

wine0_train <- wine0[label0 <= 5, ]
wine0_test <- wine0[label0 > 5, ]

wine1_train <- wine1[label1 <= 5, ]
wine1_test <- wine1[label1 > 5, ]

wine2_train <- wine2[label2 <= 5, ]
wine2_test <- wine2[label2 > 5, ]

wine3_train <- wine3[label3 <= 5, ]
wine3_test <- wine3[label3 > 5, ]

wine4_train <- wine4[label4 <= 5, ]
wine4_test <- wine4[label4 > 5, ]

wine5_train <- wine5[label5 <= 5, ]
# The test rows of the last class must be taken from wine5 itself; indexing
# wine4 with label5 put rows of the wrong class into the test set.
wine5_test <- wine5[label5 > 5, ]

# Stack the per-class pieces into the final training and test sets.
wine_train <- rbind(
  wine0_train, wine1_train, wine2_train,
  wine3_train, wine4_train, wine5_train
)

wine_test <- rbind(
  wine0_test, wine1_test, wine2_test,
  wine3_test, wine4_test, wine5_test
)

# Model fitting ----
# Treat quality as a class label: convert it to a factor before fitting so
# both models below are trained as classifiers.
wine_train$quality <- as.factor(wine_train$quality)

# Multinomial logistic regression of quality on every other column.
# nnet::multinom is the fitting function; there is no multinomial() fitter
# among the packages this script loads.
re_log <- nnet::multinom(quality ~ ., data = wine_train)

# Random forest with 5 trees on the same formula. The formula needs an explicit
# right-hand side ("quality ~ ." means all other columns); the call is
# namespace-qualified because the script never attaches randomForest.
re_rf <- randomForest::randomForest(quality ~ ., data = wine_train, ntree = 5)

######################################

library(rpart)

library(rattle)

library(rpart.plot)

library(RColorBrewer)

library(ggplot2) # Data visualization

library(readr) # CSV file I/O, e.g. the read_csv function

# Install corrgram only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("corrgram", quietly = TRUE)) {
  install.packages("corrgram")
}

library(corrgram) # Correlograms http://www.datavis.ca/papers/corrgram.pdf

library(lattice) #required for nearest neighbors

library(FNN) # nearest neighbors techniques

library(pROC) # to make ROC curve

# Linear regression of quality on the physico-chemical measurements.
# Columns 9 to 11 are dropped from `wine` earlier in this script, so a formula
# that names all eleven predictors literally fails with "object not found".
# The formula is therefore built from whichever of the intended predictors
# are still present in `wine`.
lm_terms <- intersect(
  c(
    "fixed.acidity", "volatile.acidity", "citric.acid", "residual.sugar",
    "chlorides", "free.sulfur.dioxide", "total.sulfur.dioxide", "density",
    "pH", "sulphates", "alcohol"
  ),
  names(wine)
)

quality_formula <- reformulate(lm_terms, response = "quality")

linear_quality <- lm(quality_formula, data = wine)

summary(linear_quality)

#########################################

# Grow a classification tree the ID3 way: splits are chosen by information
# gain (split = "information").
re_id3 <- rpart(
  quality ~ .,
  data = wine_train,
  method = "class",
  parms = list(split = "information")
)

# Plot the tree fitted above; `dtree` is never defined in this script.
fancyRpartPlot(re_id3)

########################################

# Grow the tree with the CART method (Gini split)

re_CART = rpart(quality~.,data= wine_train,method = "class",parms = list(split="gini"),con

# You may also be interested in: (2019-05-31)