# https://www.cnblogs.com/karlpearson/p/6224148.html
# https://blog.csdn.net/weixin_36372879/article/details/80493968
# https://blog.csdn.net/glodon_mr_chen/article/details/79834517
# Import the data
# NOTE(review): setwd() with a hard-coded path ties the script to one machine;
# kept because later relative paths may depend on it.
setwd("D:/R")
wine <- read.csv("winequality.csv", header = TRUE)
# Data cleaning: keep only the rows that have no missing values
wine <- wine[complete.cases(wine), ]
# Principal component analysis
library(stringr)
library(FactoMineR)
# Plotting: run FactoMineR's PCA on every column except the 12th
# (Y = quality) and draw the PCA graphs
res.pca <- PCA(wine[, -12], graph = TRUE)
# Standardise the columns of wine, run prcomp() on the result and print the
# component summary
# NOTE(review): unlike the PCA() call above, this keeps the quality column in
# the input -- confirm that is intended
sdc <- scale(wine)
pca.d <- prcomp(sdc)
summary(pca.d)
# PCA-based dimension reduction: drop columns 9 through 11 from wine
wine <- wine[, -(9:11)]
# Inspect the distribution of the qualitative variable (quality) to settle on
# the class labels: a histogram plus a frequency table of the scores
hist(wine$quality)
table(wine$quality)
# Classification: split wine into one subset per quality score, 3 through 8.
# Rows with any other quality value end up in neither the training nor the
# test set.
wine0 <- wine[wine$quality == 3, ]
wine1 <- wine[wine$quality == 4, ]
wine2 <- wine[wine$quality == 5, ]
wine3 <- wine[wine$quality == 6, ]
wine4 <- wine[wine$quality == 7, ]
wine5 <- wine[wine$quality == 8, ]
# Sampling: draw one random label in 1..10 for every row of each subset.
# nrow() gives the number of labels needed; the previous dim(wineK[1]) passed
# the two-element dimension vector of a one-column data frame as the size.
label0 <- sample(1:10, nrow(wine0), replace = TRUE)
label1 <- sample(1:10, nrow(wine1), replace = TRUE)
label2 <- sample(1:10, nrow(wine2), replace = TRUE)
label3 <- sample(1:10, nrow(wine3), replace = TRUE)
label4 <- sample(1:10, nrow(wine4), replace = TRUE)
label5 <- sample(1:10, nrow(wine5), replace = TRUE)
# Labels 1-5 go to the training set and 6-10 to the test set, which gives a
# roughly even split inside every quality level.
wine0_train <- wine0[label0 <= 5, ]
wine0_test <- wine0[label0 > 5, ]
wine1_train <- wine1[label1 <= 5, ]
wine1_test <- wine1[label1 > 5, ]
wine2_train <- wine2[label2 <= 5, ]
wine2_test <- wine2[label2 > 5, ]
wine3_train <- wine3[label3 <= 5, ]
wine3_test <- wine3[label3 > 5, ]
wine4_train <- wine4[label4 <= 5, ]
wine4_test <- wine4[label4 > 5, ]
wine5_train <- wine5[label5 <= 5, ]
# Fixed: this used to index wine4 with label5, so the quality-8 test rows were
# never selected and quality-7 rows were picked with a mask of the wrong length.
wine5_test <- wine5[label5 > 5, ]
# Stack the per-level pieces back into a single training and a single test set
wine_train <- rbind(
  wine0_train, wine1_train, wine2_train,
  wine3_train, wine4_train, wine5_train
)
wine_test <- rbind(
  wine0_test, wine1_test, wine2_test,
  wine3_test, wine4_test, wine5_test
)
# multinom() comes from nnet and randomForest() from randomForest; load both
# here because neither package is attached earlier in the script.
library(nnet)
library(randomForest)
# Turn quality into a factor so that both models treat it as a class label.
wine_train$quality <- as.factor(wine_train$quality)
# Multinomial logistic regression of quality on all other columns.
# NOTE(review): the original called multinomial(), which is not a model-fitting
# function in any package loaded here; nnet::multinom is assumed to be what was
# meant -- confirm.
re_log <- multinom(quality ~ ., data = wine_train)
# Random forest with 5 trees on the same predictors. The original formula
# `quality~,` was missing the `.` and did not parse.
re_rf <- randomForest(quality ~ ., data = wine_train, ntree = 5)
######################################
library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(ggplot2) # Data visualization
library(readr) # CSV file I/O, e.g. the read_csv function
# Install corrgram only when it is missing, so that re-running the script does
# not reinstall the package (and need network access) every time.
if (!requireNamespace("corrgram", quietly = TRUE)) {
  install.packages("corrgram")
}
library(corrgram) # Correlograms http://www.datavis.ca/papers/corrgram.pdf
library(lattice) #required for nearest neighbors
library(FNN) # nearest neighbors techniques
library(pROC) # to make ROC curve
# Linear regression of quality on every other column currently in wine.
# The earlier explicit formula listed pH, sulphates and alcohol by name; the
# dimension-reduction step (wine[,-9:-11]) has presumably already removed those
# columns, which makes lm() stop with "object not found". With `.` the model
# uses whatever predictors remain, and is the same eleven-predictor model when
# wine still holds all of its original columns.
linear_quality <- lm(quality ~ ., data = wine)
summary(linear_quality)
#########################################
# Grow the tree with the ID3 method: an rpart classification tree that chooses
# its splits by information gain (split = "information").
re_id3 <- rpart(
  quality ~ .,
  data = wine_train,
  method = "class",
  parms = list(split = "information")
)
# Plot the tree that was just fitted; the original passed `dtree`, an object
# that is never created in this script.
fancyRpartPlot(re_id3)
########################################
# Grow the tree with the CART method
re_CART = rpart(quality~.,data= wine_train,method = "class",parms = list(split="gini"),con