# C5.0 决策树算法 (C5.0 decision tree algorithm)

library(C50)
library(gmodels)

# Train and evaluate a C5.0 decision tree on the credit data:
# load the CSV, shuffle the rows reproducibly, split into training and test
# sets, fit the model, and cross-tabulate predictions against true labels.

# Fixed seed so the random shuffle (and therefore the split) is repeatable.
set.seed(12345)

# Read the file through its full path rather than calling setwd(), so the
# script does not change the session's working directory as a side effect.
data_dir <- "C:\\Users\\11565\\Desktop"
credit <- read.csv(file.path(data_dir, "credit.csv"), header = TRUE)

# Recode the outcome column A16 ("+" / "-") as a labelled factor.
credit$A16 <- factor(
  credit$A16,
  levels = c("+", "-"),
  labels = c("批准", "不批准")
)

# Quick look at the data and its dimensions.
print(head(credit))
print(dim(credit))

# Shuffle the rows by sorting on a uniform random key in [0, 1].
# runif() already returns doubles, so no type conversion is needed, and the
# key length follows the data instead of a hard-coded row count.
n_rows <- nrow(credit)
X12 <- runif(n_rows)
credit_new <- data.frame(credit, X12)
credit_rand <- credit_new[order(credit_new$X12), ]

# First 580 shuffled rows are for training, the remainder for testing.
n_train <- 580
stopifnot(n_rows > n_train)
train_rows <- seq_len(n_train)
test_rows <- seq(n_train + 1, n_rows)

# Predictors are every column except the outcome (A16) and the shuffle key
# (X12); selecting by name avoids relying on column positions.
predictor_cols <- setdiff(names(credit_rand), c("A16", "X12"))
credit_train <- credit_rand[train_rows, predictor_cols, drop = FALSE]
credit_test <- credit_rand[test_rows, predictor_cols, drop = FALSE]
credit_train_labels <- credit_rand[train_rows, "A16"]
credit_test_labels <- credit_rand[test_rows, "A16"]

# Check that the class proportions are similar in both subsets.
print(prop.table(table(credit_train_labels)))
print(prop.table(table(credit_test_labels)))

# Fit the C5.0 tree once.
# NOTE(review): the original author remarked that the fit did not succeed
# because of missing values in the data -- confirm how missing entries are
# encoded in credit.csv before trusting the results.
credit_model <- C5.0(credit_train, credit_train_labels)

# Predict on the held-out rows and compare against the true labels.
credit_predict <- predict(credit_model, credit_test)
CrossTable(credit_predict, credit_test_labels)

# 你可能感兴趣的:数据挖掘与 R 语言