# 大数据项目3:自动创建N棵决策树脚本

#' Build, save and plot N conditional inference trees on random cup98 samples
#'
#' For each of `n` iterations the function draws a fresh random
#' train/test/rest partition of the cup98 data (30% / 20% / 50%), fits a
#' `party::ctree()` model for `TARGET_B` on the training part, prints the
#' elapsed fitting time and the model size, saves the model as an `.rdata`
#' file and writes a plot of the tree to a PDF file.
#'
#' Output files are named `cup98-ctree-<train>-<test>-<iteration>` with the
#' extensions `.rdata` and `.pdf`.
#'
#' @param n Number of trees to build; a single non-negative number
#'   (fractions are truncated). `0` does nothing.
#' @param dataFile Path of the CSV file holding the cup98 learning data.
#' @param outDir Existing directory that receives the `.rdata` and `.pdf`
#'   files.
#' @return `NULL`, invisibly. The function is called for its side effects.
ctreeN <- function(n,
                   dataFile = "F:\\R\\Rworkspace\\cup98lrn/cup98lrn.txt",
                   outDir = "F:\\R\\Rworkspace/项目") {
  # Validate the iteration count first: `1:n` would run for n = 0 or n < 0.
  if (!is.numeric(n) || length(n) != 1L || !is.finite(n) || n < 0) {
    stop("`n` must be a single non-negative number", call. = FALSE)
  }
  loopNum <- floor(n)
  if (loopNum == 0) {
    return(invisible(NULL))
  }

  # Fail fast on bad locations: fitting a tree is slow, so do not discover a
  # missing output directory only when the first model is about to be saved.
  if (!file.exists(dataFile)) {
    stop("data file not found: ", dataFile, call. = FALSE)
  }
  if (!dir.exists(outDir)) {
    stop("output directory not found: ", outDir, call. = FALSE)
  }

  library(party)

  # 1. Read the data.
  cup98 <- read.csv(dataFile)

  # 2. Proportions of the training and test sets; the remainder is unused.
  trainPercentage <- 30
  testPercentage <- 20
  restPercentage <- 100 - trainPercentage - testPercentage

  # 3. Select the variables.
  varSet <- c(
    # demographics
    "ODATEDW", "OSOURCE", "STATE", "ZIP", "PVASTATE", "DOB", "RECINHSE",
    "MDMAUD", "DOMAIN", "CLUSTER", "AGE", "HOMEOWNR", "CHILD03", "CHILD07",
    "CHILD12", "CHILD18", "NUMCHLD", "INCOME", "GENDER", "WEALTH1", "HIT",
    # donor interests
    "COLLECT1", "VETERANS", "BIBLE", "CATLG", "HOMEE", "PETS", "CDPLAY",
    "STEREO", "PCOWNERS", "PHOTO", "CRAFTS", "FISHER", "GARDENIN", "BOATS",
    "WALKER", "KIDSTUFF", "CARDS", "PLATES", "PEPSTRFL",
    # summary variables of promotion history
    "CARDPROM", "MAXADATE", "NUMPROM", "CARDPM12", "NUMPRM12",
    # summary variables of giving history
    "RAMNTALL", "NGIFTALL", "CARDGIFT", "MINRAMNT", "MAXRAMNT", "LASTGIFT",
    "LASTDATE", "FISTDATE", "TIMELAG", "AVGGIFT",
    # ID & targets
    "CONTROLN", "TARGET_B", "TARGET_D", "HPHONE_D",
    # RFA
    "RFA_2F", "RFA_2A", "MDMAUD_R", "MDMAUD_F", "MDMAUD_A",
    # others
    "CLUSTER2", "GEOCODE2"
  )
  # Drop the second target and the ID-like columns from the model inputs.
  vars <- setdiff(varSet, c("TARGET_D", "CONTROLN", "ZIP", "OSOURCE"))

  for (loopCnt in seq_len(loopNum)) {
    # 1. Report the current date and iteration number.
    cat(date(), ":iteration = ", loopCnt, "\n")

    # 2. Draw a new random partition: 1 = train, 2 = test, 3 = rest.
    ind <- sample(
      3, nrow(cup98),
      replace = TRUE,
      prob = c(trainPercentage, testPercentage, restPercentage)
    )
    trainData <- cup98[ind == 1, vars]
    testData <- cup98[ind == 2, vars]
    fileName <- paste(
      "cup98-ctree", trainPercentage, testPercentage, loopCnt,
      sep = "-"
    )

    # 3. Fit the model and report fitting time and model size.
    startTime <- Sys.time()
    myCtree <- ctree(TARGET_B ~ ., data = trainData)
    print(Sys.time() - startTime)
    print(object.size(myCtree), units = "Mb")

    # 4. Save the model.
    save(myCtree, file = file.path(outDir, paste0(fileName, ".rdata")))

    # 5. Plot the tree to a PDF. Close only the device opened here (not every
    #    open device), and close it even when plotting fails.
    pdf(file.path(outDir, paste0(fileName, ".pdf")))
    tryCatch(
      plot(
        myCtree,
        type = "simple",
        ip_args = list(pval = FALSE),
        ep_args = list(digits = 0),
        main = fileName
      ),
      finally = dev.off()
    )
  }

  invisible(NULL)
}

# 你可能感兴趣的:数据挖掘、大数据、机器学习、决策树