单纯贝叶斯分类法 (Naïve Bayes Classification in R)

  1. Import the data
# Load the built-in Titanic data set: a 4 x 2 x 2 x 2 contingency table
# of passenger counts indexed by Class, Sex, Age and Survived.
data(Titanic)
# BUG FIX: the original called tr(Titanic); there is no base function
# `tr` -- str() (package utils) is what produces the structure dump below.
str(Titanic)
# 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
# - attr(*, "dimnames")=List of 4
# ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
# ..$ Sex     : chr [1:2] "Male" "Female"
# ..$ Age     : chr [1:2] "Child" "Adult"
# ..$ Survived: chr [1:2] "No" "Yes"
  2. Convert the contingency table into a case-level data frame
# Expand an aggregated data frame (one row per combination plus a count
# column) into a case-level data frame with one row per observation.
#
# x:        data frame containing a frequency/count column
# countcol: name of the count column (default "Freq", the name produced
#           by as.data.frame() on a table)
# Returns the remaining columns of x with each row repeated `count` times.
countsToCases <- function(x, countcol = "Freq") {
  counts <- x[[countcol]]
  # Remove the count column before expanding the rows.
  x[[countcol]] <- NULL
  # Repeat each row index according to its count, then index into x.
  caseIdx <- rep.int(seq_len(nrow(x)), counts)
  x[caseIdx, ]
}

# Flatten the 4-way table to an aggregated data frame, then expand it
# so that every individual passenger becomes one row.
titanicDf <- as.data.frame(Titanic)
caseTita <- countsToCases(titanicDf)
head(caseTita)
# Class  Sex   Age Survived
# 3     3rd Male Child       No
# 3.1   3rd Male Child       No
# 3.2   3rd Male Child       No
# 3.3   3rd Male Child       No
# 3.4   3rd Male Child       No
# 3.5   3rd Male Child       No

# One row per passenger: 2201 people aboard in total.
nrow(caseTita)
# [1] 2201
  3. Naïve Bayes classification with e1071
# Fit a naive Bayes classifier (package e1071) on the case-level data.
library(e1071)
model <- naiveBayes(Survived ~ ., data = caseTita)

# Fix the RNG seed and reuse ONE sample of rows so that the class
# predictions and the raw posterior probabilities below describe the
# SAME ten passengers. (The original drew two independent, unseeded
# samples, so the two outputs did not correspond to each other and the
# run was not reproducible.) Also avoid the hard-coded row count 2201.
set.seed(42)
testIdx <- sample(seq_len(nrow(caseTita)), 10, replace = FALSE)
predict(model, caseTita[testIdx, ])
# Example output from an unseeded run (labels depend on the sample):
# [1] No  No  No  No  No  No  Yes No  Yes No 
# Levels: No Yes
predict(model, caseTita[testIdx, ], type = "raw")
# Posterior probabilities P(No), P(Yes) for each sampled passenger:
# No       Yes
# [1,] 0.7247820 0.2752180
# [2,] 0.6960593 0.3039407
# [3,] 0.8466171 0.1533829
# [4,] 0.3679509 0.6320491
# [5,] 0.8466171 0.1533829
# [6,] 0.7247820 0.2752180
# [7,] 0.8466171 0.1533829
# [8,] 0.3523184 0.6476816
# [9,] 0.8552217 0.1447783
# [10,] 0.8466171 0.1533829

# naiveBayes() can also be fed the contingency table directly; the cell
# frequencies are then used as case weights, giving the same model.
m <- naiveBayes(Survived ~ ., data = Titanic)
print(m)

# Naive Bayes Classifier for Discrete Predictors
# 
# Call:
#   naiveBayes.formula(formula = Survived ~ ., data = Titanic)
# 
# A-priori probabilities:
#   Survived
# No      Yes 
# 0.676965 0.323035 
# 
# Conditional probabilities:
#   Class
# Survived        1st        2nd        3rd       Crew
# No  0.08187919 0.11208054 0.35436242 0.45167785
# Yes 0.28551336 0.16596343 0.25035162 0.29817159
# 
# Sex
# Survived       Male     Female
# No  0.91543624 0.08456376
# Yes 0.51617440 0.48382560
# 
# Age
# Survived      Child      Adult
# No  0.03489933 0.96510067
# Yes 0.08016878 0.91983122
  4. Split the data into the predictor data frame and the outcome vector
# Split the cases into a predictor data frame (x) and an outcome vector (y)
# for caret::train().
library(caret)
# Select the predictor columns by NAME rather than by position (the
# original used caseTita[, -4]), so the code keeps working even if the
# columns are ever reordered.
x <- caseTita[, names(caseTita) != "Survived"]
y <- caseTita$Survived

# Train a naive Bayes model (method "nb") with 10-fold cross-validation.
model1 <- train(x, y, "nb", trControl = trainControl(method = "cv", number = 10))
model1
# Naive Bayes 
# 
# 2201 samples
# 3 predictor
# 2 classes: 'No', 'Yes' 
# 
# No pre-processing
# Resampling: Cross-Validated (10 fold) 
# Summary of sample sizes: 1981, 1981, 1981, 1981, 1981, 1981, ... 
# Resampling results across tuning parameters:
#   
#   usekernel  Accuracy   Kappa    
# FALSE      0.7791814  0.4474594
# TRUE      0.7791814  0.4474594
# 
# Tuning parameter 'fL' was held constant at a value of 0
# Tuning parameter 'adjust' was held
# constant at a value of 1
# Accuracy was used to select the optimal model using the largest value.
# The final values used for the model were fL = 0, usekernel = FALSE and adjust = 1.
  5. Predict the outcome
# Predict the class of ten randomly chosen passengers with the final
# fitted model selected by the cross-validation above.
sampledRows <- caseTita[sample(1:2201, 10, replace = FALSE), ]
predict(model1$finalModel, sampledRows)$class
# 27.63 12.225  30.44 12.630  11.24  15.38   9.76  31.15 10.150  10.53 
# No     No    Yes     No     No    Yes     No    Yes     No     No 
# Levels: No Yes

# Confusion matrix on the training data:
# rows = predicted class, columns = observed outcome.
table(predict(model1$finalModel, x)$class, y)
#        y
#      No  Yes
# No  1364  362
# Yes  126  349

Reference:
Zhang, Zhongheng. "Naïve Bayes classification in R." Annals of Translational Medicine (2016).

你可能感兴趣的:(单纯贝叶斯分类法)