# SUPPORT VECTOR MACHINE CLASSIFIER IMPLEMENTATION IN R WITH CARET PACKAGE
# http://dataaspirant.com/2017/01/19/support-vector-machine-classifier-implementation-r-caret-package/
# Step 1: 下载包,下载数据 (install the package and download the data)
# One-time setup: uncomment the next line if caret is not installed yet.
# BiocManager::install('caret')
library(caret)

# Load the heart_tidy.csv data set from the working directory. The file is
# read without a header row, so the columns get the default names V1, V2, ...
heart_df <- read.csv("./heart_tidy.csv", header = FALSE, sep = ",")

# Preview the first rows.
head(heart_df)
# Step 2: 设置训练数据和测试数据 (split the data into training and test sets)
# Split the data into a training set and a test set: 70% of the rows go to
# training and the remaining 30% to testing (p = 0.7).
# set.seed() fixes the random number generator so the split is replicable.
set.seed(3033)
intrain <- createDataPartition(y = heart_df[["V14"]], p = 0.7, list = FALSE)

# Rows selected by the partition form the training set ...
training <- heart_df[intrain, ]
dim(training)

# ... and every other row forms the test set.
testing <- heart_df[-intrain, ]
dim(testing)
# Step 3: 开始前的数据准备 (data preparation before training)
# Check whether the data set contains any missing values.
anyNA(heart_df)

# Per-column summary of the full data set.
summary(heart_df)

# Convert the target variable (V14) of the training set to a factor.
training$V14 <- factor(training$V14)
## Step 4: train the SVM model
# Resampling scheme: 10-fold cross-validation repeated 3 times.
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Seed the RNG so the resampling is reproducible, then fit a linear SVM on
# centered and scaled predictors.
set.seed(3233)
svm_Linear <- train(
  V14 ~ .,
  data = training,
  method = "svmLinear",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Print the resampling results of the fitted model.
svm_Linear
# It's a linear model; therefore, it was tested only at the value C = 1.
# Step 5: 验证数据 (testing data) - predicting the results
# The linear model was trained with C = 1. Use predict() to classify the
# rows of the test set.
test_pred <- predict(svm_Linear, newdata = testing)
test_pred

# How accurately is the model working?
# Inspect the class of the test target before converting it.
class(testing$V14)

# confusionMatrix() needs both arguments to be factors, otherwise it errors.
# Reuse the levels of the predictions so the reference always has the same
# level set and order, even if a class happens to be missing from the test
# split.
testing$V14 <- factor(testing$V14, levels = levels(test_pred))
confusionMatrix(test_pred, testing$V14)
# Step 6: By following the above procedure we can build our svmLinear classifier (线性, linear).
# Candidate values for the cost parameter C of the linear SVM.
# C = 0 is deliberately excluded: the cost must be strictly positive, and a
# zero cost makes every resample fit fail, contributing only warnings and
# missing performance values.
grid <- expand.grid(
  C = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 5)
)

# Same seed as the untuned fit, so the resampling is reproducible.
set.seed(3233)
# tuneGrid fully specifies the candidates, so tuneLength (which only applies
# when caret builds the grid itself) is not passed.
svm_Linear_Grid <- train(
  V14 ~ .,
  data = training,
  method = "svmLinear",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneGrid = grid
)
svm_Linear_Grid
## Output recorded from a previous run:
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 0.05.

# Plot the resampled performance for each candidate C.
plot(svm_Linear_Grid)
# Step 7: test set 验证 (validate the tuned model on the test set)
# Classify the test set with the grid-tuned linear SVM and print the
# predicted classes.
test_pred_grid <- predict(object = svm_Linear_Grid, newdata = testing)
test_pred_grid

# Compare the predictions against the true test labels.
confusionMatrix(data = test_pred_grid, reference = testing$V14)
# Step 8: SVM classifier using a non-linear kernel (非线性)
# Fit an SVM with a radial kernel on centered and scaled predictors. No
# tuneGrid is given here; tuneLength = 10 is passed to train() instead.
set.seed(3233)
svm_Radial <- train(
  V14 ~ .,
  data = training,
  method = "svmRadial",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Print and plot the resampling results.
svm_Radial
plot(svm_Radial)

# Evaluate the radial model on the test set.
test_pred_Radial <- predict(object = svm_Radial, newdata = testing)
confusionMatrix(data = test_pred_Radial, reference = testing$V14)
# Tuning grid for the radial SVM: every combination of sigma and C below.
# Zero is deliberately excluded from both parameters:
#   - sigma = 0 makes the RBF kernel exp(-sigma * ||x - y||^2) equal to 1 for
#     every pair of points, so the kernel carries no information;
#   - C = 0 is not a valid cost and makes every resample fit fail.
grid_radial <- expand.grid(
  sigma = c(
    0.01, 0.02, 0.025, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
    0.1, 0.25, 0.5, 0.75, 0.9
  ),
  C = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.5, 2, 5)
)

# Same seed as the other fits, so the resampling is reproducible.
set.seed(3233)
# tuneGrid fully specifies the candidates, so tuneLength (which only applies
# when caret builds the grid itself) is not passed.
svm_Radial_Grid <- train(
  V14 ~ .,
  data = training,
  method = "svmRadial",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneGrid = grid_radial
)

# Print and plot the resampling results over the grid.
svm_Radial_Grid
plot(svm_Radial_Grid)

# Evaluate the grid-tuned radial model on the test set.
test_pred_Radial_Grid <- predict(svm_Radial_Grid, newdata = testing)
confusionMatrix(test_pred_Radial_Grid, testing$V14)