3 SVM CLASSIFIER R包(caret)

SUPPORT VECTOR MACHINE CLASSIFIER IMPLEMENTATION IN R WITH CARET PACKAGE

http://dataaspirant.com/2017/01/19/support-vector-machine-classifier-implementation-r-caret-package/

step1 下载包,下载数据

# Install once if needed: BiocManager::install("caret")
library(caret)

# Read the heart_tidy.csv data set. The file is read without a header row,
# so the columns get R's default names (V1, V2, ...).
heart_df <- read.csv(file = "./heart_tidy.csv", header = FALSE, sep = ",")
head(heart_df)

step2 划分训练数据和测试数据

# Split the rows into 70% training data and 30% test data (p = 0.7).
# set.seed() makes the random split reproducible.
set.seed(3033)
intrain <- createDataPartition(y = heart_df[["V14"]], p = 0.7, list = FALSE)
training <- heart_df[intrain, ]
testing <- heart_df[-intrain, ]
dim(training)
dim(testing)

step3 开始前的数据准备

# Check whether the data set contains any missing values.
anyNA(heart_df)
# Per-column summary of the data set.
summary(heart_df)
# Convert the target variable (V14) of the training set to a factor.
training$V14 <- factor(training$V14)

## step4 training the SVM model

# Resampling scheme: 10-fold cross-validation, repeated 3 times.
trctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)

# Fit a linear SVM with V14 as the outcome and all other columns as
# predictors; predictors are centered and scaled before fitting.
set.seed(3233)
svm_Linear <- train(
  V14 ~ .,
  data = training,
  method = "svmLinear",
  preProcess = c("center", "scale"),
  trControl = trctrl,
  tuneLength = 10
)

svm_Linear

image.png

Because this is a linear model with a single tuning parameter, caret evaluated it at only one value of that parameter: C = 1.

Step 5 验证数据testing data Predicting the results

# The model is now trained (with C = 1), so predict the class of every row
# of the held-out test set with predict().
test_pred <- predict(svm_Linear, newdata = testing)
test_pred

# How accurately does the model work?
class(testing$V14)
# confusionMatrix() needs both inputs to be factors with the same levels.
# Reuse the levels of the predictions so that a class which happens to be
# absent from the test split cannot cause a level mismatch.
testing$V14 <- factor(testing$V14, levels = levels(test_pred))
confusionMatrix(test_pred, testing$V14)


image.png

Step6 By following the above procedure we can build our svmLinear classifier. Next, tune the cost parameter C of the linear model over a custom grid. 线性

# Candidate values for the cost parameter C of the linear SVM.
# C must be strictly positive, so 0 is not a usable candidate and is left
# out of the grid.
grid <- expand.grid(C = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5,
                          1.75, 2, 5))
# Tune the linear SVM over the explicit grid of C values.
# tuneLength is not passed: train() only uses it to build a default grid
# when no tuneGrid is supplied.
set.seed(3233)
svm_Linear_Grid <- train(
  V14 ~ .,
  data = training,
  method = "svmLinear",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneGrid = grid
)
svm_Linear_Grid

# Output reported for this run:
## Accuracy was used to select the optimal model using  the largest value.
## The final value used for the model was C = 0.05.
plot(svm_Linear_Grid)
取C值

step7 test set验证

# Predict the test set with the grid-tuned linear SVM and compare the
# predictions against the true classes.
test_pred_grid <- predict(svm_Linear_Grid, newdata = testing)
test_pred_grid
confusionMatrix(data = test_pred_grid, reference = testing$V14)

testing set验证结果

step8 SVM Classifier using Non-Linear Kernel 非线性

# Fit an SVM with a radial kernel using the same resampling scheme and
# preprocessing as the linear model.
set.seed(3233)
svm_Radial <- train(
  V14 ~ .,
  data = training,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  trControl = trctrl,
  tuneLength = 10
)
svm_Radial
plot(svm_Radial)

# Evaluate the radial model on the held-out test set.
test_pred_Radial <- predict(svm_Radial, newdata = testing)
confusionMatrix(data = test_pred_Radial, reference = testing$V14)
# Candidate (sigma, C) combinations for the radial SVM.
# Both parameters must be strictly positive: C = 0 is not a valid cost, and
# sigma = 0 makes the radial kernel exp(-sigma * d^2) equal to 1 for every
# pair of points, so neither value is included.
grid_radial <- expand.grid(
  sigma = c(0.01, 0.02, 0.025, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
            0.1, 0.25, 0.5, 0.75, 0.9),
  C = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.5, 2, 5)
)
# Tune the radial SVM over the explicit (sigma, C) grid.
# tuneLength is not passed: train() only uses it to build a default grid
# when no tuneGrid is supplied.
set.seed(3233)
svm_Radial_Grid <- train(
  V14 ~ .,
  data = training,
  method = "svmRadial",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneGrid = grid_radial
)
svm_Radial_Grid
plot(svm_Radial_Grid)

# Evaluate the grid-tuned radial model on the held-out test set.
test_pred_Radial_Grid <- predict(svm_Radial_Grid, newdata = testing)
confusionMatrix(test_pred_Radial_Grid, testing$V14)

你可能感兴趣的:(3 SVM CLASSIFIER R包(caret))