8.1 Support Vector Machine Examples

Example 1: Classifying irises with an SVM

#1. Load the data
data(iris)

#2. Create training and test sets (an ~80/20 random split)
index <- sample(1:2, nrow(iris), prob=c(0.8, 0.2), replace=T)
train_iris <- iris[index==1, ]
test_iris <- iris[index==2, ]
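
Note that the split above is unseeded, so the exact counts in the output below will vary between runs. A minimal sketch of a reproducible split, using an arbitrary seed:

# (optional) fix the random number generator so the split can be reproduced
set.seed(123)
index <- sample(1:2, nrow(iris), prob=c(0.8, 0.2), replace=T)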

#3. Fit the model
library(e1071)
model_iris <- svm(Species~., data=train_iris, type="C-classification", cost=10, kernel="radial", gamma=0.1, scale=F)

#4. Evaluate the model
model_iris
## 
## Call:
## svm(formula = Species ~ ., data = train_iris, type = "C-classification", 
##     cost = 10, kernel = "radial", gamma = 0.1, scale = F)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  10 
##       gamma:  0.1 
## 
## Number of Support Vectors:  26
pred <- predict(model_iris, train_iris)
mean(pred==train_iris[, 5])
## [1] 0.983871
table(pred, train_iris[, 5])
##             
## pred         setosa versicolor virginica
##   setosa         44          0         0
##   versicolor      0         39         0
##   virginica       0          2        39
#5. Predict on the test set
pred_iris <- predict(model_iris, test_iris)
mean(pred_iris==test_iris[, 5])
## [1] 1
table(pred_iris, test_iris[, 5])
##             
## pred_iris    setosa versicolor virginica
##   setosa          6          0         0
##   versicolor      0          9         0
##   virginica       0          0        11
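
To see what the classifier has learned, e1071's plot.svm() can draw a two-dimensional slice of the decision regions. A minimal sketch; the slice values (3 and 5) are arbitrary choices for the two predictors held fixed:

# Decision regions over Petal.Length/Petal.Width, with the remaining
# predictors fixed at the values given in 'slice'
plot(model_iris, train_iris, Petal.Width ~ Petal.Length,
     slice=list(Sepal.Width=3, Sepal.Length=5))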
#6. Change the cost value and inspect the new results
model_iris1 <- svm(Species~., train_iris, kernel="radial", cost=0.1, scale = F)
pred1 <- predict(model_iris1, test_iris)
mean(pred1==test_iris[, 5])
## [1] 0.9615385
table(pred1, test_iris[, 5])
##             
## pred1        setosa versicolor virginica
##   setosa          6          0         0
##   versicolor      0          9         1
##   virginica       0          0        10
#7. Tune cost with tune(); the default resampling scheme is 10-fold cross-validation
model_tune <- tune(svm, Species~., data=train_iris, kernel="radial", ranges=list(cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(model_tune)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.04871795 
## 
## - Detailed performance results:
##    cost      error dispersion
## 1 1e-03 0.73205128 0.11830739
## 2 1e-02 0.73205128 0.11830739
## 3 1e-01 0.11282051 0.12390215
## 4 1e+00 0.04871795 0.05751000
## 5 5e+00 0.05705128 0.05567459
## 6 1e+01 0.05705128 0.06813861
## 7 1e+02 0.05641026 0.06619105
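The same results can be inspected graphically: calling plot() on a tune object draws the cross-validation error against the tuned parameter.

plot(model_tune)  # CV error as a function of cost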
str(model_tune)
## List of 8
##  $ best.parameters :'data.frame':    1 obs. of  1 variable:
##   ..$ cost: num 1
##   ..- attr(*, "out.attrs")=List of 2
##   .. ..$ dim     : Named int 7
##   .. .. ..- attr(*, "names")= chr "cost"
##   .. ..$ dimnames:List of 1
##   .. .. ..$ cost: chr [1:7] "cost=1e-03" "cost=1e-02" "cost=1e-01" "cost=1e+00" ...
##  $ best.performance: num 0.0487
##  $ method          : chr "svm"
##  $ nparcomb        : int 7
##  $ train.ind       :List of 10
##   ..$ (0.877,13.3]: int [1:111] 103 113 64 9 26 74 90 20 85 78 ...
##   ..$ (13.3,25.6] : int [1:112] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (25.6,37.9] : int [1:112] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (37.9,50.2] : int [1:111] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (50.2,62.5] : int [1:112] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (62.5,74.8] : int [1:112] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (74.8,87.1] : int [1:111] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (87.1,99.4] : int [1:112] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (99.4,112]  : int [1:112] 111 66 91 15 14 63 46 124 119 12 ...
##   ..$ (112,124]   : int [1:111] 111 66 91 15 14 63 46 124 119 12 ...
##   ..- attr(*, "dim")= int 10
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ : chr [1:10] "(0.877,13.3]" "(13.3,25.6]" "(25.6,37.9]" "(37.9,50.2]" ...
##  $ sampling        : chr "10-fold cross validation"
##  $ performances    :'data.frame':    7 obs. of  3 variables:
##   ..$ cost      : num [1:7] 1e-03 1e-02 1e-01 1e+00 5e+00 1e+01 1e+02
##   ..$ error     : num [1:7] 0.7321 0.7321 0.1128 0.0487 0.0571 ...
##   ..$ dispersion: num [1:7] 0.1183 0.1183 0.1239 0.0575 0.0557 ...
##  $ best.model      :List of 30
##   ..$ call           : language best.tune(method = svm, train.x = Species ~ ., data = train_iris,      ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)), kernel = "radial")
##   ..$ type           : num 0
##   ..$ kernel         : num 2
##   ..$ cost           : num 1
##   ..$ degree         : num 3
##   ..$ gamma          : num 0.25
##   ..$ coef0          : num 0
##   ..$ nu             : num 0.5
##   ..$ epsilon        : num 0.1
##   ..$ sparse         : logi FALSE
##   ..$ scaled         : logi [1:4] TRUE TRUE TRUE TRUE
##   ..$ x.scale        :List of 2
##   .. ..$ scaled:center: Named num [1:4] 5.8 3.06 3.66 1.15
##   .. .. ..- attr(*, "names")= chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
##   .. ..$ scaled:scale : Named num [1:4] 0.82 0.437 1.751 0.755
##   .. .. ..- attr(*, "names")= chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
##   ..$ y.scale        : NULL
##   ..$ nclasses       : int 3
##   ..$ levels         : chr [1:3] "setosa" "versicolor" "virginica"
##   ..$ tot.nSV        : int 46
##   ..$ nSV            : int [1:3] 8 19 19
##   ..$ labels         : int [1:3] 1 2 3
##   ..$ SV             : num [1:46, 1:4] -1.709 -0.124 -0.49 -0.855 -0.977 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:46] "9" "16" "21" "24" ...
##   .. .. ..$ : chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
##   ..$ index          : int [1:46] 9 15 19 21 23 29 37 38 45 46 ...
##   ..$ rho            : num [1:3] -0.0606 0.0842 0.0634
##   ..$ compprob       : logi FALSE
##   ..$ probA          : NULL
##   ..$ probB          : NULL
##   ..$ sigma          : NULL
##   ..$ coefs          : num [1:46, 1:2] 0.0895 0.8159 0 0.6514 0.6058 ...
##   ..$ na.action      : NULL
##   ..$ fitted         : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "names")= chr [1:124] "1" "2" "3" "4" ...
##   ..$ decision.values: num [1:124, 1:3] 1.19 1.06 1.17 1.1 1.17 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:124] "1" "2" "3" "4" ...
##   .. .. ..$ : chr [1:3] "setosa/versicolor" "setosa/virginica" "versicolor/virginica"
##   ..$ terms          :Classes 'terms', 'formula' length 3 Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
##   .. .. ..- attr(*, "variables")= language list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
##   .. .. ..- attr(*, "factors")= int [1:5, 1:4] 0 1 0 0 0 0 0 1 0 0 ...
##   .. .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. .. ..$ : chr [1:5] "Species" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
##   .. .. .. .. ..$ : chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
##   .. .. ..- attr(*, "term.labels")= chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
##   .. .. ..- attr(*, "order")= int [1:4] 1 1 1 1
##   .. .. ..- attr(*, "intercept")= num 0
##   .. .. ..- attr(*, "response")= int 1
##   .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. .. ..- attr(*, "predvars")= language list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
##   .. .. ..- attr(*, "dataClasses")= Named chr [1:5] "factor" "numeric" "numeric" "numeric" ...
##   .. .. .. ..- attr(*, "names")= chr [1:5] "Species" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
##   ..- attr(*, "class")= chr [1:2] "svm.formula" "svm"
##  - attr(*, "class")= chr "tune"
# Extract the best model
model_best <- model_tune$best.model
summary(model_best)
## 
## Call:
## best.tune(method = svm, train.x = Species ~ ., data = train_iris, 
##     ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)), 
##     kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  46
## 
##  ( 8 19 19 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica
# Predict with the best model
pred_tune <- predict(model_best, test_iris)
mean(pred_tune==test_iris[, 5])
## [1] 1
table(pred_tune, test_iris[, 5])
##             
## pred_tune    setosa versicolor virginica
##   setosa          6          0         0
##   versicolor      0          9         0
##   virginica       0          0        11

Example 2: Finding the optimal parameters

This example builds a binary SVM classifier on the cats dataset from the MASS package, using body weight (Bwt) and heart weight (Hwt) to predict a cat's sex.

#1. Load the data
data(cats, package="MASS")
str(cats)
## 'data.frame':    144 obs. of  3 variables:
##  $ Sex: Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Bwt: num  2 2 2 2.1 2.1 2.1 2.1 2.1 2.1 2.1 ...
##  $ Hwt: num  7 7.4 9.5 7.2 7.3 7.6 8.1 8.2 8.3 8.5 ...
summary(cats)
##  Sex         Bwt             Hwt       
##  F:47   Min.   :2.000   Min.   : 6.30  
##  M:97   1st Qu.:2.300   1st Qu.: 8.95  
##         Median :2.700   Median :10.10  
##         Mean   :2.724   Mean   :10.63  
##         3rd Qu.:3.025   3rd Qu.:12.12  
##         Max.   :3.900   Max.   :20.50
#2. Create training and test sets (a ~70/30 random split)
index <- sample(1:2, nrow(cats), prob=c(0.7, 0.3), replace=T)
train_cats <- cats[index==1, ]
test_cats <- cats[index==2, ]

#3. Fit the models
library(e1071)
#1) Linear-kernel SVM
# Note: svm() silently swallows unknown arguments, so the misspelled 'kernal'
# in the original run was ignored and the default radial kernel was used; that
# is why the printed model below reports a radial kernel and matches the
# radial fit in part 2.
model_linear <- svm(Sex~., train_cats, kernel="linear", cost=10, scale = F)

# Predict
pred <- predict(model_linear, test_cats)
mean(pred==test_cats$Sex)
## [1] 0.7560976
table(pred, test_cats$Sex)
##     
## pred  F  M
##    F  7  6
##    M  4 24
print(model_linear)
## 
## Call:
## svm(formula = Sex ~ ., data = train_cats, kernal = "linear", 
##     cost = 10, scale = F)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  10 
##       gamma:  0.5 
## 
## Number of Support Vectors:  55
#2) Radial-kernel SVM
model_radial <- svm(Sex~., train_cats, kernel="radial", cost=10, scale=F)

# Predict
pred <- predict(model_radial, test_cats)
mean(pred==test_cats$Sex)
## [1] 0.7560976
table(pred, test_cats$Sex)
##     
## pred  F  M
##    F  7  6
##    M  4 24
print(model_radial)
## 
## Call:
## svm(formula = Sex ~ ., data = train_cats, kernel = "radial", 
##     cost = 10, scale = F)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  10 
##       gamma:  0.5 
## 
## Number of Support Vectors:  55
#4. Find the optimal parameters: tune.svm() can search a grid for the best svm() hyperparameters.
model_tuned <- tune.svm(Sex~., data=train_cats, gamma=10^(-6:-1),cost=10^(1:2))
summary(model_tuned)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  gamma cost
##  0.001  100
## 
## - best performance: 0.2172727 
## 
## - Detailed performance results:
##    gamma cost     error dispersion
## 1  1e-06   10 0.3463636  0.2105330
## 2  1e-05   10 0.3463636  0.2105330
## 3  1e-04   10 0.3463636  0.2105330
## 4  1e-03   10 0.3681818  0.2027474
## 5  1e-02   10 0.2272727  0.1177537
## 6  1e-01   10 0.2363636  0.1363973
## 7  1e-06  100 0.3463636  0.2105330
## 8  1e-05  100 0.3463636  0.2105330
## 9  1e-04  100 0.3681818  0.2027474
## 10 1e-03  100 0.2172727  0.1243865
## 11 1e-02  100 0.2272727  0.1432920
## 12 1e-01  100 0.2354545  0.1426143
# In this run the lowest cross-validation error (~0.217) occurred at
# gamma=0.001 and cost=100. Tuning results depend on the random folds, so the
# selected parameters can change between runs.
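
Rather than copying numbers by hand, the tuned values can be read straight from the tune object. A minimal sketch (the names best and model_best_cats are ours):

best <- model_tuned$best.parameters  # one-row data.frame with columns gamma and cost
model_best_cats <- svm(Sex~., data=train_cats, kernel="radial",
                       cost=best$cost, gamma=best$gamma)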

#5. Refit the radial SVM with cost=10 and gamma=0.1
model_cats <- svm(Sex~., train_cats, kernel="radial", cost=10, gamma=0.1, scale = F)

#6. Predict
pred <- predict(model_cats, test_cats)
mean(pred==test_cats$Sex)
## [1] 0.7560976
table(pred, test_cats$Sex)
##     
## pred  F  M
##    F  8  7
##    M  3 23
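
Because cats has only two predictors, the fitted decision boundary can be drawn directly with plot.svm(); no formula or slice argument is needed:

plot(model_cats, train_cats)  # decision regions in the Bwt/Hwt plane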

Example 3: SVMs with ksvm() from the kernlab package

#1. Load the data
data(iris)

#2. Create training and test sets (a ~70/30 random split)
index <- sample(1:2, nrow(iris), prob=c(0.7, 0.3), replace = T)
train_iris <- iris[index==1, ]
test_iris <- iris[index==2, ]

#3. Fit the model
library(kernlab)
model <- ksvm(Species~., train_iris, kernel="rbfdot", type="C-bsvc", kpar=list(sigma=0.1), C=10, prob.model=T)

#4. Predict
pred <- predict(model, test_iris)
mean(pred==test_iris[, 5])
## [1] 0.9642857
table(pred, test_iris[, 5])
##             
## pred         setosa versicolor virginica
##   setosa         18          0         0
##   versicolor      0         19         0
##   virginica       0          2        17
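
Because the model was fitted with prob.model=T, ksvm() can also return class probabilities rather than hard labels. A minimal sketch:

# Per-class probability matrix, one row per test observation
head(predict(model, test_iris, type="probabilities"))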
