R语言实现K-Means算法数据集iris

博主的代码是参照《机器学习实战》一书编写的,那本书用的是 Python,这里改用 R 语言一步一步(Step by Step)实现。R 语言本身已有现成的函数 kmeans() 可以直接调用,
它提供的四种 algorithm 我看着都差不多,所以决定先自己动手写一遍,之后再仔细研究它们的区别。
kmeans(x, centers, iter.max = 10, nstart = 1,
       algorithm = c("Hartigan-Wong", "Lloyd", "Forgy",
                     "MacQueen"))

代码
# Load the built-in iris data set and keep only its measurement columns.
#
# Returns: the iris data frame without its last column (Species).
loadData <- function() {
  data(iris)
  # Species is the last column, so drop it by position.
  speciesCol <- ncol(iris)
  measurements <- iris[, -speciesCol]
  measurements
}
 # Euclidean distance between vecA and vecB.
 #
 # vecA, vecB: numeric vectors, or matrix / data-frame rows, that can be
 #   subtracted from each other.
 # Returns: a single distance when the difference has no dimensions (plain
 #   vectors); otherwise one distance per row of the difference.
 distEnclud <- function(vecA, vecB) {
   sqDiff <- (vecA - vecB)^2
   if (is.null(dim(sqDiff))) {
     # apply() needs a dim attribute and fails on plain vectors, so sum directly.
     return(sqrt(sum(sqDiff)))
   }
   sqrt(apply(sqDiff, 1, sum))
 }
 # Build k random initial centroids inside the bounding box of dataSet.
 #
 # dataSet: numeric matrix or data frame, one observation per row.
 # k: number of centroids to generate.
 # Returns: a k x ncol(dataSet) matrix; column j is drawn uniformly between
 #   the minimum and the maximum of column j of dataSet.
 randCent <- function(dataSet, k) {
   n <- ncol(dataSet)
   # Empty matrix that the loop below fills column by column.
   centroids <- matrix(NA_real_, nrow = k, ncol = n)
   # seq_len() keeps the loop empty when dataSet has no columns; 1:n would
   # iterate over c(1, 0) and index a column that does not exist.
   for (j in seq_len(n)) {
     minJ <- min(dataSet[, j])
     rangeJ <- max(dataSet[, j]) - minJ
     # One uniform draw per centroid, rescaled to [minJ, minJ + rangeJ].
     centroids[, j] <- minJ + rangeJ * runif(k)
   }
   centroids
 }
 # Cluster the rows of dataSet into k groups with k-means: assign every
 # observation to its nearest centroid, recompute the centroids, and repeat
 # until no assignment changes.
 #
 # dataSet: data frame or matrix, one observation per row.
 # k: number of clusters.
 # distEnclud: function(row, centroid) returning the distance between one
 #   observation and one centroid.
 # randCent: function(dataSet, k) returning a k-row matrix of starting centroids.
 # Returns: list(clusterAssment, centroids). clusterAssment has one row per
 #   observation: column 1 is the assigned cluster index, column 2 the squared
 #   distance to that cluster's centroid. centroids is the final centroid matrix.
 kMeans <- function(dataSet, k, distEnclud, randCent) {
   m <- nrow(dataSet)
   # Bookkeeping matrix: cluster index in column 1, squared error in column 2.
   clusterAssment <- matrix(0, nrow = m, ncol = 2)
   # Starting centroids, labelled with the same column names as the data.
   centroids <- randCent(dataSet, k)
   colnames(centroids) <- colnames(dataSet)

   # Stays TRUE while any observation switched cluster in the last pass.
   clusterChanged <- TRUE
   while (clusterChanged) {
     clusterChanged <- FALSE
     for (i in seq_len(m)) {
       minDist <- Inf
       minIndex <- -1
       # drop = FALSE keeps the observation as a one-row table even for
       # matrices and single-column data frames.
       observation <- dataSet[i, , drop = FALSE]
       for (j in seq_len(k)) {
         distJI <- distEnclud(observation, centroids[j, ])
         # An undefined distance can never be the closest one.
         if (is.na(distJI)) distJI <- Inf
         if (distJI < minDist) {
           minDist <- distJI
           minIndex <- j
         }
       }
       if (clusterAssment[i, 1] != minIndex) {
         clusterChanged <- TRUE
         clusterAssment[i, 1] <- minIndex
       }
       clusterAssment[i, 2] <- minDist^2
     }

     # Recompute every centroid after each full assignment pass.
     for (cent in seq_len(k)) {
       members <- which(clusterAssment[, 1] == cent)
       # The mean of an empty cluster is NaN, which would disable that centroid
       # for good; keep its previous position instead.
       if (length(members) > 0) {
         ptsCluster <- dataSet[members, , drop = FALSE]
         centroids[cent, ] <- apply(ptsCluster, 2, mean)
       }
     }
   }
   # Bundle both result matrices in a list so the caller can pick them by name.
   list(clusterAssment = clusterAssment, centroids = centroids)
 }
########### Driver code: three calls are enough to run the clustering
# These calls must run before the plots below, which read `dataSet` and `output`.
k <- 2
dataSet <- loadData()
head(dataSet)
output <- kMeans(dataSet, k, distEnclud, randCent)

# Plot Sepal.Length against Sepal.Width, coloured by assigned cluster;
# the star symbols mark the cluster centroids.
plot(dataSet[c("Sepal.Length", "Sepal.Width")], col = output$clusterAssment[, 1])
points(output$centroids[, "Sepal.Length"], output$centroids[, "Sepal.Width"],
       col = seq_len(k), pch = 8, cex = 2)
# Plot Petal.Length against Petal.Width in the same way.
plot(dataSet[c("Petal.Length", "Petal.Width")], col = output$clusterAssment[, 1])
points(output$centroids[, "Petal.Length"], output$centroids[, "Petal.Width"],
       col = seq_len(k), pch = 8, cex = 2)
# Cross-tabulate the true species against the assigned cluster.
table(iris$Species, output$clusterAssment[, 1])

根据 Sepal.Length、Sepal.Width 两列画出的图形
R语言实现K-Means算法数据集iris_第1张图片
根据Petal.Length,Petal.Width 两列画出的图形
R语言实现K-Means算法数据集iris_第2张图片

你可能感兴趣的:(机器学习,无监督学习)