K-means 算法的 Java 实现

《数据挖掘》平时实验作业,只提供代码和数据。

代码:

package com.outsider.kmeans;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Minimal k-means clustering over dense {@code double[][]} samples.
 *
 * <p>Each row of the data matrix is one sample and all rows are expected to have the same
 * number of features. Progress and the final result are printed to standard output.
 */
public class Kmeans {

	/** Path of the data file read by {@link #loadIrisData()}. */
	private static final String IRIS_PATH = "./data/iris.arff";

	/** Number of leading numeric features parsed from every data line. */
	private static final int IRIS_FEATURES = 4;

	/**
	 * Runs k-means and prints, for every cluster, its share of the samples and its center.
	 *
	 * <p>The loop stops as soon as the centers no longer change or after {@code maxIter}
	 * iterations, whichever comes first. At least one iteration is always performed.
	 *
	 * @param k       number of clusters; must satisfy {@code 1 <= k <= data.length}
	 * @param data    samples, one row per sample
	 * @param maxIter maximum number of iterations
	 * @throws IllegalArgumentException if {@code data} is null or empty, or {@code k} is out of range
	 */
	public void run(int k, double[][] data, int maxIter) {
		if (data == null || data.length == 0) {
			throw new IllegalArgumentException("data must contain at least one sample");
		}
		if (k <= 0 || k > data.length) {
			throw new IllegalArgumentException(
					"k must be in [1, " + data.length + "] but was " + k);
		}
		// Step 0: pick k distinct samples at random as the initial centers.
		Set<Integer> indices = new HashSet<>(k);
		while (indices.size() != k) {
			// Math.random() is in [0, 1), so scaling by data.length reaches every index,
			// including the last one.
			indices.add((int) (Math.random() * data.length));
		}
		double[][] center = new double[k][];
		int c = 0;
		for (int index : indices) {
			// Copy the row so the centers never alias the caller's data.
			center[c++] = data[index].clone();
		}
		// Step 1: iterate
		//   1.1 assign every sample to its nearest center
		//   1.2 move every center to the mean of its cluster
		//   1.3 stop when the centers are unchanged or maxIter is reached
		int iter = 0;
		while (true) {
			Object[] rs = classify(center, data);
			int[] count = (int[]) rs[1];
			double[][] newCenter = (double[][]) rs[2];
			// Compare every coordinate of every center (not just the first k coordinates).
			boolean convergent = Arrays.deepEquals(center, newCenter);
			iter++;
			System.out.println("iter " + iter);
			center = newCenter;
			if (convergent || iter >= maxIter) {
				printResult(count, newCenter, data.length);
				break;
			}
		}
	}

	/**
	 * Prints one line per cluster: its index, the fraction of samples it holds and its center.
	 *
	 * @param count   number of samples in each cluster
	 * @param center  cluster centers
	 * @param dataLen total number of samples
	 */
	public void printResult(int[] count, double[][] center, int dataLen) {
		for (int i = 0; i < center.length; i++) {
			System.out.println("class " + i + ":" + (count[i] * 1.0 / dataLen) + " " + Arrays.toString(center[i]));
		}
	}

	/**
	 * Assigns every sample to its nearest center and computes the updated centers.
	 *
	 * <p>Ties are resolved in favour of the center with the lowest index. A cluster that
	 * receives no samples keeps its previous center instead of becoming NaN.
	 *
	 * @param center current cluster centers
	 * @param data   samples, one row per sample
	 * @return a three-element array: {@code int[]} labels per sample, {@code int[]} sample
	 *         count per cluster, and {@code double[][]} new cluster centers
	 */
	public Object[] classify(double[][] center, double[][] data) {
		int[] labels = new int[data.length];
		int[] count = new int[center.length];
		for (int i = 0; i < data.length; i++) {
			int best = 0;
			double minDist = distance(center[0], data[i]);
			for (int j = 1; j < center.length; j++) {
				double dist = distance(center[j], data[i]);
				if (dist < minDist) {
					minDist = dist;
					best = j;
				}
			}
			labels[i] = best;
		}
		// Accumulate per-cluster coordinate sums, then divide by the cluster size.
		int dims = center[0].length;
		double[][] newCenter = new double[center.length][dims];
		for (int i = 0; i < data.length; i++) {
			int label = labels[i];
			count[label]++;
			for (int j = 0; j < dims; j++) {
				newCenter[label][j] += data[i][j];
			}
		}
		for (int i = 0; i < newCenter.length; i++) {
			if (count[i] == 0) {
				// Empty cluster: 0/0 would yield NaN, which never compares equal and would
				// prevent convergence. Keep the previous center instead.
				newCenter[i] = center[i].clone();
				continue;
			}
			for (int j = 0; j < dims; j++) {
				newCenter[i][j] /= count[i];
			}
		}
		return new Object[] {labels, count, newCenter};
	}

	/**
	 * Computes the Euclidean distance between two vectors.
	 *
	 * @param v1 first vector; its length determines how many coordinates are compared
	 * @param v2 second vector; must be at least as long as {@code v1}
	 * @return the Euclidean distance between {@code v1} and {@code v2}
	 */
	public double distance(double[] v1, double[] v2) {
		double sum = 0;
		for (int i = 0; i < v1.length; i++) {
			double diff = v1[i] - v2[i];
			sum += diff * diff;
		}
		return Math.sqrt(sum);
	}

	/**
	 * Loads the samples from {@code ./data/iris.arff}.
	 *
	 * <p>Every data line is split on commas and its first four fields are parsed as numbers;
	 * any further fields (such as a class label) are ignored. Blank lines and lines starting
	 * with {@code @} or {@code %} (ARFF header and comment lines) are skipped.
	 *
	 * @return one row of four features per data line, in file order
	 * @throws UncheckedIOException     if the file cannot be opened or read
	 * @throws IllegalArgumentException if a data line has fewer than four fields or a
	 *                                  non-numeric feature
	 */
	public static double[][] loadIrisData() {
		List<double[]> samples = new ArrayList<>();
		// try-with-resources closes the reader even when opening or parsing fails.
		try (BufferedReader reader = new BufferedReader(new FileReader(IRIS_PATH))) {
			String line;
			int lineNo = 0;
			while ((line = reader.readLine()) != null) {
				lineNo++;
				String trimmed = line.trim();
				if (trimmed.isEmpty() || trimmed.startsWith("@") || trimmed.startsWith("%")) {
					continue;
				}
				String[] fields = trimmed.split(",");
				if (fields.length < IRIS_FEATURES) {
					throw new IllegalArgumentException(
							IRIS_PATH + " line " + lineNo + ": expected at least "
									+ IRIS_FEATURES + " fields but found " + fields.length);
				}
				double[] sample = new double[IRIS_FEATURES];
				try {
					for (int i = 0; i < IRIS_FEATURES; i++) {
						sample[i] = Double.parseDouble(fields[i].trim());
					}
				} catch (NumberFormatException e) {
					throw new IllegalArgumentException(
							IRIS_PATH + " line " + lineNo + ": non-numeric feature in \"" + trimmed + "\"", e);
				}
				samples.add(sample);
			}
		} catch (IOException e) {
			throw new UncheckedIOException("Cannot read " + IRIS_PATH, e);
		}
		return samples.toArray(new double[0][]);
	}

	/**
	 * Clusters the Iris data into three groups and prints the result.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		Kmeans kmeans = new Kmeans();
		double[][] data = loadIrisData();
		kmeans.run(3, data, Integer.MAX_VALUE);
	}
}

数据为 Iris(鸢尾花)数据集(代码从 ./data/iris.arff 读取,每行为 4 个数值特征加类别标签):

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica

 

你可能感兴趣的:(大数据&数据挖掘)