具体算法可以参考
http://www.aboutyun.com/thread-18178-1-1.html
鸢尾花卉(Iris)数据集是一个常用于多变量分析的数据集。
每条数据包含 4 个属性。
可通过花萼长度、花萼宽度、花瓣长度、花瓣宽度这 4 个属性,预测鸢尾花卉属于 Setosa、Versicolour、Virginica 三个种类中的哪一类。
最后将 K-means 算法识别出的类别与数据中给定的类别进行对比,计算正确率。
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
/**
 * K-means clustering of flower samples into three clusters, scored against the
 * reference labels stored with the data.
 *
 * <p>Every sample is a {@code float[6]}: index 0 is the row number, indexes 1-4 are the
 * four measurements used for clustering, and index 5 is the reference class label
 * (1, 2 or 3), which is only used to count correctly classified samples.
 */
public class K_means {
    /** Whitespace-separated data file, resolved against the working directory. */
    private static final String INPUT_FILE = "input.txt";
    /** Number of columns expected on every data line. */
    private static final int FIELD_COUNT = 6;
    /** Number of measurements per sample (columns 1-4). */
    private static final int FEATURE_COUNT = 4;
    /** Column holding the reference class label. */
    private static final int LABEL_INDEX = 5;
    /** Safety net in case centroid rounding ever makes the assignment oscillate. */
    private static final int MAX_ITERATIONS = 1000;

    // The three clusters the flowers are partitioned into.
    private final List<float[]> K1 = new ArrayList<>();
    private final List<float[]> K2 = new ArrayList<>();
    private final List<float[]> K3 = new ArrayList<>();

    /** Number of samples whose cluster matched their reference label in the latest pass. */
    public static int num;

    private static final List<float[]> flowerList = new ArrayList<>();

    /**
     * Loads the samples from {@code input.txt} into the shared sample list.
     *
     * <p>Blank lines are skipped. Any previously loaded samples are discarded first so that
     * loading twice does not duplicate the data.
     *
     * @throws IllegalStateException if the file is missing or cannot be read
     * @throws IllegalArgumentException if a line has fewer than six fields
     *     ({@link NumberFormatException} if a field is not a number)
     */
    private void initData() {
        flowerList.clear();
        // try-with-resources guarantees the reader is closed on every path.
        try (BufferedReader br = new BufferedReader(new FileReader(INPUT_FILE))) {
            String line;
            int lineNumber = 0;
            while ((line = br.readLine()) != null) {
                lineNumber++;
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] fields = trimmed.split("\\s+");
                if (fields.length < FIELD_COUNT) {
                    throw new IllegalArgumentException(
                            INPUT_FILE + " line " + lineNumber + ": expected " + FIELD_COUNT
                                    + " fields but found " + fields.length);
                }
                float[] flower = new float[FIELD_COUNT];
                for (int i = 0; i < FIELD_COUNT; i++) {
                    flower[i] = Float.parseFloat(fields[i]);
                }
                flowerList.add(flower);
            }
        } catch (FileNotFoundException e) {
            throw new IllegalStateException("Data file not found: " + INPUT_FILE, e);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read data file: " + INPUT_FILE, e);
        }
    }

    /**
     * Computes the mean of the four measurements (columns 1-4) over one cluster.
     *
     * @param K samples of a single cluster; must not be empty
     * @return a new four-element array with the per-feature mean, rounded to three decimals
     * @throws IllegalArgumentException if {@code K} is empty (the mean is undefined)
     */
    public float[] kMeans(List<float[]> K) {
        if (K.isEmpty()) {
            throw new IllegalArgumentException("Cannot compute the mean of an empty cluster");
        }
        // Locale.ROOT pins '.' as the decimal separator so Float.parseFloat can always
        // read the formatted value back, whatever the JVM's default locale is.
        DecimalFormat df = new DecimalFormat(".000", DecimalFormatSymbols.getInstance(Locale.ROOT));
        float[] mean = new float[FEATURE_COUNT];
        for (float[] flower : K) {
            for (int j = 0; j < FEATURE_COUNT; j++) {
                mean[j] += flower[j + 1];
            }
        }
        for (int j = 0; j < FEATURE_COUNT; j++) {
            mean[j] = Float.parseFloat(df.format(mean[j] / K.size()));
        }
        return mean;
    }

    /**
     * Returns the smallest of three distances.
     *
     * @param f1 first distance
     * @param f2 second distance
     * @param f3 third distance
     * @return the minimum of the three values
     */
    private static float min(float f1, float f2, float f3) {
        return Math.min(f1, Math.min(f2, f3));
    }

    /** Squared Euclidean distance between a sample's measurements and a centroid. */
    private static float squaredDistance(float[] flower, float[] centroid) {
        double sum = 0;
        for (int j = 0; j < FEATURE_COUNT; j++) {
            double diff = flower[j + 1] - centroid[j];
            sum += diff * diff;
        }
        return (float) sum;
    }

    /** Adds a sample to a cluster and counts it as correct when its label matches. */
    private static void assign(float[] flower, List<float[]> cluster, float expectedLabel) {
        cluster.add(flower);
        if (flower[LABEL_INDEX] == expectedLabel) {
            num += 1;
        }
    }

    /** Prints the four coordinates of a centroid on one line. */
    private static void printCentroid(float[] centroid) {
        System.out.println(centroid[0] + " " + centroid[1] + " " + centroid[2] + " " + centroid[3]);
    }

    /**
     * Runs K-means on the loaded samples until the centroids stop changing.
     *
     * <p>Each pass rebuilds the three clusters from scratch, assigns every sample to its
     * nearest centroid (ties go to the lowest-numbered cluster), prints the number of
     * correctly classified samples and the accuracy, and then recomputes the centroids.
     * The count of the final pass is left in {@link #num}.
     */
    public void kDistance() {
        // Initial centroids.
        float[] flowerK1 = {4.9f, 3f, 1.4f, 0.2f};
        float[] flowerK2 = {6.4f, 3.2f, 4.5f, 1.5f};
        float[] flowerK3 = {5.8f, 2.7f, 5.1f, 1.9f};

        for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
            num = 0;
            // Clusters must be rebuilt on every pass; otherwise samples pile up across passes.
            K1.clear();
            K2.clear();
            K3.clear();

            for (float[] flower : flowerList) {
                float d1 = squaredDistance(flower, flowerK1);
                float d2 = squaredDistance(flower, flowerK2);
                float d3 = squaredDistance(flower, flowerK3);
                float nearest = min(d1, d2, d3);
                // else-if chain: a sample belongs to exactly one cluster, even on a tie.
                if (d1 == nearest) {
                    assign(flower, K1, 1.0f);
                } else if (d2 == nearest) {
                    assign(flower, K2, 2.0f);
                } else {
                    assign(flower, K3, 3.0f);
                }
            }

            System.out.println(num); // number of correctly classified flowers
            double rate = (double) num / (double) flowerList.size();
            System.out.println("正确率为:" + rate);

            // An empty cluster has no mean; keep its previous centroid in that case.
            float[] nextK1 = K1.isEmpty() ? flowerK1 : kMeans(K1);
            float[] nextK2 = K2.isEmpty() ? flowerK2 : kMeans(K2);
            float[] nextK3 = K3.isEmpty() ? flowerK3 : kMeans(K3);

            // Stop once the new centroids equal the old ones (compared by content).
            if (Arrays.equals(flowerK1, nextK1)
                    && Arrays.equals(flowerK2, nextK2)
                    && Arrays.equals(flowerK3, nextK3)) {
                break;
            }

            flowerK1 = nextK1;
            flowerK2 = nextK2;
            flowerK3 = nextK3;
            printCentroid(flowerK1);
            printCentroid(flowerK2);
            printCentroid(flowerK3);
        }
    }

    /** Loads {@code input.txt} from the working directory and runs the clustering. */
    public static void main(String[] args) {
        K_means kmeans = new K_means();
        kmeans.initData();
        kmeans.kDistance();
    }
}
部分数据(保存为 input.txt;每行依次为:序号、花萼长度、花萼宽度、花瓣长度、花瓣宽度、类别编号 1/2/3,以空白分隔):
1 5.1 3.5 1.4 0.2 1
2 4.9 3 1.4 0.2 1
3 4.7 3.2 1.3 0.2 1
4 4.6 3.1 1.5 0.2 1
5 5 3.6 1.4 0.3 1
6 5.4 3.9 1.7 0.4 1
7 4.6 3.4 1.4 0.3 1
8 5 3.4 1.5 0.2 1
9 4.4 2.9 1.4 0.2 1
10 4.9 3.1 1.5 0.1 1
11 5.4 3.7 1.5 0.2 1
12 4.8 3.4 1.6 0.2 1
13 4.8 3 1.4 0.1 1
14 4.3 3 1.1 0.1 1
15 5.8 4 1.2 0.2 1
16 5.7 4.4 1.5 0.4 1
17 5.4 3.9 1.3 0.4 1
18 5.1 3.5 1.4 0.3 1
19 5.7 3.8 1.7 0.3 1
20 5.1 3.8 1.5 0.3 1
21 5.4 3.4 1.7 0.2 1
22 5.1 3.7 1.5 0.4 1
23 4.6 3.6 1 0.2 1
24 5.1 3.3 1.7 0.5 1
25 4.8 3.4 1.9 0.2 1
26 5 3 1.6 0.2 1
27 5 3.4 1.6 0.4 1
28 5.2 3.5 1.5 0.2 1
29 5.2 3.4 1.4 0.2 1
30 4.7 3.2 1.6 0.2 1
31 4.8 3.1 1.6 0.2 1
32 5.4 3.4 1.5 0.4 1
33 5.2 4.1 1.5 0.1 1
34 5.5 4.2 1.4 0.2 1
35 4.9 3.1 1.5 0.2 1
36 5 3.2 1.2 0.2 1
37 5.5 3.5 1.3 0.2 1
38 4.9 3.6 1.4 0.1 1
39 4.4 3 1.3 0.2 1
40 5.1 3.4 1.5 0.2 1
41 5 3.5 1.3 0.3 1
42 4.5 2.3 1.3 0.3 1
43 4.4 3.2 1.3 0.2 1
44 5 3.5 1.6 0.6 1
45 5.1 3.8 1.9 0.4 1
46 4.8 3 1.4 0.3 1
47 5.1 3.8 1.6 0.2 1
48 4.6 3.2 1.4 0.2 1
49 5.3 3.7 1.5 0.2 1
50 5 3.3 1.4 0.2 1
51 7 3.2 4.7 1.4 2
52 6.4 3.2 4.5 1.5 2
53 6.9 3.1 4.9 1.5 2
54 5.5 2.3 4 1.3 2
55 6.5 2.8 4.6 1.5 2
56 5.7 2.8 4.5 1.3 2
57 6.3 3.3 4.7 1.6 2
58 4.9 2.4 3.3 1 2
59 6.6 2.9 4.6 1.3 2
60 5.2 2.7 3.9 1.4 2
61 5 2 3.5 1 2
62 5.9 3 4.2 1.5 2
63 6 2.2 4 1 2
64 6.1 2.9 4.7 1.4 2
65 5.6 2.9 3.6 1.3 2
66 6.7 3.1 4.4 1.4 2
67 5.6 3 4.5 1.5 2
68 5.8 2.7 4.1 1 2
69 6.2 2.2 4.5 1.5 2
70 5.6 2.5 3.9 1.1 2
71 5.9 3.2 4.8 1.8 2
72 6.1 2.8 4 1.3 2
73 6.3 2.5 4.9 1.5 2
74 6.1 2.8 4.7 1.2 2
75 6.4 2.9 4.3 1.3 2
76 6.6 3 4.4 1.4 2
77 6.8 2.8 4.8 1.4 2
78 6.7 3 5 1.7 2
79 6 2.9 4.5 1.5 2
80 5.7 2.6 3.5 1 2
81 5.5 2.4 3.8 1.1 2
82 5.5 2.4 3.7 1 2
83 5.8 2.7 3.9 1.2 2
84 6 2.7 5.1 1.6 2
85 5.4 3 4.5 1.5 2
86 6 3.4 4.5 1.6 2
87 6.7 3.1 4.7 1.5 2
88 6.3 2.3 4.4 1.3 2
89 5.6 3 4.1 1.3 2
90 5.5 2.5 4 1.3 2
91 5.5 2.6 4.4 1.2 2
92 6.1 3 4.6 1.4 2
93 5.8 2.6 4 1.2 2
94 5 2.3 3.3 1 2
95 5.6 2.7 4.2 1.3 2
96 5.7 3 4.2 1.2 2
97 5.7 2.9 4.2 1.3 2
98 6.2 2.9 4.3 1.3 2
99 5.1 2.5 3 1.1 2
100 5.7 2.8 4.1 1.3 2
101 6.3 3.3 6 2.5 3
102 5.8 2.7 5.1 1.9 3
103 7.1 3 5.9 2.1 3
104 6.3 2.9 5.6 1.8 3
105 6.5 3 5.8 2.2 3
106 7.6 3 6.6 2.1 3
107 4.9 2.5 4.5 1.7 3
108 7.3 2.9 6.3 1.8 3
109 6.7 2.5 5.8 1.8 3
110 7.2 3.6 6.1 2.5 3
111 6.5 3.2 5.1 2 3
112 6.4 2.7 5.3 1.9 3
113 6.8 3 5.5 2.1 3
114 5.7 2.5 5 2 3
115 5.8 2.8 5.1 2.4 3
116 6.4 3.2 5.3 2.3 3
117 6.5 3 5.5 1.8 3
118 7.7 3.8 6.7 2.2 3
119 7.7 2.6 6.9 2.3 3
120 6 2.2 5 1.5 3
121 6.9 3.2 5.7 2.3 3
122 5.6 2.8 4.9 2 3
123 7.7 2.8 6.7 2 3
124 6.3 2.7 4.9 1.8 3
125 6.7 3.3 5.7 2.1 3
126 7.2 3.2 6 1.8 3
127 6.2 2.8 4.8 1.8 3
128 6.1 3 4.9 1.8 3
129 6.4 2.8 5.6 2.1 3
130 7.2 3 5.8 1.6 3
131 7.4 2.8 6.1 1.9 3
132 7.9 3.8 6.4 2 3
133 6.4 2.8 5.6 2.2 3
134 6.3 2.8 5.1 1.5 3
135 6.1 2.6 5.6 1.4 3
136 7.7 3 6.1 2.3 3
137 6.3 3.4 5.6 2.4 3
138 6.4 3.1 5.5 1.8 3
139 6 3 4.8 1.8 3
140 6.9 3.1 5.4 2.1 3
141 6.7 3.1 5.6 2.4 3
142 6.9 3.1 5.1 2.3 3
143 5.8 2.7 5.1 1.9 3
144 6.8 3.2 5.9 2.3 3
145 6.7 3.3 5.7 2.5 3
146 6.7 3 5.2 2.3 3
147 6.3 2.5 5 1.9 3
148 6.5 3 5.2 2 3
149 6.2 3.4 5.4 2.3 3
150 5.9 3 5.1 1.8 3