ClusterMain.java
package eu.eodigos.kmean;

import java.util.Iterator;
import java.util.List;
import java.util.Vector;

import eu.eodigos.hibernate.bean.ClusterInput;
import eu.eodigos.hibernate.server.AccessDBServer;
import eu.eodigos.hibernate.server.AccessDBServerImp;

/**
 * Entry point for the k-means clustering of users by their seven average scores.
 *
 * @author daoger
 * @version 1.0
 */
public class ClusterMain {
    /** Number of clusters used by the manual {@link #test()} run. */
    private static final int TEST_CLUSTER_COUNT = 3;

    /** Number of clusters the database users are divided into. */
    private static final int DATABASE_CLUSTER_COUNT = 7;

    /**
     * Upper bound on the number of reassignment passes handed to
     * {@link ClusterAssistant}; more passes give the algorithm more room to settle.
     */
    private static final int MAX_ITERATIONS = 10000;

    /** Hand-written sample rows for {@link #test()}; row {@code i} belongs to user id {@code i + 1}. */
    private static final double[][] SAMPLE_DATA = {
        { 0.12, 0.21, 0.26, 0.45, 0.67, 0.23, 0.11 },
        { 0.22, 0.23, 0.46, 0.11, 0.63, 0.11, 0.12 },
        { 0.32, 0.34, 0.78, 0.17, 0.68, 0.67, 0.13 },
        { 0.42, 0.45, 0.26, 0.42, 0.48, 0.39, 0.14 },
        { 0.52, 0.29, 0.65, 0.59, 0.16, 0.74, 0.15 },
        { 0.62, 0.25, 0.48, 0.61, 0.27, 0.16, 0.67 },
        { 0.72, 0.35, 0.39, 0.20, 0.65, 0.26, 0.17 },
        { 0.82, 0.20, 0.16, 0.29, 0.32, 0.61, 0.18 },
        { 0.92, 0.71, 0.26, 0.37, 0.17, 0.81, 0.19 },
        { 0.13, 0.39, 0.17, 0.41, 0.47, 0.37, 0.10 },
        { 0.14, 0.23, 0.47, 0.93, 0.68, 0.28, 0.29 },
        { 0.15, 0.57, 0.84, 0.19, 0.15, 0.39, 0.39 },
        { 0.16, 0.19, 0.45, 0.38, 0.36, 0.82, 0.49 },
        { 0.17, 0.89, 0.29, 0.39, 0.82, 0.58, 0.59 },
    };

    public static void main(String[] args) {
        ClusterMain clusterMain = new ClusterMain();
        clusterMain.clusterByDatabase();
    }

    /**
     * Clusters the hard-coded sample data into {@value #TEST_CLUSTER_COUNT} groups and
     * prints every cluster with its members to standard output.
     */
    public void test() {
        Vector<DataPoint> dataPoints = new Vector<DataPoint>();
        for (int row = 0; row < SAMPLE_DATA.length; row++) {
            double[] s = SAMPLE_DATA[row];
            dataPoints.add(new DataPoint(s[0], s[1], s[2], s[3], s[4], s[5], s[6], Integer.valueOf(row + 1)));
        }

        ClusterAssistant clusterAssistant = new ClusterAssistant(TEST_CLUSTER_COUNT, MAX_ITERATIONS, dataPoints);
        clusterAssistant.startAnalysis();

        Vector[] output = clusterAssistant.getClusterOutput();
        for (int i = 0; i < output.length; i++) {
            System.out.println("-----------Cluster" + i + "---------");
            for (Object member : output[i]) {
                DataPoint dp = (DataPoint) member;
                // Every coordinate is comma-separated (the separator between avg2 and
                // avg3 used to be missing, fusing the two numbers together).
                String dps = "userid_" + dp.getUserid() + "[" + dp.getAvg1() + "," + dp.getAvg2() + ","
                        + dp.getAvg3() + "," + dp.getAvg4() + "," + dp.getAvg5() + "," + dp.getAvg6() + ","
                        + dp.getAvg7() + "]";
                System.out.println(dps);
            }
        }
    }

    /**
     * Loads all cluster input rows from the database, clusters them into
     * {@value #DATABASE_CLUSTER_COUNT} groups and writes the 1-based cluster number of
     * every data point back through {@link AccessDBServer#updateClusterCateOfUsers}.
     * Does nothing when the database yields no usable rows.
     */
    public void clusterByDatabase() {
        AccessDBServer access = new AccessDBServerImp();
        Vector<DataPoint> dataPoints = new Vector<DataPoint>();

        List<?> clusterList = access.getAllClusterInputData();
        if (clusterList != null) {
            for (Object row : clusterList) {
                ClusterInput clusterInput = (ClusterInput) row;
                if (clusterInput != null) {
                    // NOTE(review): getClusterId() is stored as the data point's user id and is
                    // later passed to updateClusterCateOfUsers - confirm it really identifies the user.
                    dataPoints.add(new DataPoint(clusterInput.getAvgArch(), clusterInput.getAvgMon(),
                            clusterInput.getAvgMus(), clusterInput.getAvgBuil(), clusterInput.getAvgChap(),
                            clusterInput.getAvgBeach(), clusterInput.getAvgWalk(), clusterInput.getClusterId()));
                }
            }
        }

        // Without data there is nothing to cluster and nothing to write back.
        if (dataPoints.isEmpty()) {
            return;
        }

        ClusterAssistant clusterAssistant = new ClusterAssistant(DATABASE_CLUSTER_COUNT, MAX_ITERATIONS,
                dataPoints);
        clusterAssistant.startAnalysis();

        Vector[] output = clusterAssistant.getClusterOutput();
        for (int i = 0; i < output.length; i++) {
            for (Object member : output[i]) {
                DataPoint dp = (DataPoint) member;
                // Cluster categories are stored 1-based.
                access.updateClusterCateOfUsers(dp.getUserid(), Integer.valueOf(i + 1));
            }
        }
    }
}
ClusterAssistant.java
package eu.eodigos.kmean;

import java.util.Vector;

/**
 * Runs a k-means style clustering over seven-dimensional {@link DataPoint}s.
 * <p>
 * Usage: construct, call {@link #startAnalysis()} once, then read the result with
 * {@link #getClusterOutput()}. Calling {@link #startAnalysis()} again adds the data
 * points to the clusters a second time.
 *
 * @author daoger
 * @version 1.0
 */
public class ClusterAssistant {
    /** Number of coordinates (avg1..avg7) every data point and centroid carries. */
    private static final int DIMENSIONS = 7;

    private Cluster[] clusters;

    private int miter;

    private Vector<DataPoint> mDataPoints = new Vector<DataPoint>();

    private double mSWCSS;

    /**
     * @param k          number of clusters to build; must be at least 1
     * @param iter       maximum number of reassignment passes over all points
     * @param dataPoints the {@link DataPoint}s to cluster; must not be {@code null}
     * @throws IllegalArgumentException if {@code k < 1} or {@code dataPoints} is {@code null}
     */
    public ClusterAssistant(int k, int iter, Vector dataPoints) {
        if (k < 1) {
            // k == 0 used to make startAnalysis() spin forever in its distribution loop.
            throw new IllegalArgumentException("k must be at least 1 but was " + k);
        }
        if (dataPoints == null) {
            throw new IllegalArgumentException("dataPoints must not be null");
        }
        clusters = new Cluster[k];
        for (int i = 0; i < k; i++) {
            clusters[i] = new Cluster("Cluster" + i);
        }
        this.miter = iter;
        // The parameter stays a raw Vector for source compatibility with existing callers.
        @SuppressWarnings("unchecked")
        Vector<DataPoint> typed = dataPoints;
        this.mDataPoints = typed;
    }

    /** Stores the sum of every cluster's {@code getSumSqr()} in {@code mSWCSS}. */
    private void calcSWCSS() {
        double total = 0;
        for (int i = 0; i < clusters.length; i++) {
            total = total + clusters[i].getSumSqr();
        }
        mSWCSS = total;
    }

    /** Recomputes every cluster's centroid and then refreshes {@code mSWCSS}. */
    private void recalculateCentroids() {
        for (int i = 0; i < clusters.length; i++) {
            clusters[i].getCentroid().calcCentroid();
        }
        calcSWCSS();
    }

    /**
     * Returns the cluster whose centroid is strictly closer to {@code point} than the
     * centroid of the cluster it currently sits in, or {@code null} if there is none.
     * When several clusters are closer, the closest one wins.
     */
    private Cluster findCloserCluster(DataPoint point, Cluster current) {
        double bestDistance = point.getCurrentEuDt();
        Cluster closest = null;
        for (int i = 0; i < clusters.length; i++) {
            Cluster candidate = clusters[i];
            if (candidate == current) {
                continue;
            }
            double distance = point.testEuclideanDistance(candidate.getCentroid());
            if (distance < bestDistance) {
                bestDistance = distance;
                closest = candidate;
            }
        }
        return closest;
    }

    /**
     * Performs the clustering: seeds the centroids, deals the points round-robin onto
     * the clusters, and then repeatedly moves each point to a strictly closer cluster,
     * recomputing all centroids after every move. Stops after {@code iter} passes or
     * as soon as a full pass moves no point. Returns immediately when there are no
     * data points.
     */
    public void startAnalysis() {
        if (mDataPoints.isEmpty()) {
            // Seeding needs at least one point to derive the value ranges from.
            mSWCSS = 0;
            return;
        }

        setInitialCentroids();

        // Initial assignment: point n goes to cluster n mod k.
        for (int n = 0; n < mDataPoints.size(); n++) {
            clusters[n % clusters.length].addDataPoint(mDataPoints.elementAt(n));
        }
        recalculateCentroids();

        for (int pass = 0; pass < miter; pass++) {
            boolean moved = false;
            for (int j = 0; j < clusters.length; j++) {
                Cluster source = clusters[j];
                int k = 0;
                while (k < source.getNumDataPoints()) {
                    DataPoint point = source.getDataPoint(k);
                    Cluster target = findCloserCluster(point, source);
                    if (target == null) {
                        k++;
                        continue;
                    }
                    target.addDataPoint(point);
                    source.removeDataPoint(point);
                    recalculateCentroids();
                    moved = true;
                    // k is deliberately not advanced: removing the point shifted the
                    // next one into slot k, and it must be examined too.
                }
            }
            if (!moved) {
                // Nothing changed, so every further pass would see the same state.
                break;
            }
        }
    }

    /**
     * @return one vector of {@link DataPoint}s per cluster, in cluster order; the
     *         vectors are the ones returned by {@code Cluster.getDataPoints()}
     */
    public Vector[] getClusterOutput() {
        Vector[] output = new Vector[clusters.length];
        for (int i = 0; i < clusters.length; i++) {
            output[i] = clusters[i].getDataPoints();
        }
        return output;
    }

    /**
     * Seeds cluster n (n = 1..k) with a centroid whose coordinate in each dimension is
     * {@code ((max - min) / (k + 1)) * n + min}, i.e. the k seeds are spread evenly
     * strictly between the per-dimension minimum and maximum of the data.
     */
    private void setInitialCentroids() {
        // The value ranges do not depend on the cluster, so scan the data only once.
        double[] min = new double[DIMENSIONS];
        double[] max = new double[DIMENSIONS];
        for (int d = 0; d < DIMENSIONS; d++) {
            min[d] = getMinXValue(d + 1);
            max[d] = getMaxXValue(d + 1);
        }

        for (int n = 1; n <= clusters.length; n++) {
            double[] c = new double[DIMENSIONS];
            for (int d = 0; d < DIMENSIONS; d++) {
                c[d] = (((max[d] - min[d]) / (clusters.length + 1)) * n) + min[d];
            }
            Centroid ce = new Centroid(c[0], c[1], c[2], c[3], c[4], c[5], c[6]);
            clusters[n - 1].setCentroid(ce);
            ce.setCluster(clusters[n - 1]);
        }
    }

    /**
     * Reads one coordinate of a data point.
     *
     * @param avgnumber 1 = Archeological, 2 = Monuments, 3 = Museums, 4 = Buildings,
     *                  5 = Chapels, 6 = Beaches, 7 = Walking
     */
    private static double coordinate(DataPoint dp, int avgnumber) {
        switch (avgnumber) {
        case 1:
            return dp.getAvg1();
        case 2:
            return dp.getAvg2();
        case 3:
            return dp.getAvg3();
        case 4:
            return dp.getAvg4();
        case 5:
            return dp.getAvg5();
        case 6:
            return dp.getAvg6();
        case 7:
            return dp.getAvg7();
        default:
            throw new IllegalArgumentException("avgnumber must be in 1.." + DIMENSIONS + " but was " + avgnumber);
        }
    }

    /** Largest value of coordinate {@code avgnumber} over all data points (requires at least one point). */
    private double getMaxXValue(int avgnumber) {
        double largest = coordinate(mDataPoints.elementAt(0), avgnumber);
        for (int i = 1; i < mDataPoints.size(); i++) {
            double value = coordinate(mDataPoints.elementAt(i), avgnumber);
            if (value > largest) {
                largest = value;
            }
        }
        return largest;
    }

    /** Smallest value of coordinate {@code avgnumber} over all data points (requires at least one point). */
    private double getMinXValue(int avgnumber) {
        double smallest = coordinate(mDataPoints.elementAt(0), avgnumber);
        for (int i = 1; i < mDataPoints.size(); i++) {
            double value = coordinate(mDataPoints.elementAt(i), avgnumber);
            if (value < smallest) {
                smallest = value;
            }
        }
        return smallest;
    }

    /** @return the number of clusters (k) */
    public int getKValue() {
        return clusters.length;
    }

    /** @return the maximum number of reassignment passes */
    public int getIterations() {
        return miter;
    }

    /** @return the number of data points handed to the constructor's vector */
    public int getTotalDataPoints() {
        return mDataPoints.size();
    }

    /** @return the sum of all clusters' {@code getSumSqr()} as of the last recalculation */
    public double getSWCSS() {
        return mSWCSS;
    }

    /** @return the cluster at index {@code pos} (0-based) */
    public Cluster getCluster(int pos) {
        return clusters[pos];
    }
}
Centroid.java
package eu.eodigos.kmean; /** * @author daoger * @version 1.0 * @k-mean Cluster */ class Centroid { private double avgC1, avgC2, avgC3, avgC4, avgC5, avgC6, avgC7; private Cluster mCluster; public Centroid(double ac1, double ac2, double ac3, double ac4, double ac5, double ac6, double ac7) { this.avgC1 = ac1; this.avgC2 = ac2; this.avgC3 = ac3; this.avgC4 = ac4; this.avgC5 = ac5; this.avgC6 = ac6; this.avgC7 = ac7; } public void calcCentroid() { // only called by CAInstance int numDP = mCluster.getNumDataPoints(); double temp1 = 0, temp2 = 0, temp3 = 0, temp4 = 0, temp5 = 0, temp6 = 0, temp7 = 0; int i; // caluclating the new Centroid for (i = 0; i < numDP; i++) { temp1 = temp1 + mCluster.getDataPoint(i).getAvg1(); // total for avg1 temp2 = temp2 + mCluster.getDataPoint(i).getAvg2(); // total for avg1 temp3 = temp3 + mCluster.getDataPoint(i).getAvg3(); // total for avg1 temp4 = temp4 + mCluster.getDataPoint(i).getAvg4(); // total for avg1 temp5 = temp5 + mCluster.getDataPoint(i).getAvg5(); // total for avg1 temp6 = temp6 + mCluster.getDataPoint(i).getAvg6(); // total for avg1 temp7 = temp7 + mCluster.getDataPoint(i).getAvg7(); // total for avg1 } this.avgC1 = temp1 / numDP; this.avgC2 = temp2 / numDP; this.avgC3 = temp3 / numDP; this.avgC4 = temp4 / numDP; this.avgC5 = temp5 / numDP; this.avgC6 = temp6 / numDP; this.avgC7 = temp7 / numDP; // calculating the new Euclidean Distance for each Data Point temp1 = 0; temp2 = 0; temp3 = 0; temp4 = 0; temp5 = 0; temp6 = 0; temp7 = 0; for (i = 0; i < numDP; i++) { mCluster.getDataPoint(i).calcEuclideanDistance(); } // calculate the new Sum of Squares for the Cluster mCluster.calcSumOfSquares(); } public void setCluster(Cluster c) { this.mCluster = c; } public double getAvgC1() { return avgC1; } public void setAvgC1(double avgC1) { this.avgC1 = avgC1; } public double getAvgC2() { return avgC2; } public void setAvgC2(double avgC2) { this.avgC2 = avgC2; } public double getAvgC3() { return avgC3; } public void setAvgC3(double 
avgC3) { this.avgC3 = avgC3; } public double getAvgC4() { return avgC4; } public void setAvgC4(double avgC4) { this.avgC4 = avgC4; } public double getAvgC5() { return avgC5; } public void setAvgC5(double avgC5) { this.avgC5 = avgC5; } public double getAvgC6() { return avgC6; } public void setAvgC6(double avgC6) { this.avgC6 = avgC6; } public double getAvgC7() { return avgC7; } public void setAvgC7(double avgC7) { this.avgC7 = avgC7; } public Cluster getCluster() { return mCluster; } }
Cluster.java
package eu.eodigos.kmean;

import java.util.Vector;

/**
 * A named group of {@link DataPoint}s together with its {@link Centroid}.
 *
 * @author daoger
 * @version 1.0
 */
class Cluster {
    private String mName;

    private Centroid mCentroid;

    private double mSumSqr;

    private Vector<DataPoint> mDataPoints;

    /**
     * Creates an empty cluster. The centroid is initially {@code null} and has to be
     * supplied through {@link #setCentroid(Centroid)} before points are added.
     */
    public Cluster(String name) {
        this.mName = name;
        this.mCentroid = null;
        this.mDataPoints = new Vector<DataPoint>();
    }

    public void setCentroid(Centroid c) {
        mCentroid = c;
    }

    public Centroid getCentroid() {
        return mCentroid;
    }

    /**
     * Adds a point to this cluster. The point is told about its new cluster first,
     * which makes it recompute its distance to this cluster's centroid; afterwards
     * the cluster's sum of squares is refreshed.
     *
     * @throws IllegalStateException if no centroid has been set yet
     */
    public void addDataPoint(DataPoint dp) {
        if (mCentroid == null) {
            // DataPoint.setCluster() dereferences the centroid; fail with a clear message instead.
            throw new IllegalStateException("setCentroid() must be called on " + mName + " before adding data points");
        }
        dp.setCluster(this);
        this.mDataPoints.addElement(dp);
        calcSumOfSquares();
    }

    /** Removes the point (if present) and refreshes the sum of squares. */
    public void removeDataPoint(DataPoint dp) {
        this.mDataPoints.removeElement(dp);
        calcSumOfSquares();
    }

    public int getNumDataPoints() {
        return this.mDataPoints.size();
    }

    public DataPoint getDataPoint(int pos) {
        return this.mDataPoints.elementAt(pos);
    }

    /**
     * Recomputes the sum of the squared distances between every member and the
     * centroid, using each point's cached distance ({@code getCurrentEuDt()}).
     * Called from {@link Centroid#calcCentroid()} and after every add/remove.
     */
    public void calcSumOfSquares() {
        double total = 0;
        for (DataPoint dp : this.mDataPoints) {
            // The cached value is the plain Euclidean distance, so it must be squared here.
            double distance = dp.getCurrentEuDt();
            total = total + distance * distance;
        }
        this.mSumSqr = total;
    }

    public double getSumSqr() {
        return this.mSumSqr;
    }

    public String getName() {
        return this.mName;
    }

    /** @return the live internal vector of members (not a copy) */
    public Vector<DataPoint> getDataPoints() {
        return this.mDataPoints;
    }
}
DataPoint.java
package eu.eodigos.kmean; /** * @author daoger * @version 1.0 * @k-mean Cluster */ public class DataPoint { private double avg1, avg2, avg3, avg4, avg5, avg6, avg7; private Integer userid; private Cluster mCluster; private double mEuDt; public DataPoint(double avg1, double avg2, double avg3, double avg4, double avg5, double avg6, double avg7, Integer userid) { this.avg1 = avg1; this.avg2 = avg2; this.avg3 = avg3; this.avg4 = avg4; this.avg5 = avg5; this.avg6 = avg6; this.avg7 = avg7; this.userid = userid; this.mCluster = null; } public void setCluster(Cluster cluster) { this.mCluster = cluster; calcEuclideanDistance(); } public void calcEuclideanDistance() { // called when DP is added to a cluster or when a Centroid is // recalculated. mEuDt = Math.sqrt(Math.pow((avg1 - mCluster.getCentroid().getAvgC1()), 2) + Math.pow((avg2 - mCluster.getCentroid().getAvgC2()), 2) + Math.pow((avg3 - mCluster.getCentroid().getAvgC3()), 2) + Math.pow((avg4 - mCluster.getCentroid().getAvgC4()), 2) + Math.pow((avg5 - mCluster.getCentroid().getAvgC5()), 2) + Math.pow((avg6 - mCluster.getCentroid().getAvgC6()), 2) + Math.pow((avg7 - mCluster.getCentroid().getAvgC7()), 2)); } public double testEuclideanDistance(Centroid c) { return Math.sqrt(Math.pow((avg1 - c.getAvgC1()), 2) + Math.pow((avg2 - c.getAvgC2()), 2) + Math.pow((avg3 - c.getAvgC3()), 2) + Math.pow((avg4 - c.getAvgC4()), 2) + Math.pow((avg5 - c.getAvgC5()), 2) + Math.pow((avg6 - c.getAvgC6()), 2) + Math.pow((avg7 - c.getAvgC7()), 2)); } public double getAvg1() { return avg1; } public void setAvg1(double avg1) { this.avg1 = avg1; } public double getAvg2() { return avg2; } public void setAvg2(double avg2) { this.avg2 = avg2; } public double getAvg3() { return avg3; } public void setAvg3(double avg3) { this.avg3 = avg3; } public double getAvg4() { return avg4; } public void setAvg4(double avg4) { this.avg4 = avg4; } public double getAvg5() { return avg5; } public void setAvg5(double avg5) { this.avg5 = avg5; } public double 
getAvg6() { return avg6; } public void setAvg6(double avg6) { this.avg6 = avg6; } public double getAvg7() { return avg7; } public void setAvg7(double avg7) { this.avg7 = avg7; } public Cluster getCluster() { return mCluster; } public double getCurrentEuDt() { return mEuDt; } /** * @return the userid */ public Integer getUserid() { return userid; } /** * @param userid * the userid to set */ public void setUserid(Integer userid) { this.userid = userid; } }