// Kmeans

/**

 * 
 * 
 * 
 * 
 * */
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

/**
 * Minimal k-means clustering over a comma-separated data file.
 *
 * <p>Constructing an instance loads the data, picks {@code k} distinct random data points as
 * the initial cluster centers, iterates until the sum of squared errors (SSE) stops changing
 * (or drops to the configured threshold) and finally writes one text file per cluster into
 * the {@code result} directory.
 *
 * <p>Every data row and every center is stored as a {@code Double[featureCount + 1]}:
 * index 0 holds the 1-based row number (unused for centers) and indices
 * {@code 1..featureCount} hold the feature values.
 *
 * <p>NOTE(review): the input layout is assumed to be {@code label,f1,f2,...} - column 0 is
 * the known class label and columns {@code 1..featureCount} are numeric features. Confirm
 * against the real data files.
 *
 * <p>{@code k}, {@code SSE} and {@code kCores} are static and therefore shared by all
 * instances; do not run two instances concurrently.
 */
public class kMeans {

    /** Number of clusters. */
    private static int k;
    /** Path of the comma-separated input file. */
    private String dataFilePath;
    /** Number of numeric features per data row. */
    private int featureCount;
    /** SSE of the most recent iteration; reset to {@code Double.MAX_VALUE} on construction. */
    private static Double SSE = Double.MAX_VALUE;
    /** Iteration stops early once the SSE is no longer greater than this value. */
    private double SSEthreadhold;
    /** All loaded rows: index 0 = 1-based row number, 1..featureCount = features. */
    List<Double[]> srcData = new ArrayList<>();
    /** Label (column 0) of each loaded row, in load order. */
    List<String> correctClass = new ArrayList<>();
    /** Current cluster centers, one row per cluster; index 0 of each row is unused. */
    static Double[][] kCores;
    /** Cluster index -> rows currently assigned to that cluster (empty clusters are absent). */
    Map<Integer, List<Double[]>> Cdata = new HashMap<>();

    /**
     * Clusters the file and iterates until the SSE no longer changes.
     *
     * @param k            number of clusters
     * @param featureCount number of numeric features per row
     * @param dataFilePath path of the comma-separated input file
     * @throws IOException if the result files cannot be written
     */
    public kMeans(int k, int featureCount, String dataFilePath) throws IOException {
        // A threshold of 0.0 never triggers an early stop before convergence.
        this(k, featureCount, dataFilePath, 0.0);
    }

    /**
     * Clusters the file and iterates until the SSE no longer changes or is no longer greater
     * than {@code SEthreadhold}.
     *
     * @param k            number of clusters
     * @param featureCount number of numeric features per row
     * @param dataFilePath path of the comma-separated input file
     * @param SEthreadhold SSE value at or below which iteration stops early
     * @throws IOException if the result files cannot be written
     */
    public kMeans(int k, int featureCount, String dataFilePath, double SEthreadhold) throws IOException {
        kMeans.k = k;
        this.featureCount = featureCount;
        this.dataFilePath = dataFilePath;
        // The threshold must be in place before Cluster() runs, otherwise it has no effect.
        this.SSEthreadhold = SEthreadhold;
        // SSE is static: reset it so a previous instance's value cannot end this run early.
        SSE = Double.MAX_VALUE;
        kCores = new Double[k][featureCount + 1];
        initSrcData();
        initKcoresByRandomFunction();
        Cluster();
    }

    /**
     * Loads {@link #srcData} and {@link #correctClass} from {@link #dataFilePath}.
     * Blank lines are skipped. I/O problems are reported on the console and leave the
     * data loaded so far in place.
     *
     * @throws IllegalArgumentException if a line has fewer than {@code featureCount + 1} columns
     */
    void initSrcData() {
        int count = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(dataFilePath))) {
            String s;
            while ((s = br.readLine()) != null) {
                if (s.trim().isEmpty()) {
                    continue;
                }
                String[] tep = s.split(",");
                if (tep.length < featureCount + 1) {
                    throw new IllegalArgumentException(
                            "Expected at least " + (featureCount + 1) + " columns but got "
                                    + tep.length + " in line: " + s);
                }
                Double[] srcDataTep = new Double[featureCount + 1];
                // Index 0 carries the 1-based row number used later to look up the label.
                srcDataTep[0] = (double) (++count);
                for (int i = 1; i <= featureCount; i++) {
                    srcDataTep[i] = Double.parseDouble(tep[i]);
                }
                srcData.add(srcDataTep);
                correctClass.add(tep[0]);
            }
        } catch (FileNotFoundException e) {
            System.out.println("srcData FilePath is not accessable!");
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Seeds {@link #kCores} with {@code k} distinct, randomly chosen data rows.
     *
     * @throws IllegalArgumentException if fewer than {@code k} rows were loaded
     */
    void initKcoresByRandomFunction() {
        if (srcData.size() < k) {
            throw new IllegalArgumentException(
                    "Need at least k=" + k + " data points to seed the centers, but only "
                            + srcData.size() + " were loaded");
        }
        Set<Integer> seeds = new HashSet<>();
        Random rand = new Random();
        int i = 0;
        while (i < k) {
            // nextInt(bound) is exclusive of bound, so every row (including the last) is eligible.
            int index = rand.nextInt(srcData.size());
            if (!seeds.add(index)) {
                continue; // already used as a center, draw again
            }
            for (int j = 1; j < featureCount + 1; j++) {
                kCores[i][j] = srcData.get(index)[j];
            }
            i++;
        }
    }

    /**
     * Performs one k-means iteration: assigns every row to its nearest center, recomputes
     * the centers and updates the SSE.
     *
     * @return {@code true} if the SSE changed in this iteration, {@code false} if it is
     *         identical to the previous one (converged)
     * @throws IOException never thrown here; kept for interface compatibility
     */
    boolean clusterOnce() throws IOException {
        Cdata.clear();
        System.out.println(srcData.size());
        for (Double[] s : srcData) {
            int index = findNearest(s);
            List<Double[]> members = Cdata.get(index);
            if (members == null) {
                members = new ArrayList<>();
                Cdata.put(index, members);
            }
            members.add(s);
        }
        newCores();

        double currentSse = newSSE();
        if (currentSse == SSE) {
            return false;
        }
        SSE = currentSse;
        return true;
    }

    /**
     * Runs iterations until the SSE stops changing or is no longer greater than the
     * threshold, then writes the clusters to disk.
     *
     * @throws IOException if the result files cannot be written
     */
    void Cluster() throws IOException {
        boolean flag = clusterOnce();
        while (flag && SSE > SSEthreadhold) {
            flag = clusterOnce();
            System.out.println(SSE);
        }
        writeResult2File();
    }

    /**
     * Finds the center closest to a row.
     *
     * @param s row in the {@code Double[featureCount + 1]} layout
     * @return index of the nearest center in {@link #kCores}; the lowest index wins ties
     */
    int findNearest(Double[] s) {
        double DistanceTep = Double.MAX_VALUE;
        int index = 0;
        for (int i = 0; i < k; i++) {
            double d = Distance(s, kCores[i]);
            if (d < DistanceTep) {
                DistanceTep = d;
                index = i;
            }
        }
        return index;
    }

    /**
     * Parses the numeric middle columns of a comma-separated line, dropping the first and
     * the last column.
     *
     * @param s comma-separated line
     * @return the values of columns {@code 1..n-2}; empty if the line has fewer than three columns
     * @throws NumberFormatException if a middle column is not numeric
     */
    double[] split2Array(String s) {
        String[] tep = s.split(",");
        double[] data = new double[Math.max(0, tep.length - 2)];
        for (int i = 1; i < tep.length - 1; i++) {
            data[i - 1] = Double.parseDouble(tep[i]);
        }
        return data;
    }

    /**
     * Euclidean distance between two rows, ignoring index 0 (the row number).
     *
     * @param a first row
     * @param b second row
     * @return the distance, or {@code 0.0} (after printing an error) if the lengths differ
     */
    double Distance(Double[] a, Double[] b) {
        if (a.length != b.length) {
            System.out.println("Error Error in the Distance:  data length don`t match");
            return 0.0;
        }
        double distance = 0.0;
        for (int i = 1; i < a.length; i++) {
            double diff = a[i] - b[i];
            distance += diff * diff;
        }
        return Math.sqrt(distance);
    }

    /**
     * Computes the sum of squared distances between every assigned row and its cluster center.
     *
     * @return the SSE for the current assignment in {@link #Cdata} and centers in {@link #kCores}
     */
    double newSSE() {
        double newSse = 0.0;
        for (int i = 0; i < k; i++) {
            List<Double[]> iCluster = Cdata.get(i);
            if (iCluster == null) {
                continue; // cluster received no rows in this iteration
            }
            Double[] iCore = kCores[i];
            for (Double[] s : iCluster) {
                double d = Distance(s, iCore);
                newSse += d * d;
            }
        }
        return newSse;
    }

    /**
     * Replaces the center of every non-empty cluster with the mean of its rows. Centers of
     * clusters without rows are left unchanged.
     */
    void newCores() {
        for (Map.Entry<Integer, List<Double[]>> entry : Cdata.entrySet()) {
            List<Double[]> tep = entry.getValue();
            double[] sums = new double[featureCount + 1];
            for (Double[] dou : tep) {
                for (int j = 1; j < featureCount + 1; j++) {
                    sums[j] += dou[j];
                }
            }
            // Cdata only contains clusters with at least one row, so the size is never zero.
            int count = tep.size();
            Double[] core = kCores[entry.getKey()];
            for (int t = 0; t < featureCount + 1; t++) {
                core[t] = sums[t] / count;
            }
        }
    }

    /**
     * Writes each cluster to {@code result/<clusterIndex>.txt}, one row per line in the
     * form {@code label f1 f2 ... }. The {@code result} directory is created if missing.
     *
     * @throws IOException if the directory or a file cannot be created or written
     */
    void writeResult2File() throws IOException {
        java.io.File dir = new java.io.File("result");
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + dir.getAbsolutePath());
        }
        for (Map.Entry<Integer, List<Double[]>> entry : Cdata.entrySet()) {
            String filename = "result//" + entry.getKey().toString() + ".txt";
            // try-with-resources guarantees the buffered output is flushed and the file closed.
            try (FileWriter fw = new FileWriter(filename)) {
                for (Double[] dou : entry.getValue()) {
                    StringBuilder s = new StringBuilder();
                    // dou[0] is the 1-based row number, correctClass is 0-based.
                    s.append(correctClass.get(dou[0].intValue() - 1)).append(" ");
                    for (int j = 1; j < featureCount + 1; j++) {
                        s.append(dou[j]).append(" ");
                    }
                    s.append("\n");
                    fw.write(s.toString());
                }
            }
        }
    }

    /**
     * Clusters {@code total.txt} (two features per row) into three clusters.
     *
     * @param args unused
     * @throws IOException if the result files cannot be written
     */
    public static void main(String args[]) throws IOException {
        kMeans kk = new kMeans(3, 2, "total.txt");
    }
}

// You may also be interested in: (machine learning)