Java 实现数据挖掘的独热编码(One-Hot)

//    独热编码,对某一列进行编码
    /**
     * One-hot encodes one column of a list of comma-separated rows, in place.
     *
     * <p>Every distinct value found in column {@code index} is assigned a slot in a
     * 0/1 vector whose length equals the number of distinct values. For each row,
     * the vector with a single 1 at that row's slot is rendered with
     * {@code array2string} and appended to the row after a comma. The original
     * column is kept; the encoding is added at the end of the row.
     *
     * <p>Slot order follows the iteration order of a {@link HashSet} and is
     * therefore unrelated to the order in which values appear in the input.
     *
     * <p>The label {@code "键值总数:"} (via {@code pln}) and the number of distinct
     * values are printed as a side effect.
     *
     * @param list  rows of comma-separated fields; modified in place
     * @param index zero-based index of the column to encode
     * @return the same {@code list} instance, with the encoding appended to every row
     * @throws ArrayIndexOutOfBoundsException if any row has no field at {@code index};
     *         this is detected before any row is modified
     * @throws Exception declared for compatibility with existing callers
     */
    public static ArrayList<String> oneHot(ArrayList<String> list, int index) throws Exception {

        // Pass 1: extract the target field of every row once and collect the distinct values.
        String[] keys = new String[list.size()];
        HashSet<String> set = new HashSet<>();
        for (int i = 0; i < keys.length; i++) {
            // A limit of -1 keeps trailing empty fields, so "a,b," really has three columns.
            String[] fields = list.get(i).split(",", -1);
            if (index < 0 || index >= fields.length) {
                throw new ArrayIndexOutOfBoundsException(
                        "row " + i + " has " + fields.length + " field(s); column index "
                                + index + " is out of range");
            }
            keys[i] = fields[index];
            set.add(keys[i]);
        }
        pln("键值总数:");
        System.out.println(set.size());

        // Map every distinct value to its slot in the one-hot vector.
        HashMap<String, Integer> toIndex = new HashMap<>();
        int ind = 0;
        for (String key : set) {
            toIndex.put(key, ind);
            ind++;
        }

        // Pass 2: append the encoded vector to each row.
        for (int i = 0; i < keys.length; i++) {
            int[] a = new int[set.size()];
            a[toIndex.get(keys[i])] = 1;
            list.set(i, list.get(i) + "," + array2string(a));
        }

        return list;
    }

你可能感兴趣的:(java)