java实现huffman编码

    最近闲来无事,在小破站(B站)看到了Huffman编码的原理,就想着用Java自己实现一遍。Huffman编码的原理是:统计每个字符出现的次数作为权值来构建Huffman树,然后把通向左子树的边记为0、通向右子树的边记为1,从根节点到叶子节点的路径就是该字符的编码。因为字符对应的节点都是叶子节点,任何一个字符的编码都不会是另一个字符编码的前缀,所以解码时不会产生歧义,以此实现对字符串的压缩编解码。话不多说,直接上代码:
    首先定义树节点对象(HTNode 是 TestHuffmanTree 的静态内部类,后面的代码通过 TestHuffmanTree.HTNode 引用它):
public static class HTNode {

        /**
         * 左子节点
         */
        private HTNode left;

        /**
         * 右子节点
         */
        private HTNode right;

        /**
         * 权重
         */
        private int power;

        /**
         * 对应的字符
         */
        private E v;

        public HTNode(int power){
            this.setPower(power);
        }

        public HTNode(){

        }

        public HTNode(int power, E v){
            this.power = power;
            this.v = v;
        }

        public HTNode getLeft() {
            return left;
        }

        public void setLeft(HTNode left) {
            this.left = left;
        }

        public HTNode getRight() {
            return right;
        }

        public void setRight(HTNode right) {
            this.right = right;
        }

        public int getPower() {
            return power;
        }

        public void setPower(int power) {
            this.power = power;
        }

        public E getV() {
            return v;
        }

        public void setV(E v) {
            this.v = v;
        }

        /**
         * 前序遍历函数
         * @param node
         */
        public void printfVLR(HTNode node){
            if(node == null){
                return;
            }
            System.out.println(node.power+"     "+node.v);
            //遍历左子树
            printfVLR(node.left);
            //遍历右子树
            printfVLR(node.right);
        }
    }

接下来写个排序函数(放在 QuickSort 类中,按节点权重升序排列),就用快排吧:

/**
 * Sorts {@code arr[start..end]} (both bounds inclusive) in place, in ascending
 * order of {@code getPower()}, using quicksort.
 *
 * <p>The relative order of elements with equal weight is not specified.
 *
 * @param arr   array to sort
 * @param start index of the first element of the range
 * @param end   index of the last element of the range
 * @param <T>   node type; must expose {@code getPower()}
 */
public static <T extends TestHuffmanTree.HTNode<?>> void quickSortT(T[] arr, int start, int end) {
    if (start >= end) {
        return;
    }
    int midIdx = subQuickSort(arr, start, end);
    // Recurse on the left part of THIS range. Starting from index 0 here would
    // re-sort the already sorted prefix of the array on every right-hand call.
    quickSortT(arr, start, midIdx - 1);
    quickSortT(arr, midIdx + 1, end);
}

    /**
     * Partitions {@code arr[start..end]} (both bounds inclusive) around the weight
     * of {@code arr[start]}.
     *
     * <p>On return the pivot sits at the returned index, every element to its left
     * within the range has a weight not greater than the pivot's, and every element
     * to its right has a weight not smaller than the pivot's.
     *
     * @param arr   array to partition in place
     * @param start index of the first element of the range; its element is the pivot
     * @param end   index of the last element of the range
     * @param <T>   node type; must expose {@code getPower()}
     * @return the final index of the pivot
     */
    public static <T extends TestHuffmanTree.HTNode<?>> int subQuickSort(T[] arr, int start, int end) {
        // Taking the pivot out leaves a "hole" at start that the scans below refill.
        T mid = arr[start];
        int pivotPower = mid.getPower();
        while (start < end) {
            // Scan from the right for an element lighter than the pivot. This must
            // be a loop: a single conditional step only moves one slot per pass.
            while (start < end && arr[end].getPower() >= pivotPower) {
                end--;
            }
            arr[start] = arr[end];
            // Scan from the left for an element heavier than the pivot.
            while (start < end && arr[start].getPower() <= pivotPower) {
                start++;
            }
            arr[end] = arr[start];
        }
        // start == end here: this is the hole where the pivot belongs.
        arr[start] = mid;
        return start;
    }

然后是构建Huffman树的函数:

/**
 * Builds a Huffman tree from a table of symbol weights.
 */
public class TestHuffmanTree {

    /** Root of the most recently built non-empty tree. */
    private static HTNode<?> root;

    /**
     * Builds a Huffman tree for the given symbols.
     *
     * @param map maps each symbol to its weight (e.g. its number of occurrences)
     * @param <E> symbol type
     * @return the root of the tree, or {@code null} if {@code map} is empty
     */
    public static <E> HTNode<E> getHuffmanTree(Map<E, Integer> map) {
        return huffmanTree(transfer(map));
    }

    /**
     * Converts a symbol/weight table into an array holding one node per entry.
     *
     * @param map maps each symbol to its weight
     * @param <E> symbol type
     * @return an array with one node per entry, in the map's iteration order
     */
    public static <E> HTNode<E>[] transfer(Map<E, Integer> map) {
        // Generic arrays cannot be created directly. The cast is safe because the
        // array is filled below with HTNode<E> instances only.
        @SuppressWarnings("unchecked")
        HTNode<E>[] htNodes = (HTNode<E>[]) new HTNode[map.size()];
        int i = 0;
        for (Map.Entry<E, Integer> entry : map.entrySet()) {
            htNodes[i++] = new HTNode<>(entry.getValue(), entry.getKey());
        }
        return htNodes;
    }

    /**
     * Repeatedly merges the two lightest nodes until a single root remains, then
     * prints the elapsed time in nanoseconds to standard output.
     *
     * <p>The first sorting pass reorders the array passed in by the caller.
     *
     * @param arr nodes to combine
     * @param <E> symbol type
     * @return the root of the tree, or {@code null} if {@code arr} is null or empty
     */
    private static <E> HTNode<E> huffmanTree(HTNode<E>[] arr) {
        long start = System.nanoTime();
        if (arr == null || arr.length == 0) {
            return null;
        }
        while (arr.length > 1) {
            // Ascending sort puts the two lightest nodes at indices 0 and 1.
            QuickSort.quickSortT(arr, 0, arr.length - 1);
            HTNode<E> n1 = arr[0];
            HTNode<E> n2 = arr[1];
            HTNode<E> newNode = new HTNode<>(n1.getPower() + n2.getPower());
            newNode.setLeft(n1);
            newNode.setRight(n2);
            // Drop the first slot and overwrite the second with the merged node,
            // shrinking the working set by one.
            arr = Arrays.copyOfRange(arr, 1, arr.length);
            arr[0] = newNode;
        }
        HTNode<E> result = arr[0];
        root = result;
        System.out.println("耗时:" + (System.nanoTime() - start));
        return result;
    }
}

然后是对huffman树进行编码

/**
 * Demo: builds a Huffman tree for a sample sentence, derives the code of every
 * character and prints the encoded bit string.
 */
public class TestHuffmanCode {

    /** Code table filled by {@link #getHuffmanCode}: character to its bit string. */
    static Map<Character, String> huffmanCodeMap = new HashMap<>();

    /** Path from the root to the node currently visited by {@link #getHuffmanCode}. */
    static StringBuilder sb = new StringBuilder();

    /** Characters of the string most recently passed to {@link #getCharMap}. */
    static char[] chars;

    public static void main(String[] args) {
        String s = "i have a dream, this dream depends on american dream";
        Map<Character, Integer> charMap = getCharMap(s);
        TestHuffmanTree.HTNode<Character> huffmanTree = TestHuffmanTree.getHuffmanTree(charMap);
        huffmanTree.printfVLR(huffmanTree);
        getHuffmanCode(huffmanTree, null);
        huffmanCodeMap.forEach((k, v) -> System.out.println(k + "==" + v));

        // Concatenate the code of every input character, in input order.
        StringBuilder result = new StringBuilder();
        for (char c : chars) {
            result.append(huffmanCodeMap.get(c));
        }
        System.out.println("最终编码为:" + result);
    }

    /**
     * Counts how often each character occurs in {@code s}.
     *
     * <p>Side effect: stores the characters of {@code s} in {@link #chars}.
     *
     * @param s text to analyse
     * @return a map from each distinct character to its number of occurrences
     */
    private static Map<Character, Integer> getCharMap(String s) {
        Map<Character, Integer> map = new HashMap<>();
        chars = s.toCharArray();
        for (char c : chars) {
            map.merge(c, 1, Integer::sum);
        }
        return map;
    }

    /**
     * Walks the tree in pre-order and records the code of every node that carries
     * a symbol in {@link #huffmanCodeMap}. The path to the current node is kept in
     * {@link #sb}.
     *
     * @param node      subtree to visit; {@code null} is ignored
     * @param direction edge taken to reach {@code node}: 0 for left, 1 for right,
     *                  or {@code null} for the root
     */
    private static void getHuffmanCode(TestHuffmanTree.HTNode<Character> node, Integer direction) {
        if (node == null) {
            return;
        }
        if (direction != null) {
            sb.append(direction);
        }
        if (node.getV() != null) {
            // A tree made of a single leaf has an empty path; give that symbol the
            // one-bit code "0" so the encoded output is not empty.
            huffmanCodeMap.put(node.getV(), sb.length() == 0 ? "0" : sb.toString());
        } else {
            getHuffmanCode(node.getLeft(), 0);
            getHuffmanCode(node.getRight(), 1);
        }
        // Undo exactly what this call appended. The root appended nothing, so
        // nothing is removed for it (this also avoids deleting from an empty path).
        if (direction != null) {
            sb.deleteCharAt(sb.length() - 1);
        }
    }
}

接下来是打印结果

耗时:698300
52     null
21     null
9      
12     null
5     d
7     a
31     null
14     null
7     null
3     i
4     m
7     e
17     null
8     null
4     null
2     s
2     null
1     p
1     o
4     r
9     null
4     null
2     h
2     null
1     v
1     c
5     null
2     null
1     ,
1     t
3     n
 ==00
a==011
c==111011
d==010
e==101
h==11100
i==1000
,==111100
m==1001
n==11111
o==110011
p==110010
r==1101
s==11000
t==111101
v==111010
最终编码为:1000001110001111101010100011000101101101011100111110000111101111001000110000001011011010111001000101011100101011111101011000001100111111100011100110111011000111011011111110001011011010111001

完毕!

你可能感兴趣的:(java实现huffman编码)