24 哈夫曼编码实现字符串的压缩与解压

哈夫曼编码

文章目录

  • 哈夫曼编码
    • 1. 步骤
    • 2. 数据压缩
    • 3. 数据解压

1. 步骤

  1. 传输的字符串:i like like like java do you like a java
  2. 计算各个字符对应的个数。
  3. 按照字符出现的次数构建一棵赫夫曼树, 次数作为权值。
24 哈夫曼编码实现字符串的压缩与解压_第1张图片
  4. 根据赫夫曼树,给各个字符规定编码 (前缀编码), 向左的路径为 0, 向右的路径为 1。则有,o: 1000 u: 10010 d: 100110 y: 100111 i: 101 a: 110 k: 1110 e: 1111 j: 0000 v: 0001 l: 001 空格: 01
  5. 按照上面的赫夫曼编码,i like like like java do you like a java 字符串对应的编码为 1010100110111101111010011011110111101001101111011110100001100001110011001111000011001111000100100100110111101111011100100001100001110

2. 数据压缩

  1. 定义哈夫曼树节点类

    /**
     * A node of a Huffman tree.
     *
     * <p>A node holds a byte value ({@code character}) and a weight ({@code value}).
     * A {@code null} character marks an internal node; the tree builder in this file
     * creates parent nodes that way. Nodes order themselves by ascending weight.
     */
    class HuffmanCodeNode implements Comparable<HuffmanCodeNode> {
        // Byte carried by a leaf; null for internal nodes.
        private Byte character;
        // Weight of the node (occurrence count for leaves, sum of children for parents).
        private int value;
        private HuffmanCodeNode leftNode;
        private HuffmanCodeNode rightNode;

        /**
         * Creates a node without children.
         *
         * @param character byte represented by the node, or {@code null} for an internal node
         * @param value     weight of the node
         */
        public HuffmanCodeNode(Byte character, int value) {
            this.character = character;
            this.value = value;
        }

        /** Prints this node and then its left and right subtrees (pre-order) to standard output. */
        public void preOrder() {
            System.out.println(this);
            if (this.leftNode != null) {
                this.leftNode.preOrder();
            }
            if (this.rightNode != null) {
                this.rightNode.preOrder();
            }
        }

        public Byte getCharacter() {
            return character;
        }

        public void setCharacter(byte character) {
            this.character = character;
        }

        public int getValue() {
            return value;
        }

        public void setValue(int value) {
            this.value = value;
        }

        public HuffmanCodeNode getLeftNode() {
            return leftNode;
        }

        public void setLeftNode(HuffmanCodeNode leftNode) {
            this.leftNode = leftNode;
        }

        public HuffmanCodeNode getRightNode() {
            return rightNode;
        }

        public void setRightNode(HuffmanCodeNode rightNode) {
            this.rightNode = rightNode;
        }

        @Override
        public String toString() {
            return "HuffmanCodeNode{" +
                    "character=" + character +
                    ", value=" + value +
                    '}';
        }

        /**
         * Orders nodes by ascending weight.
         *
         * @param o node to compare against
         * @return negative, zero or positive when this weight is smaller than, equal to or
         *         larger than the other weight
         */
        @Override
        public int compareTo(HuffmanCodeNode o) {
            // Integer.compare instead of subtraction: a difference of two ints can overflow
            // and flip the sign of the result.
            return Integer.compare(this.value, o.value);
        }
    }
    
  2. 得到每个节点的数据与权值

    /**
     * Counts how often each byte of {@code str} occurs and wraps every distinct byte
     * in a childless node whose weight is that count.
     *
     * @param str text to analyse; its bytes are obtained with {@code String.getBytes()}
     * @return one node per distinct byte, in the iteration order of the counting map
     */
    public static List<HuffmanCodeNode> getNodes(String str) {
        // byte -> number of occurrences
        HashMap<Byte, Integer> frequencies = new HashMap<>();
        for (byte symbol : str.getBytes()) {
            Integer seen = frequencies.get(symbol);
            frequencies.put(symbol, seen == null ? 1 : seen + 1);
        }

        // Turn every (byte, count) pair into a weighted node.
        ArrayList<HuffmanCodeNode> leaves = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : frequencies.entrySet()) {
            HuffmanCodeNode leaf = new HuffmanCodeNode(entry.getKey(), entry.getValue());
            leaves.add(leaf);
        }
        return leaves;
    }
    
  3. 根据节点构造哈夫曼树

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * <p>The list is modified in place: nodes are removed and parents appended, so on
     * return it contains only the root.
     *
     * @param huffmanCodeNodes nodes to combine, typically one per distinct symbol
     * @return the root of the tree, or {@code null} when the list is {@code null} or empty
     *         (there is nothing to build a tree from)
     */
    public static HuffmanCodeNode createHuffmanCodeTree(List<HuffmanCodeNode> huffmanCodeNodes) {
        // Without this guard an empty list would fail at get(0) below.
        if (huffmanCodeNodes == null || huffmanCodeNodes.isEmpty()) {
            return null;
        }
        while (huffmanCodeNodes.size() > 1) {
            // Sort ascending by weight so the two lightest nodes are at the front.
            Collections.sort(huffmanCodeNodes);
            // Lightest node becomes the left child.
            HuffmanCodeNode leftNode = huffmanCodeNodes.remove(0);
            // Second lightest node becomes the right child.
            HuffmanCodeNode rightNode = huffmanCodeNodes.remove(0);
            // The parent carries no symbol, only the combined weight.
            HuffmanCodeNode parent = new HuffmanCodeNode(null, leftNode.getValue() + rightNode.getValue());
            parent.setLeftNode(leftNode);
            parent.setRightNode(rightNode);
            // Put the parent back so it takes part in the next round.
            huffmanCodeNodes.add(parent);
        }
        // The single remaining node is the root.
        return huffmanCodeNodes.get(0);
    }
    
  4. 得到每个节点(字符)的哈夫曼编码

    // Kept so existing references to this field still resolve; the traversal below
    // starts from its own empty prefix and never writes to it.
    static StringBuilder stringBuilder = new StringBuilder();
    // Symbol -> Huffman code for the tree passed to the most recent getHuffmanCodes(root) call.
    static HashMap<Byte, String> hashMap = new HashMap<>();

    /**
     * Derives the Huffman code of every symbol in the tree: each step to a left child
     * contributes "0" and each step to a right child contributes "1".
     *
     * <p>The static {@code hashMap} is reset on every call so codes of a previously
     * processed tree cannot leak into the result. A tree consisting of a single symbol
     * node gets the one-bit code "0", because an empty code could not be written out.
     *
     * @param root root of the Huffman tree
     * @return a new map from symbol to code, or {@code null} when {@code root} is {@code null}
     */
    public static Map<Byte, String> getHuffmanCodes(HuffmanCodeNode root) {
        if (root == null) {
            return null;
        }
        // Discard codes left over from an earlier tree.
        hashMap.clear();
        if (root.getCharacter() != null) {
            // Only one distinct symbol: the root itself is the leaf.
            hashMap.put(root.getCharacter(), "0");
        } else {
            StringBuilder emptyPrefix = new StringBuilder();
            // Descend into the left subtree.
            getHuffmanCodes(root.getLeftNode(), "0", emptyPrefix);
            // Descend into the right subtree.
            getHuffmanCodes(root.getRightNode(), "1", emptyPrefix);
        }
        // Hand out a copy so a later call does not change a table the caller still holds.
        return new HashMap<>(hashMap);
    }

    /**
     * Records the codes of all symbols below {@code node} in {@code hashMap}.
     *
     * @param node current node; {@code null} is ignored
     * @param code bit ("0" or "1") of the edge leading to {@code node}
     * @param sb   path from the root to the parent of {@code node}; not modified
     */
    private static void getHuffmanCodes(HuffmanCodeNode node, String code, StringBuilder sb) {
        if (node == null) {
            return;
        }
        // Copy the prefix so sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(sb).append(code);
        if (node.getCharacter() == null) {
            // Internal node: keep descending on both sides.
            getHuffmanCodes(node.getLeftNode(), "0", path);
            getHuffmanCodes(node.getRightNode(), "1", path);
        } else {
            // Leaf: the accumulated path is the symbol's code.
            hashMap.put(node.getCharacter(), path.toString());
        }
    }
    
  5. 压缩字符串

    /**
     * Compresses a byte sequence with a Huffman code table.
     *
     * <p>The codes of all input bytes are concatenated into one bit string, which is then
     * cut into groups of eight bits; each group is parsed as a binary number and stored
     * as one byte. The last group may be shorter than eight bits.
     *
     * @param strBytes     bytes of the text to compress
     * @param huffmanCodes code table mapping each byte to its bit string
     * @return the packed bytes
     */
    public static byte[] zip(byte[] strBytes, Map<Byte, String> huffmanCodes) {
        // Concatenate the code of every input byte.
        StringBuilder bits = new StringBuilder();
        for (byte symbol : strBytes) {
            bits.append(huffmanCodes.get(symbol));
        }

        // One output byte per started group of eight bits.
        int bitCount = bits.length();
        byte[] packed = new byte[(bitCount + 7) / 8];
        for (int slot = 0; slot < packed.length; slot++) {
            int from = slot * 8;
            // The final group simply ends where the bit string ends.
            int to = Math.min(from + 8, bitCount);
            packed[slot] = (byte) Integer.parseInt(bits.substring(from, to), 2);
        }
        return packed;
    }
    
  6. 整合代码,方便使用

    /**
     * Convenience overload that runs the whole pipeline for a string: count the bytes,
     * build the Huffman tree, derive the code table and pack the encoded bits.
     *
     * @param str text to compress
     * @return the packed bytes
     */
    public static byte[] zip(String str) {
        HuffmanCodeNode root = createHuffmanCodeTree(getNodes(str));
        return zip(str.getBytes(), getHuffmanCodes(root));
    }
    
  7. 测试

    // Sample sentence used throughout the article.
    String str = "i like like like java do you like a java";
    // Compress it with the convenience overload.
    byte[] zip = HuffmanCodes.zip(str);
    // Print the packed bytes as a readable list.
    System.out.println(Arrays.toString(zip));
    

    输出:

    [-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28]
    

3. 数据解压

  1. 将压缩后的字节转为二进制字符串

    /**
     * Converts one byte to its binary string, treating the byte as unsigned.
     *
     * @param flag {@code true} to return exactly eight characters, left-padded with zeros;
     *             {@code false} to return the shortest representation without leading zeros
     * @param b    byte to convert
     * @return the bits of {@code b} as a string of '0' and '1' characters
     */
    public static String byteToBitString(boolean flag, byte b) {
        // Mask to the low eight bits: widening a negative byte to int sign-extends it,
        // which would otherwise yield a 32-character string in the unpadded case.
        int unsigned = b & 0xFF;
        if (!flag) {
            return Integer.toBinaryString(unsigned);
        }
        // Setting bit 8 forces a nine-character result; dropping the first character
        // leaves the eight zero-padded bits.
        return Integer.toBinaryString(unsigned | 0x100).substring(1);
    }
    
  2. 将字符串的哈夫曼编码转回为字符序列

    /**
     * Expands packed Huffman bytes back into the original byte sequence.
     *
     * <p>Every byte except the last is expanded to eight bits. The last byte is expanded
     * without leading zeros unless it is negative, so a final group whose bits start
     * with '0' cannot be restored exactly; the packed format carries no bit count.
     *
     * @param huffmanCodesBytes packed bytes to expand
     * @param huffmanCodes      code table (byte to bit string) used for compression
     * @return the decoded bytes
     * @throws StringIndexOutOfBoundsException if the trailing bits match no code in the table
     */
    public static byte[] decode(byte[] huffmanCodesBytes, Map<Byte, String> huffmanCodes) {
        // 1. Rebuild the bit string.
        StringBuilder bits = new StringBuilder();
        int lastIndex = huffmanCodesBytes.length - 1;
        for (int i = 0; i <= lastIndex; i++) {
            byte packed = huffmanCodesBytes[i];
            // A negative last byte has its high bit set, which a group shorter than eight
            // bits cannot produce, so it is a full group and must be expanded as one.
            boolean fullGroup = i < lastIndex || packed < 0;
            bits.append(byteToBitString(fullGroup, packed));
        }

        // 2. Invert the table: bit string -> byte.
        Map<String, Byte> symbolByCode = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            symbolByCode.put(entry.getValue(), entry.getKey());
        }

        // 3. Walk the bit string, growing the window until it matches a code.
        List<Byte> decoded = new ArrayList<>();
        int start = 0;
        while (start < bits.length()) {
            int end = start + 1;
            Byte symbol = symbolByCode.get(bits.substring(start, end));
            while (symbol == null) {
                end++;
                // substring throws once the window runs past the end of the bits.
                symbol = symbolByCode.get(bits.substring(start, end));
            }
            decoded.add(symbol);
            start = end;
        }

        // 4. Unbox into a primitive array.
        byte[] bytes = new byte[decoded.size()];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = decoded.get(i);
        }
        return bytes;
    }
    

你可能感兴趣的:(数据结构与算法)