JAVA代码编写哈夫曼编码实现数据和文件的压缩和解压算法

压缩算法思路:
1.将待压缩的字符串变成字节数组 byte[] contentBytes;

2.将字节数组每个字符出现的次数统计出来变为Node类(value为字符对应的Ascci码,weight为字符出现的次数也是哈夫曼树的权值),存入List集合中方便下面构建哈夫曼树;

 List nodes = new ArrayList<>();
        //遍历bytes,记录每个value出现的次数,出现的次数就是huffman树的权值
        Map counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            if (count == null) {
                counts.put(b, 1);
            } else {
                counts.put(b, count + 1);
            }
        }
        for (Map.Entry map : counts.entrySet()) {
            nodes.add(new Node(map.getKey(), map.getValue()));
        }
        return nodes;

3.构建哈弗曼树, private static Map huffmanCode = new HashMap(); 用huffmanCode存放对应字符的Ascci码,和对应路径 (构建哈夫曼树的过程请看上篇文章)

private static Node getHuffman(List nodes) {
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            Node leftNode = nodes.get(0);
            Node rightNode = nodes.get(1);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.remove(leftNode);
            nodes.remove(rightNode);
            nodes.add(parent);
        }
        return nodes.get(0);
    }

4.遍历哈弗曼树得到字符的二进制路径,从根节点出发,往左为0,往右为1直到Node.value!=null,说明找到了对应字符

 //得到所有字符的路径
    private static void getAllCodes(Node root) {
        if (root == null) {
            return;
        }
        //记录路径,遍历huffman树时往左为0,往右为1直到找到对应字符;
        StringBuilder stringBuilder = new StringBuilder();
        getcode(root.left, "0", stringBuilder);
        getcode(root.right, "1", stringBuilder);

    }


 /*
     * @param node 当前需要遍历的节点
     * @param code 值为"0"或者"1"代表向左或者向右
     * @param stringBuilder
     * @return : void
     * @date : 2020/3/8 13:20
     */
    private static void getcode(Node node, String code, StringBuilder stringBuilder) {
        StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
        stringBuilder2.append(code);
        if (node != null) {
            if (node.value == null) {//证明这个点点为非叶子节点
                //向左递归
                getcode(node.left, "0", stringBuilder2);
                //向右递归
                getcode(node.right, "1", stringBuilder2);

            } else {
                huffmanCode.put(node.value, stringBuilder2.toString());
            }
        }
    }

5.将按照contentBytes的顺序逐个用二进制路径代替字符形成一个Stringbuiled字符串

private static byte[] getZipArr(byte[] bytes, Map huffmanCode) {
        //按照bytes中的字符排序顺序用路径替代字符存入stringBuilder
        StringBuilder stringBuilder = new StringBuilder();
        for (byte b : bytes) {
            stringBuilder.append(huffmanCode.get(b));
        }
        //将stringBuilder每8个二进制当做一个byte类型存入字节数组
        int len = 0;
        //用于补齐不足8位的地方
        String str = "";
        //确定字节数组长度
        if (stringBuilder.length() % 8 == 0) {
            len = stringBuilder.length() / 8;
        } else {
            len = stringBuilder.length() / 8 + 1;
            //不足8位的地方需要补0的个数
            zeloCount = 8 - (stringBuilder.length() % 8);
            for (int i = 0; i < zeloCount; i++) {
                str += "0";
              //  System.out.println("zelo:"+str);
            }
            stringBuilder.append(str);
        }
       // System.out.println("stringBuilder1:"+stringBuilder);
        //将stringbuild里面的二进制路径字符串每8位进行翻译成十进制
        return stringTransfertoByteArr(stringBuilder, len);
    }


6.将Stringbuiled按照每8位2进制路径转为一个十进制的数存在字节数组中;

 //将stringbuild里面的二进制路径字符串每8位进行翻译成十进制
    private static byte[] stringTransfertoByteArr(StringBuilder stringBuilder, int len) {
        byte[] huffmanCodeArr = new byte[len];
        //huffmanCodeArr的索引
        int index = 0;
        for (int i = 0; i < stringBuilder.length(); i += 8) {
            if (i + 8 > stringBuilder.length()) {//不够8位
                huffmanCodeArr[index] = (byte) Integer.parseInt(stringBuilder.substring(i), 2);
            } else {
                huffmanCodeArr[index] = (byte) Integer.parseInt(stringBuilder.substring(i, i + 8), 2);
            }
            index++;
        }
        return huffmanCodeArr;
    }

解压算法思路:

7.将存放十进制的字节数组转变为它对应的二进制路径,然后用Stringbuild汇总这些二进制就可以得到5中的Stringbuild;

 //将压缩后的字节数组变为用二进制路径表示字符的字符串
    private static StringBuilder byteArrtransfertoString(byte[] bytes) {
        StringBuilder stringBuilder = new StringBuilder();
        //将byte数组转化为二进制字符串
        for (int i = 0; i < bytes.length; i++) {
            byte b = bytes[i];
            boolean flag = true;
            String str = byteTransfertoBinaryString(b);
            if (i == bytes.length - 1) {
             //   System.out.println("str1:"+str);
                str = str.substring(0, 8-zeloCount);
             //   System.out.println("str2:"+str);
            }
            stringBuilder.append(str);
        }
        //System.out.println("stringBuilder2:"+stringBuilder);
        return stringBuilder;
    }
    
//将byte转化为二进制存入String
    private static String byteTransfertoBinaryString(byte b) {
        int temp = b;
        temp |= 256;
        String str = Integer.toBinaryString(temp);
        int len = str.length();
        return str.substring(len - 8);

    }

8.然后将huffmanCode反转变为Map map= new HashMap(),然后遍历7中的Stringbuild如果map中存在Stringbuild中某一段的路径就将它对应的字符添加到一个list中,遍历完成后就将list赋予给一个新的字节数组,打印该数组就得到了原来的字符串数组;


//遍历7中的Stringbuild如果map中存在Stringbuild中某一段的路径就添加到一个list中
private static List getChar(Map map, StringBuilder stringBuilder) {
        List list = new ArrayList();
        for (int i = 0; i < stringBuilder.length(); ) {
            int count = 1;
            boolean flag = true;
            //取出路径对应的字符
            while (flag) {
                if (!map.containsKey(stringBuilder.substring(i, i + count))) {
                    count++;
                } else {
                    flag = false;
                }
            }
            list.add(map.get(stringBuilder.substring(i, i + count)));
            i += count;
        }
        return list;
    }

private static byte[] decode(byte[] bytes, Map huffmanCode) {
        //将压缩后的字节数组变为用二进制路径表示字符的字符串,然后将字符串中路径变成对应字符
        StringBuilder stringBuilder = byteArrtransfertoString(bytes);
        Map map = new HashMap();
        //将huffmanCode反转,'a'->101 编程101->'a',将路径转为对应的字符
        for (Map.Entry entry : huffmanCode.entrySet()) {
            map.put(entry.getValue(), entry.getKey());
        }
        //将路径翻译成对应的字符
        List list = getChar(map, stringBuilder);
        //将list变为数组
        byte[] contenArr = new byte[list.size()];
        for (int i = 0; i < list.size(); i++) {
            contenArr[i] = list.get(i);
        }
        return contenArr;
    }

完整代码如下:

package com.yg.tree.huffman;/*
@author  Mu_Mu
@date    2020/3/8  10:16
*/

import com.yg.tree.HuffmanTree;

import java.io.*;
import java.util.*;

public class HaffmanCode {
    //存放对应字符的Ascci码,和对应路径
    private static Map huffmanCode = new HashMap();
    //补全最后一位需要加0的个数
    private static Integer zeloCount = 0;

    public static void main(String[] args) {
        //文件压缩
        String src = "D:/1.png";
        String dst = "D:/src.zip";
        zipFile(src, dst);
        System.out.println("压缩完成!");

        //解压
        String zipFile = "D:/src.zip";
        String dstFile = "D:/2.png";
        unZipFile(zipFile, dstFile);
        System.out.println("解压成功!");

//        String content = "i like java do you like a java very much";
//        byte[] contentBytes = content.getBytes();
//        //数据压缩
//        byte[] zipArr = huffmanZip(contentBytes);
//        System.out.println(Arrays.toString(zipArr));
//
//        //数据解压
//        byte[] decodeArr = decode(zipArr, huffmanCode);
//         System.out.println(new String(decodeArr).toString());

    }

    //压缩文件
    private static void zipFile(String src, String dst) {
        InputStream is = null;
        OutputStream os = null;
        ObjectOutputStream oos = null;
        try {
            is = new FileInputStream(src);
            //创建一个和文件一样大的字节数组
            byte[] b = new byte[is.available()];
            //将文件以字节流的形式读到字节数组中
            is.read(b);
            byte[] huffmanCodeArr = huffmanZip(b);
            os = new FileOutputStream(dst);
            oos = new ObjectOutputStream(os);
            //将字节数组,和哈夫曼编码以对象的形式输出去
            oos.writeObject(huffmanCodeArr);
            oos.writeObject(huffmanCode);
            //将字节数组最后一位要补0的个数传过去
            oos.writeObject(zeloCount);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                is.close();
                os.close();
                oos.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }


    }

    //解压文件
    /*
     * @param zipFile 准备解压的文件
     * @param dstFile 将文件解压的路径
     * @return : void
     * @date : 2020/3/10 12:18
     */
    private static void unZipFile(String zipFile, String dstFile) {
        InputStream is = null;
        ObjectInputStream ois = null;
        OutputStream os = null;
        try {
            is = new FileInputStream(zipFile);
            ois = new ObjectInputStream(is);
            //readObject()是按照你存入时的顺序依次读取对象
            byte[] huffmanCodeArr = (byte[]) ois.readObject();
            huffmanCode = (Map) ois.readObject();
            zeloCount = (Integer) ois.readObject();
           // System.out.println("zeloCount:"+zeloCount);
            byte[] contentArr = decode(huffmanCodeArr, huffmanCode);
            os=new FileOutputStream(dstFile);
            os.write(contentArr);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                is.close();
                ois.close();
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    //解码
    /*
     * @param bytes  压缩后的字节数组
     * @param huffmanCode 字符,和字符对应的路径组成的map
     * @return : byte[] 返回元数组
     * @date : 2020/3/9 14:26
     */
    private static byte[] decode(byte[] bytes, Map huffmanCode) {
        //将压缩后的字节数组变为用二进制路径表示字符的字符串,然后将字符串中路径变成对应字符
        StringBuilder stringBuilder = byteArrtransfertoString(bytes);
        Map map = new HashMap();
        //将huffmanCode反转,'a'->101 编程101->'a',将路径转为对应的字符
        for (Map.Entry entry : huffmanCode.entrySet()) {
            map.put(entry.getValue(), entry.getKey());
        }
        //将路径翻译成对应的字符
        List list = getChar(map, stringBuilder);
        //将list变为数组
        byte[] contenArr = new byte[list.size()];
        for (int i = 0; i < list.size(); i++) {
            contenArr[i] = list.get(i);
        }
        return contenArr;
    }

    //将压缩后的字节数组变为用二进制路径表示字符的字符串
    private static StringBuilder byteArrtransfertoString(byte[] bytes) {
        StringBuilder stringBuilder = new StringBuilder();
        //将byte数组转化为二进制字符串
        for (int i = 0; i < bytes.length; i++) {
            byte b = bytes[i];
            boolean flag = true;
            String str = byteTransfertoBinaryString(b);
            if (i == bytes.length - 1) {
             //   System.out.println("str1:"+str);
                str = str.substring(0, 8-zeloCount);
             //   System.out.println("str2:"+str);
            }
            stringBuilder.append(str);
        }
        //System.out.println("stringBuilder2:"+stringBuilder);
        return stringBuilder;
    }

    private static List getChar(Map map, StringBuilder stringBuilder) {
        List list = new ArrayList();
        for (int i = 0; i < stringBuilder.length(); ) {
            int count = 1;
            boolean flag = true;
            //取出路径对应的字符
            while (flag) {
                if (!map.containsKey(stringBuilder.substring(i, i + count))) {
                    count++;
                } else {
                    flag = false;
                }
            }
            list.add(map.get(stringBuilder.substring(i, i + count)));
            i += count;
        }
        return list;
    }

    //将byte转化为二进制存入String
    private static String byteTransfertoBinaryString(byte b) {
        int temp = b;
        temp |= 256;
        String str = Integer.toBinaryString(temp);
        int len = str.length();
        return str.substring(len - 8);

    }

    //将所有步骤封装
    /*
     * @param bytes 原字符串对应的byte数组
     * @return : byte[] 压缩完后后将要发送的数组
     * @date : 2020/3/9 12:30
     */
    private static byte[] huffmanZip(byte[] bytes) {
        //现将数组变为装有node节点的list集合
        List nodes = getNodeList(bytes);
        //得到对应的huffman树
        Node root = getHuffman(nodes);
        getAllCodes(root);
        byte[] huffmanZipArr = getZipArr(bytes, huffmanCode);
        return huffmanZipArr;
    }

    //得到压缩后的字节数组
    /*
     * @param bytes 原字符串对应的byte数组,按照这个数组字符排列顺序打印字符对应的路径
     * @param huffmanCode 存放字符,和字符路径的map
     * @return : byte[] 将所有路劲汇总每8个当成一位
     * @date : 2020/3/9 12:07
     */
    private static byte[] getZipArr(byte[] bytes, Map huffmanCode) {
        //按照bytes中的字符排序顺序用路径替代字符存入stringBuilder
        StringBuilder stringBuilder = new StringBuilder();
        for (byte b : bytes) {
            stringBuilder.append(huffmanCode.get(b));
        }
        //将stringBuilder每8个二进制当做一个byte类型存入字节数组
        int len = 0;
        //用于补齐不足8位的地方
        String str = "";
        //确定字节数组长度
        if (stringBuilder.length() % 8 == 0) {
            len = stringBuilder.length() / 8;
        } else {
            len = stringBuilder.length() / 8 + 1;
            //不足8位的地方需要补0的个数
            zeloCount = 8 - (stringBuilder.length() % 8);
            for (int i = 0; i < zeloCount; i++) {
                str += "0";
              //  System.out.println("zelo:"+str);
            }
            stringBuilder.append(str);
        }
       // System.out.println("stringBuilder1:"+stringBuilder);
        //将stringbuild里面的二进制路径字符串每8位进行翻译成十进制
        return stringTransfertoByteArr(stringBuilder, len);
    }

    //将stringbuild里面的二进制路径字符串每8位进行翻译成十进制
    private static byte[] stringTransfertoByteArr(StringBuilder stringBuilder, int len) {
        byte[] huffmanCodeArr = new byte[len];
        //huffmanCodeArr的索引
        int index = 0;
        for (int i = 0; i < stringBuilder.length(); i += 8) {
            if (i + 8 > stringBuilder.length()) {//不够8位
                huffmanCodeArr[index] = (byte) Integer.parseInt(stringBuilder.substring(i), 2);
            } else {
                huffmanCodeArr[index] = (byte) Integer.parseInt(stringBuilder.substring(i, i + 8), 2);
            }
            index++;
        }
        return huffmanCodeArr;
    }

    //得到所有字符的路径
    private static void getAllCodes(Node root) {
        if (root == null) {
            return;
        }
        //记录路径,遍历huffman树时往左为0,往右为1直到找到对应字符;
        StringBuilder stringBuilder = new StringBuilder();
        getcode(root.left, "0", stringBuilder);
        getcode(root.right, "1", stringBuilder);

    }

    /*
     * @param node 当前需要遍历的节点
     * @param code 值为"0"或者"1"代表向左或者向右
     * @param stringBuilder
     * @return : void
     * @date : 2020/3/8 13:20
     */
    private static void getcode(Node node, String code, StringBuilder stringBuilder) {
        StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
        stringBuilder2.append(code);
        if (node != null) {
            if (node.value == null) {//证明这个点点为非叶子节点
                //向左递归
                getcode(node.left, "0", stringBuilder2);
                //向右递归
                getcode(node.right, "1", stringBuilder2);

            } else {
                huffmanCode.put(node.value, stringBuilder2.toString());
            }
        }
    }

    private static Node getHuffman(List nodes) {
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            Node leftNode = nodes.get(0);
            Node rightNode = nodes.get(1);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.remove(leftNode);
            nodes.remove(rightNode);
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    //将数组变为List
    private static List getNodeList(byte[] bytes) {
        List nodes = new ArrayList<>();
        //遍历bytes,记录每个value出现的次数,出现的次数就是huffman树的权值
        Map counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            if (count == null) {
                counts.put(b, 1);
            } else {
                counts.put(b, count + 1);
            }
        }
        for (Map.Entry map : counts.entrySet()) {
            nodes.add(new Node(map.getKey(), map.getValue()));
        }
        return nodes;
    }

    //前序遍历
    private static void preOrder(Node root) {
        if (root == null) {
            return;
        }
        root.preOrder();
    }

}

class Node implements Comparable {
    public Byte value;
    public int weight;
    Node left;
    Node right;

    public Node(Byte value, int weight) {
        this.value = value;
        this.weight = weight;

    }

    //前序遍历
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    @Override
    public String toString() {
        return "Node{" +
                "value=" + value +
                ", weight=" + weight +
                '}';
    }

    @Override
    public int compareTo(Node o) {
        return this.weight - o.weight;
    }
}

你可能感兴趣的:(数据结构与算法,算法)