赫夫曼树又称为最优二叉树。那什么叫做最优二叉树呢?最优二叉树是指由 n 个带权叶子节点构成的所有二叉树中,带权路径长度最小的那一棵。我们用 wpl 来表示树的带权路径长度,即每个叶子节点的权值乘以它到根节点的路径长度,再全部相加,如下图所示:
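图a的带权路径长度 $wpl = 9 \times 2 + 4 \times 2 + 5 \times 2 + 2 \times 2 = 40$
图b的带权路径长度 $wpl = 9 \times 1 + 5 \times 2 + 4 \times 3 + 2 \times 3 = 37$
图c的带权路径长度 $wpl = 4 \times 1 + 2 \times 2 + 5 \times 3 + 9 \times 3 = 50$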
由结果我们可以知道,权值越大的节点越靠近根节点,那么带权路径长度就越小。
根据赫夫曼树的概述我们可以知道,想要创建赫夫曼树,我们必须先对元素按权值进行排序,然后按照权值越大越靠近根节点的方式组织节点,就可以生成一棵赫夫曼树。
如下图所示我们有这样一组已排好序的元素
按照以下原则去生成赫夫曼树
1、取出节点权值最小的两棵二叉树
2、组成一棵新的二叉树,步骤1取出的两棵二叉树是该树的两棵子树
3、根节点的权值是取出的两棵二叉树根节点的权值之和
package huofuman;
/**
 * A binary-tree node holding an integer weight and optional left/right children.
 */
public class Node {

    /** Weight of this node; assigned once by the constructor. */
    private final int value;

    /** Left child, {@code null} until {@link #setLeft(Node)} is called. */
    private Node left;

    /** Right child, {@code null} until {@link #setRight(Node)} is called. */
    private Node right;

    /**
     * Creates a node with no children.
     *
     * @param value the weight to store
     */
    public Node(int value) {
        this.value = value;
    }

    /** @return the weight given at construction */
    public int getValue() {
        return this.value;
    }

    /** @return the left child, or {@code null} if none has been set */
    public Node getLeft() {
        return this.left;
    }

    /** @return the right child, or {@code null} if none has been set */
    public Node getRight() {
        return this.right;
    }

    /** @param left the node to attach as the left child */
    public void setLeft(Node left) {
        this.left = left;
    }

    /** @param right the node to attach as the right child */
    public void setRight(Node right) {
        this.right = right;
    }

    /** Renders only the weight; children are not included. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Node{value=");
        text.append(this.value).append('}');
        return text.toString();
    }
}
package huofuman;
import java.util.*;
/**
 * Builds a Huffman (optimal) binary tree from a set of integer weights.
 */
public class HuffmanTree {

    /**
     * Creates a Huffman tree whose leaves carry the given weights.
     *
     * <p>Every weight starts as a single-node tree. The two lightest trees are repeatedly
     * removed and joined under a new parent whose weight is the sum of theirs, until one tree
     * remains. The lighter of the two becomes the left child. When several trees have the
     * same weight, which of them is picked first is unspecified.
     *
     * @param arr the leaf weights; may be {@code null} or empty
     * @return the root of the resulting tree, or {@code null} when {@code arr} is {@code null}
     *         or empty
     */
    public Node creatHumanTree(int[] arr) {
        // No weights means there is no tree to build.
        if (arr == null || arr.length == 0) {
            return null;
        }

        // A min-heap hands out the two lightest trees without re-sorting on every merge.
        // Integer.compare avoids the overflow that plain subtraction can produce.
        PriorityQueue<Node> forest = new PriorityQueue<>(arr.length, new Comparator<Node>() {
            @Override
            public int compare(Node o1, Node o2) {
                return Integer.compare(o1.getValue(), o2.getValue());
            }
        });
        for (int value : arr) {
            forest.add(new Node(value));
        }

        while (forest.size() > 1) {
            // Take the two lightest trees.
            Node left = forest.poll();
            Node right = forest.poll();
            // Join them under a parent weighted with their sum.
            Node parent = new Node(left.getValue() + right.getValue());
            parent.setLeft(left);
            parent.setRight(right);
            // The merged tree competes with the remaining ones.
            forest.add(parent);
        }
        return forest.poll();
    }
}
赫夫曼编码在通信领域中有着非常重要的应用:通过赫夫曼编码对通信的信息进行压缩,可以让同样的一句话用更少的数据来传输。传输同样的内容所需的数据量越少,就意味着我们的传输效率越高。下面我们来看看赫夫曼编码到底是如何对数据进行压缩的。
假设我们想要在网络上传输这样一句话: canyoucanacar
我们先统计这段话中一些字母出现的次数:y:1 u:1 r:1 o:1 n:2 c:3 a:4
我们将每一个统计的结果作为一个二叉树的节点,可以得到如下的赫夫曼树:
赫夫曼树的任何一个叶子节点的路径都是唯一的,这样我们可以得到唯一识别的赫夫曼编码表
这样生成的二进制文件就是这个样子的:
0001100101110111
而我们最开始的那一句话转成的二进制文件是这个样子的(假设我们按照八位传输):
我们将其转成 ASCII码
99 97 110 121 111 117 99 97 110 97 99 97 114
对应的二进制
01100011 01100001 01101110 01111001 01101111 01110101 01100011 01100001 01101110 01100001 01100011 01100001 01110010
对比一下我们压缩的长度,压缩率达到 72%,根据赫夫曼编码的性质我们可以知道,数据重复度越高,赫夫曼压缩的压缩率就越大。
赫夫曼数据压缩需要经过四个过程
我们来看代码的实现:
1、节点创建
package hfm;
import javafx.util.Pair;
/**
 * Node of a Huffman tree built over bytes.
 *
 * <p>The payload is a {@code Pair<Byte, Integer>}. As used by {@code HuffmanUtil}, the key is
 * the byte a leaf stands for ({@code null} on merged parent nodes) and the value is the weight.
 */
public class HuffmanNode {

    /** Payload supplied at construction; never reassigned. */
    private final Pair<Byte, Integer> value;

    /** Left subtree, {@code null} until set. */
    private HuffmanNode leftNode;

    /** Right subtree, {@code null} until set. */
    private HuffmanNode rightNode;

    /**
     * Creates a node without children.
     *
     * @param value the payload to store
     */
    public HuffmanNode(Pair<Byte, Integer> value) {
        this.value = value;
    }

    /** @return the payload given at construction */
    public Pair<Byte, Integer> getValue() {
        return this.value;
    }

    /** @return the left subtree, or {@code null} if none has been set */
    public HuffmanNode getLeftNode() {
        return this.leftNode;
    }

    /** @return the right subtree, or {@code null} if none has been set */
    public HuffmanNode getRightNode() {
        return this.rightNode;
    }

    /** @param leftNode the node to attach as the left subtree */
    public void setLeftNode(HuffmanNode leftNode) {
        this.leftNode = leftNode;
    }

    /** @param rightNode the node to attach as the right subtree */
    public void setRightNode(HuffmanNode rightNode) {
        this.rightNode = rightNode;
    }
}
2、压缩与解压缩方法
package hfm;
import javafx.util.Pair;
import java.util.*;
/**
 * Helpers for Huffman coding of raw bytes: frequency counting, tree construction, code-table
 * generation, bit packing ({@link #zip}) and unpacking ({@link #decode}).
 *
 * <p>The class keeps no state between calls; every method works only on its arguments.
 */
public abstract class HuffmanUtil {

    /** Number of bits packed into one output byte. */
    private static final int BITS_PER_BYTE = 8;

    /**
     * Counts how often each byte value occurs and wraps every (byte, count) pair in a leaf node.
     *
     * @param bytes the data to analyse; may be {@code null} or empty
     * @return one node per distinct byte value, in the iteration order of the counting map;
     *         an empty list when {@code bytes} is {@code null} or empty
     */
    public static List<HuffmanNode> getNode(byte[] bytes) {
        List<HuffmanNode> huffmanNodeList = new ArrayList<>();
        if (bytes == null || bytes.length == 0) {
            return huffmanNodeList;
        }
        // Count the occurrences of every byte value.
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte by : bytes) {
            Integer count = counts.get(by);
            counts.put(by, count == null ? 1 : count + 1);
        }
        // Turn each (byte, count) entry into a leaf node.
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            huffmanNodeList.add(
                    new HuffmanNode(new Pair<Byte, Integer>(entry.getKey(), entry.getValue())));
        }
        return huffmanNodeList;
    }

    /**
     * Builds a Huffman tree from the given nodes.
     *
     * <p>The list is modified in place: on each round it is sorted by descending weight, the two
     * lightest nodes are removed and replaced by a parent whose weight is the sum of both and
     * whose key is {@code null}. When the method returns, a non-empty list contains only the
     * root.
     *
     * @param huffmanNodeList the nodes to merge; must be modifiable; may be {@code null} or empty
     * @return the root of the tree, or {@code null} when the list is {@code null} or empty
     */
    public static HuffmanNode creatHumanTree(List<HuffmanNode> huffmanNodeList) {
        if (huffmanNodeList == null || huffmanNodeList.isEmpty()) {
            return null;
        }
        // Heaviest first, so the two lightest nodes sit at the tail of the list.
        // Integer.compare avoids the overflow that plain subtraction can produce.
        Comparator<HuffmanNode> heaviestFirst = new Comparator<HuffmanNode>() {
            @Override
            public int compare(HuffmanNode o1, HuffmanNode o2) {
                return Integer.compare(o2.getValue().getValue(), o1.getValue().getValue());
            }
        };
        while (huffmanNodeList.size() > 1) {
            Collections.sort(huffmanNodeList, heaviestFirst);
            // Remove by index from the tail: lightest first, then second lightest.
            HuffmanNode leftNode = huffmanNodeList.remove(huffmanNodeList.size() - 1);
            HuffmanNode rightNode = huffmanNodeList.remove(huffmanNodeList.size() - 1);
            // The parent weighs as much as BOTH children together.
            int value = leftNode.getValue().getValue() + rightNode.getValue().getValue();
            HuffmanNode parent = new HuffmanNode(new Pair<Byte, Integer>(null, value));
            parent.setLeftNode(leftNode);
            parent.setRightNode(rightNode);
            huffmanNodeList.add(parent);
        }
        return huffmanNodeList.get(0);
    }

    /**
     * Derives the code table from a Huffman tree: going left appends {@code '0'}, going right
     * appends {@code '1'}, and a node with a non-null key ends a code.
     *
     * <p>A tree that consists of a single leaf gets the one-bit code {@code "0"} so that its
     * byte can still be encoded.
     *
     * @param node the root of the tree; may be {@code null}
     * @return a new map from byte value to its bit string; empty when {@code node} is
     *         {@code null}
     */
    public static Map<Byte, String> getHuffmanMap(HuffmanNode node) {
        // A fresh map per call, so results of different calls never mix.
        Map<Byte, String> codes = new HashMap<>();
        if (node == null) {
            return codes;
        }
        Byte rootKey = node.getValue().getKey();
        if (rootKey != null) {
            // Only one distinct byte: there is no branch to walk, so assign a fixed code.
            codes.put(rootKey, "0");
            return codes;
        }
        getCodes(node.getLeftNode(), "0", codes);
        getCodes(node.getRightNode(), "1", codes);
        return codes;
    }

    /**
     * Encodes the data with the given code table and packs the bits into bytes.
     *
     * <p>Layout of the result: the code bits are cut into groups of eight, one byte per group;
     * a shorter final group is stored as its plain numeric value. One extra trailer byte
     * follows the data and holds the number of significant bits (1-8) of the last data byte,
     * or 0 when there is no data byte. {@link #decode} needs this trailer to restore leading
     * zero bits of the final group.
     *
     * @param bytes     the data to compress; {@code null} is treated as empty
     * @param huffCodes the code table; must contain a code of {@code '0'}/{@code '1'} characters
     *                  for every byte value in {@code bytes}
     * @return the packed bits followed by the trailer byte
     * @throws IllegalArgumentException if a byte has no code or a code is not a binary string
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> huffCodes) {
        StringBuilder bits = new StringBuilder();
        if (bytes != null) {
            for (byte by : bytes) {
                String code = huffCodes.get(by);
                if (code == null) {
                    throw new IllegalArgumentException("No Huffman code for byte value " + by);
                }
                bits.append(code);
            }
        }

        int dataLen = (bits.length() + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
        byte[] newBytes = new byte[dataLen + 1];
        for (int idx = 0; idx < dataLen; idx++) {
            int start = idx * BITS_PER_BYTE;
            int end = Math.min(start + BITS_PER_BYTE, bits.length());
            newBytes[idx] = (byte) Integer.parseInt(bits.substring(start, end), 2);
        }
        // Trailer: how many bits of the last data byte are real.
        int lastBits = dataLen == 0 ? 0 : bits.length() - (dataLen - 1) * BITS_PER_BYTE;
        newBytes[dataLen] = (byte) lastBits;
        return newBytes;
    }

    /**
     * Walks the subtree below {@code node} and records the code of every leaf.
     *
     * @param node  the current node; {@code null} ends the walk
     * @param path  the bits leading to {@code node}
     * @param codes the table that receives the leaf codes
     */
    private static void getCodes(HuffmanNode node, String path, Map<Byte, String> codes) {
        if (node == null) {
            return;
        }
        Byte key = node.getValue().getKey();
        if (key == null) {
            // Merged parent: keep descending.
            getCodes(node.getLeftNode(), path + "0", codes);
            getCodes(node.getRightNode(), path + "1", codes);
        } else {
            codes.put(key, path);
        }
    }

    /**
     * Restores the original bytes from data produced by {@link #zip}.
     *
     * <p>The last element of {@code zipBytes} is read as the trailer described at {@link #zip}.
     * Arrays packed without that trailer are not supported.
     *
     * @param codeMap  the code table that was used for compression
     * @param zipBytes the packed data including the trailer byte; {@code null} or an empty
     *                 array decodes to an empty result
     * @return the decoded bytes
     * @throws IllegalArgumentException if the trailer is inconsistent or the bit stream ends in
     *                                  the middle of a code
     */
    public static byte[] decode(Map<Byte, String> codeMap, byte[] zipBytes) {
        if (zipBytes == null || zipBytes.length == 0) {
            return new byte[0];
        }
        int dataLen = zipBytes.length - 1;
        int lastBits = zipBytes[dataLen];
        boolean trailerValid = dataLen == 0
                ? lastBits == 0
                : lastBits >= 1 && lastBits <= BITS_PER_BYTE;
        if (!trailerValid) {
            throw new IllegalArgumentException(
                    "Invalid trailer byte " + lastBits + " for " + dataLen + " data byte(s)");
        }

        // Expand the data bytes back into the bit string.
        StringBuilder bits = new StringBuilder(dataLen * BITS_PER_BYTE);
        for (int i = 0; i < dataLen; i++) {
            int width = (i == dataLen - 1) ? lastBits : BITS_PER_BYTE;
            bits.append(byteToString(zipBytes[i], width));
        }

        // Invert the table: bit string -> byte.
        Map<String, Byte> deCodeMap = new HashMap<>();
        for (Map.Entry<Byte, String> entry : codeMap.entrySet()) {
            deCodeMap.put(entry.getValue(), entry.getKey());
        }

        // Grow the candidate one bit at a time; the first match is emitted.
        List<Byte> byteList = new ArrayList<>();
        int start = 0;
        for (int end = 1; end <= bits.length(); end++) {
            Byte by = deCodeMap.get(bits.substring(start, end));
            if (by != null) {
                byteList.add(by);
                start = end;
            }
        }
        if (start != bits.length()) {
            throw new IllegalArgumentException(
                    "Compressed data ends in the middle of a code at bit " + start);
        }

        // Copy the list into a primitive array.
        byte[] byt = new byte[byteList.size()];
        for (int i = 0; i < byt.length; i++) {
            byt[i] = byteList.get(i);
        }
        return byt;
    }

    /**
     * Renders the lowest {@code width} bits of a byte as a binary string of exactly that length.
     *
     * @param by    the byte to render
     * @param width the number of bits wanted, 1-8
     * @return the bits, including leading zeros
     */
    private static String byteToString(byte by, int width) {
        // Setting bit 8 forces a 9-character string, so no leading zero is dropped;
        // masking with 0xFF keeps negative bytes from expanding to 32 bits.
        String padded = Integer.toBinaryString((by & 0xFF) | 0x100);
        return padded.substring(padded.length() - width);
    }
}
package hfm;
import java.io.*;
import java.util.List;
import java.util.Map;
/**
 * File-level wrappers around {@code HuffmanUtil}: compress a file into an archive and restore
 * it again.
 *
 * <p>An archive is a Java object stream holding two objects in this order: the compressed
 * {@code byte[]} and the {@code Map<Byte, String>} code table.
 */
public abstract class HuffmanFileUtil {

    /**
     * Compresses a file with Huffman coding.
     *
     * <p>The destination is opened only after the source has been read and encoded. An
     * {@link IOException} is not propagated; its stack trace is printed and the method returns.
     *
     * @param str path of the file to compress
     * @param dst path of the archive to write
     */
    public static void zipFile(String str, String dst) {
        try {
            byte[] bytes = readAll(str);
            List<HuffmanNode> nodes = HuffmanUtil.getNode(bytes);
            HuffmanNode node = HuffmanUtil.creatHumanTree(nodes);
            Map<Byte, String> huffmanMap = HuffmanUtil.getHuffmanMap(node);
            byte[] zipByte = HuffmanUtil.zip(bytes, huffmanMap);
            // Both streams are declared as resources so each one is closed even if
            // constructing or using the other fails.
            try (OutputStream outputStream = new FileOutputStream(dst);
                 ObjectOutputStream objectOutput = new ObjectOutputStream(outputStream)) {
                objectOutput.writeObject(zipByte);
                objectOutput.writeObject(huffmanMap);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Restores a file from an archive written by {@link #zipFile(String, String)}.
     *
     * <p>An {@link IOException} is not propagated; its stack trace is printed and the method
     * returns. Other failures are thrown to the caller.
     *
     * <p>NOTE(review): the archive is read with Java native deserialization, so only archives
     * from a trusted source should be passed in.
     *
     * @param src path of the archive
     * @param dst path of the file to write
     * @throws Exception if the archive content cannot be deserialized
     *                   ({@link ClassNotFoundException}), does not have the expected types, or
     *                   is rejected by {@code HuffmanUtil.decode}
     */
    public static void unzip(String src, String dst) throws Exception {
        try {
            byte[] by;
            Map<Byte, String> hufman;
            try (InputStream inputStream = new FileInputStream(src);
                 ObjectInputStream objectInputStream = new ObjectInputStream(inputStream)) {
                // First object: the compressed data.
                by = (byte[]) objectInputStream.readObject();
                // Second object: the code table. The element types cannot be checked at
                // run time; the archive layout is what guarantees them.
                @SuppressWarnings("unchecked")
                Map<Byte, String> table = (Map<Byte, String>) objectInputStream.readObject();
                hufman = table;
            }
            byte[] bytes = HuffmanUtil.decode(hufman, by);
            try (OutputStream outputStream = new FileOutputStream(dst)) {
                outputStream.write(bytes);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a whole file into memory.
     *
     * @param path the file to read
     * @return every byte of the file
     * @throws IOException if the file cannot be opened or read
     */
    private static byte[] readAll(String path) throws IOException {
        try (InputStream inputStream = new FileInputStream(path);
             ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[8192];
            int read;
            // A single read() may return fewer bytes than requested, so loop until EOF.
            while ((read = inputStream.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }
}