将给出的一段文本,比如 “i like like like java do you like a java” , 根据前面的讲的赫夫曼编码原理,对其进行数据压缩处理
根据赫夫曼编码压缩数据的原理,需要创建 “i like like like java do you like a java” 对应的赫夫曼树
class Node implements Comparable<Node> {
Byte data;//存放数据 'a'=> 97
int weight;//权值:统计出现的次数
Node left;
Node right;
public Node(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
/**
* 前序遍历
*/
public void preOder() {
System.out.println(this);
if (this.left != null) this.left.preOder();
if (this.right != null) this.right.preOder();
}
@Override
public String toString() {
return new StringJoiner(", ", Node.class.getSimpleName() + "[", "]")
.add("data=" + data)
.add("weight=" + weight)
.toString();
}
//根据权值从小到大排序
@Override
public int compareTo(Node o) {
return this.weight - o.weight;
}
}
得到"i like like like java do you like a java"的byte[]数组
byte[] contentBytes = str.getBytes();
/**
* 接受字节数组返回list
* Node[data=32, weight=9],Node[data=100, weight=1].....
*
* @param bytes
* @return
*/
private static List<Node> getNodes(byte[] bytes) {
List<Node> list = new ArrayList<>();
//统计每个字符串出现的次数,使用map集合
HashMap<Byte, Integer> map = new HashMap<>();
for (byte aByte : bytes) {
Integer count = map.get(aByte);
if (count == null) {
//说明当前map中没有存入该字符
map.put(aByte, 1);
} else {
//说明之前存入过
count++;
map.put(aByte, count);
}
}
//将map中的数据取出放入list中
for (Map.Entry<Byte, Integer> entry : map.entrySet()) {
list.add(new Node(entry.getKey(), entry.getValue()));
}
return list;
}
通过list返回一棵赫夫曼树
/**
* 通过list返回一棵赫夫曼树
*
* @param list
* @return
*/
private static Node getHoffumanTree(List<Node> list) {
while (list.size() > 1) {
Collections.sort(list);
Node leftNode = list.get(0);
Node rightNode = list.get(1);
//通过取出的两个节点权重计算他们的根节点,root没有date
Node root = new Node(null, (leftNode.weight + rightNode.weight));
root.left = leftNode;
root.right = rightNode;
list.remove(leftNode);
list.remove(rightNode);
list.add(root);
}
return list.get(0);
}
形式:32->01 97->100 100->11000
/**
* 重载,传一个根节点即可得到赫夫曼编码表
*
* @param node
*/
private static Map<Byte, String> getCodes(Node node) {
if (node == null) return null;
getCodes(node,"",stringBuilder);
return hoffuCodes;
}
/*
* 生成霍夫曼树对应的赫夫曼编码表
* 1、将赫夫曼编码表存放在map集合中Map
* 形式:32->01 97->100 100->11000
* 2、在生成赫夫曼编码表时,拼接路径,创建StringBuilder存储某个叶子节点的路径
*/
static Map<Byte, String> hoffuCodes = new HashMap<>();
static StringBuilder stringBuilder = new StringBuilder();
/**
* 功能:将传入的Node节点的所有叶子节点赫夫曼编码得到并存放到hoffumap中
*
* @param node 节点,默认根节点
* @param code 路径,左子节点为0,右子节点为1
* @param stringBuilder 拼接路径
*/
private static void getCodes(Node node, String code, StringBuilder stringBuilder) {
//进行字符串拼接
StringBuilder stringBuilder1 = new StringBuilder(stringBuilder);
stringBuilder1.append(code);
if (node != null) {
//if(node.data ==null) 不进行处理
if (node.data == null) {
//说明时非叶子节点,继续寻找直到找到某一个叶子节点
//往左边查找
getCodes(node.left, "0", stringBuilder1);
//往右边查找
getCodes(node.right, "1", stringBuilder1);
} else {
//如果当前已经为叶子节点,表示这个字符的赫夫曼编码已经产生,放入map集合中
hoffuCodes.put(node.data, stringBuilder1.toString());
}
}
}
=01 a=100 d=11000 u=11001 e=1110 v=11011 i=101
y=11010 j=0010 k=1111 l=000 o=0011
10101000101111111100100010111111110010001011111111001001010011011100011100000110111010001111001010 00101111111100110001001010011011100
/**
* 使用一个方法封装
*
* @param contentBytes 原始的字符串byte
* @return 经过赫夫曼编码处理过后的编码,压缩过后的编码
*/
private static byte[] huffmanZip(byte[] contentBytes) {
List<Node> nodes = getNodes(contentBytes);
//1、创建赫夫曼树
Node hofumanNode = getHoffumanTree(nodes);
//2、根据赫夫曼数生成对应的赫夫曼编码
Map<Byte, String> codes = getCodes(hofumanNode);
//3、根据赫夫曼编码对原始数据压缩,得到压缩后的字节码数组
byte[] zip = zip(contentBytes, codes);
return zip;
}
/**
* 编写一个方法,将字符串对应的byte[]数组,通过生成霍夫曼编码表,返回一个赫夫曼编码压缩过后的byte[]
*
* @param bytes 原始字符串对应的byte[]
* @param hoffuCodes 生成的赫夫曼编码表map
* @return 返回赫夫曼编码处理后的byte数组
* "i like like like java do you like a java" => byte[] contentBytes
* => 对应的byte[]hoffumanCodeByte,8位对应一个byte放入到hoffumanCodeByte
*/
private static byte[] zip(byte[] bytes, Map<Byte, String> hoffuCodes) {
//1、利用赫夫曼编码表,将byte数组转成赫夫曼编码字符串
StringBuilder hoffmanSB = new StringBuilder();
//遍历byte
for (byte b : bytes) {
hoffmanSB.append(hoffuCodes.get(b));
}
//2、将生成的hoffmanSB装成byte[],8位一个byte
int len = 0;
if (hoffmanSB.length() % 8 == 0) {
//长度刚好位8的整数
len = hoffmanSB.length() / 8;
} else {
len = hoffmanSB.length() / 8 + 1;
}
//简化:int length = (hoffmanSB.length() +7)/ 8 ;
//创建存储压缩后的byte[]
byte[] hoffumanByte = new byte[len];
int index = 0;//统计第多少个byte
for (int i = 0; i < hoffmanSB.length(); i += 8) {
//8位一个byte,所以步长为8
String strByte;
if (i + 8 > hoffmanSB.length()) {
//说明不够8位
strByte = hoffmanSB.substring(i);
} else {
strByte = hoffmanSB.substring(i, i + 8);
}
//将strByte转成byte放入到hoffumanByte
hoffumanByte[index] = (byte) Integer.parseInt(strByte);
index++;
}
return hoffumanByte;
}