i like like like java do you like a java
各字符对应的哈夫曼编码为:o: 1000  u: 10010  d: 100110  y: 100111  i: 101  a: 110  k: 1110  e: 1111  j: 0000  v: 0001  l: 001  空格: 01
字符串 "i like like like java do you like a java" 对应的编码为 1010100110111101111010011011110111101001101111011110100001100001110011001111000011001111000100100100110111101111011100100001100001110
定义哈夫曼树节点类
/**
 * A node of a Huffman tree.
 *
 * <p>Each node carries a weight ({@code value}) and an optional byte symbol
 * ({@code character}). Nodes are ordered by ascending weight.
 */
class HuffmanCodeNode implements Comparable<HuffmanCodeNode> {
    /** The byte symbol stored in this node; may be {@code null}. */
    private Byte character;
    /** The weight of this node. */
    private int value;
    private HuffmanCodeNode leftNode;
    private HuffmanCodeNode rightNode;

    /**
     * Creates a node without children.
     *
     * @param character the byte symbol, or {@code null} if the node has none
     * @param value     the weight of the node
     */
    public HuffmanCodeNode(Byte character, int value) {
        this.character = character;
        this.value = value;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.leftNode != null) {
            this.leftNode.preOrder();
        }
        if (this.rightNode != null) {
            this.rightNode.preOrder();
        }
    }

    public Byte getCharacter() {
        return character;
    }

    public void setCharacter(byte character) {
        this.character = character;
    }

    public int getValue() {
        return value;
    }

    public void setValue(int value) {
        this.value = value;
    }

    public HuffmanCodeNode getLeftNode() {
        return leftNode;
    }

    public void setLeftNode(HuffmanCodeNode leftNode) {
        this.leftNode = leftNode;
    }

    public HuffmanCodeNode getRightNode() {
        return rightNode;
    }

    public void setRightNode(HuffmanCodeNode rightNode) {
        this.rightNode = rightNode;
    }

    @Override
    public String toString() {
        return "HuffmanCodeNode{" +
                "character=" + character +
                ", value=" + value +
                '}';
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare against
     * @return a negative, zero or positive number when this node's weight is
     *         smaller than, equal to or greater than {@code o}'s weight
     */
    @Override
    public int compareTo(HuffmanCodeNode o) {
        // Integer.compare cannot overflow, unlike "this.value - o.value".
        return Integer.compare(this.value, o.value);
    }
}
得到每个节点的数据与权值
/**
 * Builds one node per distinct byte of the given string.
 *
 * <p>The string is converted with {@code str.getBytes()} (no explicit charset),
 * and each node's weight is the number of times its byte occurs.
 *
 * @param str the text to analyse
 * @return a list holding one {@link HuffmanCodeNode} per distinct byte
 */
public static List<HuffmanCodeNode> getNodes(String str) {
    // byte value -> number of occurrences
    Map<Byte, Integer> frequencies = new HashMap<>();
    for (byte symbol : str.getBytes()) {
        Integer seen = frequencies.get(symbol);
        frequencies.put(symbol, seen == null ? 1 : seen + 1);
    }

    // Turn every (byte, count) pair into a node.
    List<HuffmanCodeNode> leaves = new ArrayList<>();
    for (Map.Entry<Byte, Integer> entry : frequencies.entrySet()) {
        Byte symbol = entry.getKey();
        int weight = entry.getValue();
        leaves.add(new HuffmanCodeNode(symbol, weight));
    }
    return leaves;
}
根据节点构造哈夫曼树
/**
 * Builds a Huffman tree from the given nodes.
 *
 * <p>The two lightest nodes are repeatedly removed from the list and replaced
 * by a new parent node (with a {@code null} character) whose weight is their
 * sum, until a single node remains. The list passed in is modified: on return
 * it contains only the root.
 *
 * @param huffmanCodeNodes the nodes to combine; consumed by this method
 * @return the root of the tree, or {@code null} if the list is {@code null}
 *         or empty
 */
public static HuffmanCodeNode createHuffmanCodeTree(List<HuffmanCodeNode> huffmanCodeNodes) {
    // Nothing to combine: report "no tree" instead of failing on get(0).
    if (huffmanCodeNodes == null || huffmanCodeNodes.isEmpty()) {
        return null;
    }
    while (huffmanCodeNodes.size() > 1) {
        // Sort ascending by weight so the two lightest nodes are at the front.
        Collections.sort(huffmanCodeNodes);
        HuffmanCodeNode leftNode = huffmanCodeNodes.remove(0);
        HuffmanCodeNode rightNode = huffmanCodeNodes.remove(0);
        // The parent carries no symbol, only the combined weight.
        HuffmanCodeNode parent = new HuffmanCodeNode(null, leftNode.getValue() + rightNode.getValue());
        parent.setLeftNode(leftNode);
        parent.setRightNode(rightNode);
        // Put the parent back so it takes part in the next round.
        huffmanCodeNodes.add(parent);
    }
    return huffmanCodeNodes.get(0);
}
得到每个节点(字符)的哈夫曼编码
// Empty prefix handed to the recursive helper; the helper copies it and never appends to it.
static StringBuilder stringBuilder = new StringBuilder();
// Code table (byte symbol -> bit string) produced by the most recent successful call.
static HashMap<Byte, String> hashMap = new HashMap<>();

/**
 * Computes the Huffman code of every symbol in the tree rooted at {@code root}.
 *
 * <p>Going to a left child contributes {@code "0"} to a code and going to a
 * right child contributes {@code "1"}. A root without children that carries a
 * symbol is assigned the code {@code "0"}, so that it still receives a
 * non-empty code.
 *
 * <p>The static {@code hashMap} is cleared and refilled on every call with a
 * non-null root, so codes from a previous tree never leak into the result.
 *
 * @param root the root of the Huffman tree
 * @return a new map from byte symbol to its code, or {@code null} if
 *         {@code root} is {@code null}
 */
public static Map<Byte, String> getHuffmanCodes(HuffmanCodeNode root) {
    if (root == null) {
        return null;
    }
    // Drop codes left over from an earlier tree.
    hashMap.clear();
    boolean childless = root.getLeftNode() == null && root.getRightNode() == null;
    if (childless) {
        // No edge leads to the root, so give its symbol an explicit one-bit code.
        if (root.getCharacter() != null) {
            hashMap.put(root.getCharacter(), "0");
        }
    } else {
        getHuffmanCodes(root.getLeftNode(), "0", stringBuilder);
        getHuffmanCodes(root.getRightNode(), "1", stringBuilder);
    }
    // Return a snapshot so a later call cannot change this caller's table.
    return new HashMap<>(hashMap);
}

/**
 * Recursive helper: records the code of every symbol below {@code node}.
 *
 * @param node the node being visited; {@code null} is ignored
 * @param code the bit ("0" or "1") for the edge leading to {@code node}
 * @param sb   the code accumulated for the parent; not modified
 */
private static void getHuffmanCodes(HuffmanCodeNode node, String code, StringBuilder sb) {
    if (node == null) {
        return;
    }
    // Work on a copy so sibling branches do not see each other's bits.
    StringBuilder path = new StringBuilder(sb);
    path.append(code);
    if (node.getCharacter() == null) {
        // A node without a symbol: keep descending.
        getHuffmanCodes(node.getLeftNode(), "0", path);
        getHuffmanCodes(node.getRightNode(), "1", path);
    } else {
        // A node with a symbol: the accumulated path is its code.
        hashMap.put(node.getCharacter(), path.toString());
    }
}
压缩字符串
/**
 * Compresses a byte array with the given Huffman code table.
 *
 * <p>The codes of all input bytes are concatenated into one bit string, which
 * is then cut into groups of eight bits; each group is parsed as a binary
 * number and narrowed to a {@code byte}. The last group may hold fewer than
 * eight bits, and its length is not stored in the result.
 *
 * @param strBytes     the bytes to compress
 * @param huffmanCodes the code table (byte symbol -> bit string)
 * @return the compressed bytes
 */
public static byte[] zip(byte[] strBytes, Map<Byte, String> huffmanCodes) {
    // Concatenate the code of every input byte.
    StringBuilder bits = new StringBuilder();
    for (byte symbol : strBytes) {
        bits.append(huffmanCodes.get(symbol));
    }

    int bitCount = bits.length();
    // Round up so a trailing partial group gets its own byte.
    byte[] packed = new byte[(bitCount + 7) / 8];
    for (int slot = 0; slot < packed.length; slot++) {
        int from = slot * 8;
        int to = Math.min(from + 8, bitCount);
        packed[slot] = (byte) Integer.parseInt(bits.substring(from, to), 2);
    }
    return packed;
}
整合代码,方便使用
/**
 * Compresses a string with Huffman coding in one step.
 *
 * <p>Counts the bytes of {@code str}, builds the Huffman tree, derives the
 * code table and packs the encoded bits into bytes.
 *
 * @param str the text to compress; must not be {@code null}
 * @return the compressed bytes; an empty array for an empty string
 */
public static byte[] zip(String str) {
    // An empty string has no symbols, so there is no tree to build.
    if (str.isEmpty()) {
        return new byte[0];
    }
    List<HuffmanCodeNode> nodes = getNodes(str);
    HuffmanCodeNode root = createHuffmanCodeTree(nodes);
    Map<Byte, String> huffmanCodes = getHuffmanCodes(root);
    return zip(str.getBytes(), huffmanCodes);
}
测试
// Demo: compress a sample sentence and print the resulting byte array.
String str = "i like like like java do you like a java";
byte[] zip = HuffmanCodes.zip(str);
// Arrays.toString renders the bytes as a bracketed, comma-separated list.
System.out.println(Arrays.toString(zip));
输出:
[-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28]
将压缩后的字节转为二进制字符串
/**
 * Converts a byte to its binary string representation.
 *
 * @param flag {@code true} to always return exactly eight characters,
 *             left-padded with zeros; {@code false} to return the shortest
 *             representation without leading zeros
 * @param b    the byte to convert, interpreted as an unsigned 8-bit value
 * @return the bits of {@code b} as a string of '0' and '1' characters
 */
public static String byteToBitString(boolean flag, byte b) {
    // Mask off the sign extension so a negative byte yields 8 bits, not 32.
    int unsigned = b & 0xFF;
    if (!flag) {
        return Integer.toBinaryString(unsigned);
    }
    // Setting bit 8 guarantees at least nine digits; the low eight are the padded result.
    String bits = Integer.toBinaryString(unsigned | 0x100);
    return bits.substring(bits.length() - 8);
}
将字符串的哈夫曼编码转回为字符序列
/**
 * Decodes Huffman-compressed bytes back into the original byte sequence.
 *
 * <p>Every byte except the last is expanded to eight bits; the last byte is
 * expanded without zero-padding, so leading zero bits of the final group are
 * not restored. The bit string is then matched against the code table by
 * extending the candidate one bit at a time until a code is found.
 *
 * @param huffmanCodesBytes the compressed bytes
 * @param huffmanCodes      the code table (byte symbol -> bit string) used to compress
 * @return the decoded bytes
 */
public static byte[] decode(byte[] huffmanCodesBytes, Map<Byte, String> huffmanCodes) {
    // 1. Rebuild the bit string from the compressed bytes.
    StringBuilder bits = new StringBuilder();
    int lastIndex = huffmanCodesBytes.length - 1;
    for (int pos = 0; pos <= lastIndex; pos++) {
        // Only the final byte is left unpadded.
        bits.append(byteToBitString(pos != lastIndex, huffmanCodesBytes[pos]));
    }

    // 2. Invert the table: bit string -> byte symbol.
    Map<String, Byte> symbolByCode = new HashMap<>();
    for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
        symbolByCode.put(entry.getValue(), entry.getKey());
    }

    // 3. Scan the bits, growing the window until it matches a code.
    List<Byte> decoded = new ArrayList<>();
    int start = 0;
    while (start < bits.length()) {
        int end = start + 1;
        Byte symbol = symbolByCode.get(bits.substring(start, end));
        while (symbol == null) {
            end++;
            symbol = symbolByCode.get(bits.substring(start, end));
        }
        decoded.add(symbol);
        start = end;
    }

    // 4. Unbox into a primitive array.
    byte[] result = new byte[decoded.size()];
    int out = 0;
    for (Byte symbol : decoded) {
        result[out++] = symbol;
    }
    return result;
}