博客说明
文章所涉及的资料来自互联网整理和个人总结,意在于个人学习和经验汇总,如有什么地方侵权,请联系本人删除,谢谢!
数据传输太长
i like like like java do you like a java // 共40个字符(包括空格)
105 32 108 105 107 101 32 108 105 107 101 32 108 105 107 101 32 106 97 118 97 32 100 111 32 121 111 117 32 108 105 107 101 32 97 32 106 97 118 97 //对应Ascii码
01101001 00100000 01101100 01101001 01101011 01100101 00100000 01101100 01101001 01101011 01100101 00100000 01101100 01101001 01101011 01100101 00100000 01101010 01100001 01110110 01100001 00100000 01100100 01101111 00100000 01111001 01101111 01110101 00100000 01101100 01101001 01101011 01100101 00100000 01100001 00100000 01101010 01100001 01110110 01100001 //对应的二进制
存在多义性
i like like like java do you like a java // 共40个字符(包括空格)
d:1 y:1 u:1 j:2 v:2 o:2 l:4 k:4 e:4 i:5 a:5 :9 // 各个字符对应的个数
0= , 1=a, 10=i, 11=e, 100=k, 101=l, 110=o, 111=v, 1000=j, 1001=u, 1010=y, 1011=d
说明:按照各个字符出现的次数进行编码,原则是出现次数越多的,则编码越小,比如 空格出现了9 次, 编码为0 ,其它依次类推.
按照上面给各个字符规定的编码,则我们在传输 "i like like like java do you like a java" 数据时,编码就是 10010110100...
i like like like java do you like a java // 共40个字符(包括空格)
d:1 y:1 u:1 j:2 v:2 o:2 l:4 k:4 e:4 i:5 a:5 :9 // 各个字符对应的个数
按照上面字符出现的次数构建一棵赫夫曼树, 次数作为权值
//根据赫夫曼树,给各个字符
//规定编码 , 向左的路径为0
//向右的路径为1 , 编码如下:
o: 1000 u: 10010 d: 100110 y: 100111 i: 101
a : 110 k: 1110 e: 1111 j: 0000 v: 0001
l: 001 : 01
按照上面的赫夫曼编码,我们的"i like like like java do you like a java" 字符串对应的编码为 (注意这里我们使用的是无损压缩)
1010100110111101111010011011110111101001101111011110100001100001110011001111000011001111000100100100110111101111011100100001100001110
长度为 : 133
说明:
原来长度是 359(40 个字符 × 8 位 = 320 位,再加上上面二进制串中的 39 个分隔空格), 压缩了 (359-133) / 359 = 62.9%
此编码满足前缀编码, 即字符的编码都不能是其他字符编码的前缀。不会造成匹配的多义性
这棵赫夫曼树根据排序方法不同,也可能不太一样,这样对应的赫夫曼编码也不完全一样,但是 WPL(带权路径长度)是一样的,都是最小的
package cn.guizimo.huffmancode;
import java.util.*;
/**
* @author guizimo
* @date 2020/8/8 11:55 上午
*/
/**
 * Huffman-coding demo that compresses a byte sequence into a packed bit stream.
 *
 * <p>Pipeline: count how often each byte occurs, build a Huffman tree from those
 * counts, derive a bit-string code for every byte, then concatenate the codes and
 * pack them eight bits per output byte.
 */
public class HuffmanCode {

    /** Code table (byte value to its '0'/'1' code string) filled by the latest {@link #getCodes(Node)} call. */
    static Map<Byte, String> huffmanCodes = new HashMap<>();

    /** Path prefix passed to the tree walk for the root's children; this class never appends to it. */
    static StringBuilder stringBuilder = new StringBuilder();

    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        // Explicit charset so the demo does not depend on the platform default.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        // Huffman-encode the input.
        byte[] zip = huffmanZip(contentBytes);
        System.out.println("哈夫曼编码:" + Arrays.toString(zip));
    }

    /**
     * Compresses {@code bytes} with a Huffman code built from their own frequencies.
     *
     * @param bytes the raw input; may be empty
     * @return the packed code bits, or an empty array for empty input
     */
    private static byte[] huffmanZip(byte[] bytes) {
        if (bytes.length == 0) {
            // No symbols means no tree: reset the table instead of failing on an empty node list.
            huffmanCodes.clear();
            return new byte[0];
        }
        List<Node> nodes = getNodes(bytes);
        // Huffman tree.
        Node huffmanTree = createHuffmanTree(nodes);
        // Huffman code table.
        Map<Byte, String> codes = getCodes(huffmanTree);
        // Packed Huffman encoding.
        return zip(bytes, codes);
    }

    /**
     * Replaces every input byte by its code and packs the resulting bit string
     * into bytes, eight bits at a time.
     *
     * <p>The final byte may be built from fewer than eight bits; this method does
     * not record how many.
     *
     * @param bytes        the raw input
     * @param huffmanCodes the code table to apply
     * @return the packed bits
     * @throws IllegalArgumentException if an input byte has no entry in the table
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                throw new IllegalArgumentException("no Huffman code for byte " + b);
            }
            bits.append(code);
        }
        // Round up so a trailing partial chunk still gets its own byte.
        byte[] packed = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            int end = Math.min(i + 8, bits.length());
            // The value is at most 255; the cast keeps the low eight bits.
            packed[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return packed;
    }

    /**
     * Builds the code table for the tree rooted at {@code root} and stores it in
     * {@link #huffmanCodes}, discarding entries left over from earlier runs.
     *
     * @param root the tree root, or {@code null} for an empty tree
     * @return the shared code table (empty when {@code root} is {@code null})
     */
    private static Map<Byte, String> getCodes(Node root) {
        huffmanCodes.clear();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // The tree is a single leaf (one distinct byte): give it a one-bit code,
            // otherwise the table would be empty and nothing could be encoded.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        getCodes(root.left, "0", stringBuilder);
        getCodes(root.right, "1", stringBuilder);
        return huffmanCodes;
    }

    /**
     * Walks the subtree at {@code node}, recording a code for every leaf.
     *
     * @param node          the current node; {@code null} is ignored
     * @param code          the edge label leading to {@code node}: "0" for left, "1" for right
     * @param stringBuilder the path from the root to the parent; it is copied, not modified
     */
    private static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: keep descending.
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /** Prints the tree in pre-order, or a notice when {@code root} is {@code null}. */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("哈夫曼树为空");
        }
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * @param nodes the leaf nodes; the list is consumed by this method
     * @return the root of the tree, or {@code null} when {@code nodes} is empty
     */
    private static Node createHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            // After sorting, the two lightest nodes sit at the front of the list.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Counts the occurrences of each byte and wraps every distinct byte in a leaf node.
     *
     * @param bytes the raw input
     * @return one node per distinct byte, weighted by its occurrence count
     */
    private static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }
        List<Node> nodes = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }
}
/**
 * A node of the Huffman tree, ordered by ascending weight.
 *
 * <p>Nodes created for a byte value carry it in {@code data}; nodes created with
 * {@code data == null} are the merged parents.
 */
class Node implements Comparable<Node> {
    /** The byte value this node stands for, or {@code null} when none was given. */
    Byte data;
    /** Number of occurrences (sum of the children's weights for a merged node). */
    int weight;
    Node left;
    Node right;

    /**
     * @param data   the byte value, or {@code null} for a node without one
     * @param weight the occurrence count
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /**
     * Orders nodes from lightest to heaviest.
     *
     * @param o the node to compare against
     * @return a negative value, zero, or a positive value as this node's weight is
     *         less than, equal to, or greater than {@code o}'s
     */
    @Override
    public int compareTo(Node o) {
        // Integer.compare cannot overflow the way a subtraction can.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}
package cn.guizimo.huffmancode;
import java.util.*;
/**
* @author guizimo
* @date 2020/8/8 11:55 上午
*/
/**
 * Huffman-coding demo that compresses a byte sequence and restores it again.
 *
 * <p>Compression counts byte frequencies, builds a Huffman tree, derives a
 * bit-string code per byte and packs the concatenated codes eight bits per byte.
 * Decompression expands the packed bytes back into the bit string and matches
 * codes from left to right.
 *
 * <p>The code table and the number of significant bits in the final packed byte
 * are kept in static fields, so {@link #huffmanUnzip} decodes the output of the
 * most recent compression run.
 */
public class HuffmanCode {

    /** Code table (byte value to its '0'/'1' code string) filled by the latest {@link #getCodes(Node)} call. */
    static Map<Byte, String> huffmanCodes = new HashMap<>();

    /** Path prefix passed to the tree walk for the root's children; this class never appends to it. */
    static StringBuilder stringBuilder = new StringBuilder();

    /** How many bits (1..8) of the final byte produced by the latest {@link #zip} call are significant. */
    private static int lastByteBits = 8;

    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        // Explicit charset so encoding and decoding agree on every platform.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        // Huffman compression.
        byte[] zip = huffmanZip(contentBytes);
        System.out.println("哈夫曼压缩:" + Arrays.toString(zip));
        // Huffman decompression.
        byte[] unzip = huffmanUnzip(huffmanCodes, zip);
        System.out.println("哈夫曼解压:" + new String(unzip, java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Restores the original bytes from a packed Huffman bit stream.
     *
     * <p>The final byte is expanded to {@link #lastByteBits} bits, so leading zeros
     * of a short last chunk survive and a negative last byte is not sign-extended.
     *
     * @param huffmanCodes the code table used for compression
     * @param huffmanBytes the packed bits produced by {@link #zip}
     * @return the decoded bytes (empty for empty input)
     * @throws IllegalArgumentException if the bit stream ends in the middle of a code
     */
    private static byte[] huffmanUnzip(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
        if (huffmanBytes.length == 0) {
            return new byte[0];
        }
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < huffmanBytes.length; i++) {
            String eight = byteToBitString(true, huffmanBytes[i]);
            boolean last = (i == huffmanBytes.length - 1);
            // Only the low lastByteBits bits of the final byte carry data.
            bits.append(last ? eight.substring(8 - lastByteBits) : eight);
        }
        // Invert the table: code string to byte value.
        Map<String, Byte> byCode = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            byCode.put(entry.getValue(), entry.getKey());
        }
        // Grow a window over the bit string until it equals a known code, emit, restart.
        List<Byte> decoded = new ArrayList<>();
        int start = 0;
        for (int end = 1; end <= bits.length(); end++) {
            Byte symbol = byCode.get(bits.substring(start, end));
            if (symbol != null) {
                decoded.add(symbol);
                start = end;
            }
        }
        if (start != bits.length()) {
            throw new IllegalArgumentException(
                    "bit stream ends inside a code at bit " + start + " of " + bits.length());
        }
        byte[] result = new byte[decoded.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = decoded.get(i);
        }
        return result;
    }

    /**
     * Renders a byte as a binary string, treating it as an unsigned value.
     *
     * @param flag {@code true} to left-pad with zeros to exactly eight characters,
     *             {@code false} for the shortest representation
     * @param b    the byte to render
     * @return the binary digits of {@code b & 0xFF}
     */
    private static String byteToBitString(boolean flag, byte b) {
        // Mask first: a negative byte would otherwise widen to 32 one-extended bits.
        int unsigned = b & 0xFF;
        if (flag) {
            // Setting bit 8 forces nine digits; dropping the first leaves eight.
            return Integer.toBinaryString(unsigned | 0x100).substring(1);
        }
        return Integer.toBinaryString(unsigned);
    }

    /**
     * Compresses {@code bytes} with a Huffman code built from their own frequencies.
     *
     * @param bytes the raw input; may be empty
     * @return the packed code bits, or an empty array for empty input
     */
    private static byte[] huffmanZip(byte[] bytes) {
        if (bytes.length == 0) {
            // No symbols means no tree: reset the table instead of failing on an empty node list.
            huffmanCodes.clear();
            return new byte[0];
        }
        List<Node> nodes = getNodes(bytes);
        // Huffman tree.
        Node huffmanTree = createHuffmanTree(nodes);
        // Huffman code table.
        Map<Byte, String> codes = getCodes(huffmanTree);
        // Packed Huffman encoding.
        return zip(bytes, codes);
    }

    /**
     * Replaces every input byte by its code and packs the resulting bit string
     * into bytes, eight bits at a time. Records in {@link #lastByteBits} how many
     * bits went into the final byte.
     *
     * @param bytes        the raw input
     * @param huffmanCodes the code table to apply
     * @return the packed bits
     * @throws IllegalArgumentException if an input byte has no entry in the table
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                throw new IllegalArgumentException("no Huffman code for byte " + b);
            }
            bits.append(code);
        }
        int remainder = bits.length() % 8;
        lastByteBits = (remainder == 0) ? 8 : remainder;
        // Round up so a trailing partial chunk still gets its own byte.
        byte[] packed = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            int end = Math.min(i + 8, bits.length());
            // The value is at most 255; the cast keeps the low eight bits.
            packed[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return packed;
    }

    /**
     * Builds the code table for the tree rooted at {@code root} and stores it in
     * {@link #huffmanCodes}, discarding entries left over from earlier runs.
     *
     * @param root the tree root, or {@code null} for an empty tree
     * @return the shared code table (empty when {@code root} is {@code null})
     */
    private static Map<Byte, String> getCodes(Node root) {
        huffmanCodes.clear();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // The tree is a single leaf (one distinct byte): give it a one-bit code,
            // otherwise the table would be empty and nothing could be encoded.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        getCodes(root.left, "0", stringBuilder);
        getCodes(root.right, "1", stringBuilder);
        return huffmanCodes;
    }

    /**
     * Walks the subtree at {@code node}, recording a code for every leaf.
     *
     * @param node          the current node; {@code null} is ignored
     * @param code          the edge label leading to {@code node}: "0" for left, "1" for right
     * @param stringBuilder the path from the root to the parent; it is copied, not modified
     */
    private static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: keep descending.
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /** Prints the tree in pre-order, or a notice when {@code root} is {@code null}. */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("哈夫曼树为空");
        }
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     *
     * @param nodes the leaf nodes; the list is consumed by this method
     * @return the root of the tree, or {@code null} when {@code nodes} is empty
     */
    private static Node createHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            // After sorting, the two lightest nodes sit at the front of the list.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Counts the occurrences of each byte and wraps every distinct byte in a leaf node.
     *
     * @param bytes the raw input
     * @return one node per distinct byte, weighted by its occurrence count
     */
    private static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }
        List<Node> nodes = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }
}
/**
 * A node of the Huffman tree, ordered by ascending weight.
 *
 * <p>Nodes created for a byte value carry it in {@code data}; nodes created with
 * {@code data == null} are the merged parents.
 */
class Node implements Comparable<Node> {
    /** The byte value this node stands for, or {@code null} when none was given. */
    Byte data;
    /** Number of occurrences (sum of the children's weights for a merged node). */
    int weight;
    Node left;
    Node right;

    /**
     * @param data   the byte value, or {@code null} for a node without one
     * @param weight the occurrence count
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /**
     * Orders nodes from lightest to heaviest.
     *
     * @param o the node to compare against
     * @return a negative value, zero, or a positive value as this node's weight is
     *         less than, equal to, or greater than {@code o}'s
     */
    @Override
    public int compareTo(Node o) {
        // Integer.compare cannot overflow the way a subtraction can.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}
感谢
尚硅谷
以及勤劳的自己