本文以"hello world!"进行哈夫曼编码、译码、压缩、解压
创建节点类 利用java集合类排序,实现Comparable接口,方便节点排序,此方法表示从小到大排序,若要从大到小排序,加一个负号即可。
/**
 * Compares this node with another node by weight, giving ascending order when sorted.
 *
 * @param o the node to compare against
 * @return a negative value, zero, or a positive value when this node's weight is
 *         smaller than, equal to, or greater than {@code o}'s weight
 */
public int compareTo(HNode o) {
    // Integer.compare cannot overflow, unlike "this.weight - o.weight".
    // For descending order swap the arguments: Integer.compare(o.weight, this.weight).
    return Integer.compare(this.weight, o.weight);
}
/**
 * Binary-tree node carrying an optional byte value and an integer weight.
 * Nodes are ordered by ascending weight so a list of them can be sorted directly.
 */
class HNode implements Comparable<HNode> {
    /** Byte value attached to the node; stays null when the node is built without one. */
    Byte data;
    /** Weight used for ordering. */
    int weight;
    HNode left;
    HNode right;

    /**
     * @param weight weight of the node
     * @param data   byte value attached to the node, may be null
     */
    public HNode(int weight, Byte data) {
        this.weight = weight;
        this.data = data;
    }

    /**
     * Creates a node that has a weight but no byte value.
     *
     * @param i weight of the node
     */
    public HNode(int i) {
        this.weight = i;
    }

    @Override
    public String toString() {
        return "HNode [data=" + data + ", weight=" + weight + "]";
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare against
     * @return negative, zero or positive as this weight is less than, equal to or
     *         greater than the other weight
     */
    @Override
    public int compareTo(HNode o) {
        // Integer.compare avoids the overflow a plain subtraction can produce.
        return Integer.compare(this.weight, o.weight);
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }
}
接收字节数组,统计每个字节出现的次数,并返回由(字节、出现次数)构成的节点列表
/**
 * Counts how often each byte value occurs in {@code bs} and wraps every
 * (byte, count) pair in an {@link HNode} whose weight is the count.
 *
 * @param bs the bytes to analyse
 * @return a mutable list with one node per distinct byte value; the order follows
 *         the iteration order of the counting map
 */
private static List<HNode> getNodes(byte[] bs) {
    // Tally the occurrences of every byte value.
    Map<Byte, Integer> counts = new HashMap<>();
    for (byte b : bs) {
        Integer seen = counts.get(b);
        counts.put(b, seen == null ? 1 : seen + 1);
    }

    // Turn each (byte, count) entry into a node.
    List<HNode> nodes = new ArrayList<>(counts.size());
    for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
        nodes.add(new HNode(entry.getValue(), entry.getKey()));
    }
    return nodes;
}
生成哈夫曼树
1、找到节点集合中权值最小的两个节点,分别作为二叉树的左右子节点
2、将二叉树的父节点加入集合
3、构建二叉树
4、从集合中删除1中找到的两个节点
5、循环执行1-4步骤,直到集合只剩一个元素,此节点即为哈夫曼树的根节点,将该节点返回
/**
 * Builds a Huffman tree by repeatedly merging the two lightest nodes of {@code nodes}.
 * The list is modified in place: when the method returns it holds only the root.
 * The list is printed after every merge step.
 *
 * @param nodes the nodes to merge; must contain at least one element
 * @return the root of the resulting tree
 * @throws IllegalArgumentException if {@code nodes} is empty
 */
public static HNode createHuffmanTree(List<HNode> nodes) {
    if (nodes.isEmpty()) {
        throw new IllegalArgumentException("cannot build a Huffman tree from an empty node list");
    }
    // Repeat until a single node (the root) is left.
    while (nodes.size() > 1) {
        // Ascending by weight, so the two lightest nodes sit at the front.
        Collections.sort(nodes);
        // Take the two lightest nodes out of the list.
        HNode left = nodes.remove(0);
        HNode right = nodes.remove(0);
        // The parent carries the combined weight and no byte value.
        HNode parent = new HNode(left.weight + right.weight, null);
        parent.left = left;
        parent.right = right;
        nodes.add(parent);
        System.out.println(nodes);
    }
    return nodes.get(0);
}
哈夫曼编码;
将哈夫曼编码表存放到 Map 中
参数:node 为传入的节点;code 为到达该节点的路径(左为 0,右为 1);stringBuilder 为之前已走过的路径
/** Huffman code table filled by getCode: byte value -> its code as a string of '0' and '1'. */
static Map<Byte, String> mapCode = new HashMap<>();
/** Builder handed to getCode as the starting path; getCode copies it and never modifies it. */
static StringBuilder stringBuilder = new StringBuilder();

/**
 * Walks the tree below {@code node} and stores the code of every leaf in {@code mapCode}.
 * A node whose {@code data} is null is treated as an inner node, any other node as a leaf.
 *
 * @param node          node to process; a null node is ignored
 * @param code          the step taken to reach {@code node}: "0" for left, "1" for right,
 *                      "" for the root
 * @param stringBuilder the path walked before this step; it is copied, not modified
 */
private static void getCode(HNode node, String code, StringBuilder stringBuilder) {
    if (node == null) {
        return;
    }
    // Work on a copy so sibling branches do not see each other's path.
    StringBuilder path = new StringBuilder(stringBuilder).append(code);
    if (node.data == null) {
        // Inner node: descend left with "0" and right with "1".
        getCode(node.left, "0", path);
        getCode(node.right, "1", path);
    } else {
        // A tree consisting of a single leaf would otherwise get the empty code,
        // which encodes nothing; give that lone symbol the one-bit code "0".
        if (path.length() == 0) {
            path.append('0');
        }
        mapCode.put(node.data, path.toString());
    }
}
按位压缩:将字符串"hello world!"对应的byte[]转换成按位压缩后的byte[]
1、遍历bytes数组,将每一个字符对应的哈夫曼编码存放在stringBuilder里面
2、将"10011110001010…"转换成byte[] by 数组 考虑最后一组不足8位的情况
3、申请index 记录第几个byte
4、遍历stringBuilder按8位截取放在by里面
5、返回by
/**
 * Replaces every input byte by its Huffman code and packs the resulting bit string
 * into bytes, eight bits per byte. The concatenated bit string is printed before packing.
 *
 * <p>Each 8-bit chunk is stored as its numeric value. The last chunk may be shorter
 * than eight bits; its length is not recorded in the output, so the leading zeros of
 * that chunk cannot be recovered from the returned array alone.
 *
 * @param bytes   the bytes to compress
 * @param mapCode code table mapping each byte value to a string of '0' and '1'
 * @return the packed bytes; an empty array when the bit string is empty
 * @throws IllegalArgumentException if a byte of {@code bytes} has no entry in {@code mapCode}
 */
private static byte[] zip(byte[] bytes, Map<Byte, String> mapCode) {
    // Concatenate the codes of all input bytes.
    StringBuilder bits = new StringBuilder();
    for (byte b : bytes) {
        String code = mapCode.get(b);
        if (code == null) {
            // Without this check the text "null" would be appended and fail later in parseInt.
            throw new IllegalArgumentException("no Huffman code for byte " + b);
        }
        bits.append(code);
    }
    System.out.println(bits.toString() + "========================");

    // Number of output bytes: bit count divided by 8, rounded up.
    int len = (bits.length() + 7) / 8;
    byte[] by = new byte[len];
    for (int index = 0; index < len; index++) {
        int from = index * 8;
        int to = Math.min(from + 8, bits.length()); // the last chunk may hold fewer than 8 bits
        by[index] = (byte) Integer.parseInt(bits.substring(from, to), 2);
    }
    return by;
}
哈夫曼译码:
1、得到huffmanBytes对应的二进制字符串 “0010101000…”
2、将byte[] 转换成二进制字符串 判断是不是最后一个字节
3、字符串按照指定的哈夫曼编码进行解码:先将 Map 的键和值对调,例如 a -> 110 变为 110 -> a
4、创建一个集合list 存放byte 遍历字符串进行匹配
5、list 存放"hello world!" 将list的数据放在byte[] 数组
/**
 * Renders one byte as a string of binary digits (its two's-complement bit pattern).
 *
 * @param flag true to return exactly 8 digits, padded with leading zeros;
 *             false to return the digits without leading zeros (at most 8)
 * @param b    the byte to render
 * @return the binary digits of {@code b}
 */
private static String byteToString(Boolean flag, byte b) {
    // Mask to the low 8 bits so a negative byte is not sign-extended to 32 digits.
    int value = b & 0xFF;
    if (flag) {
        // Setting bit 8 forces a 9-digit result; dropping the first digit leaves 8 padded digits.
        return Integer.toBinaryString(value | 0x100).substring(1);
    }
    return Integer.toBinaryString(value);
}
/**
 * Decodes bytes produced by bit-packing Huffman codes back into the original bytes.
 *
 * <p>Every byte except the last is expanded to exactly 8 binary digits; the last byte
 * is expanded without leading zeros. The bit string is then split greedily: at each
 * position the shortest prefix that is a known code is taken.
 *
 * @param huffmanCode  code table mapping each byte value to its code ('0'/'1' string)
 * @param huffmanBytes the packed bytes to decode
 * @return the decoded bytes
 * @throws IllegalArgumentException if the bit string ends with bits that match no code
 */
private static byte[] decode(Map<Byte, String> huffmanCode, byte[] huffmanBytes) {
    // Rebuild the bit string, e.g. "0010101000...".
    StringBuilder bits = new StringBuilder();
    for (int i = 0; i < huffmanBytes.length; i++) {
        boolean isLast = (i == huffmanBytes.length - 1);
        // Only the bytes before the last one are padded to 8 digits.
        bits.append(byteToString(!isLast, huffmanBytes[i]));
    }

    // Invert the table: code -> byte value.
    Map<String, Byte> codeToByte = new HashMap<>();
    for (Map.Entry<Byte, String> entry : huffmanCode.entrySet()) {
        codeToByte.put(entry.getValue(), entry.getKey());
    }

    // Grow the window [start, end) until it matches a code, then start a new window.
    List<Byte> decoded = new ArrayList<>();
    int start = 0;
    for (int end = 1; end <= bits.length(); end++) {
        Byte value = codeToByte.get(bits.substring(start, end));
        if (value != null) {
            decoded.add(value);
            start = end;
        }
    }
    if (start != bits.length()) {
        // Leftover bits would previously run past the end of the string.
        throw new IllegalArgumentException(
                "trailing bits match no Huffman code: " + bits.substring(start));
    }

    // Copy the boxed values into a primitive array.
    byte[] result = new byte[decoded.size()];
    for (int i = 0; i < result.length; i++) {
        result[i] = decoded.get(i);
    }
    return result;
}
测试:
/**
 * Demo: compresses "hello world!", prints the packed bytes, prints the 8-digit
 * rendering of the byte -1, then decodes the packed bytes and prints the text.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String content = "hello world!";
    // Use an explicit charset so the result does not depend on the platform default.
    byte[] bs = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    // Compress once and reuse the result for both printing and decoding.
    byte[] b = huffmanZip(bs);
    System.out.println(Arrays.toString(b));
    System.out.println(byteToString(true, (byte) -1));
    byte[] Rbs = decode(mapCode, b);
    System.out.println(new String(Rbs, java.nio.charset.StandardCharsets.UTF_8));
}
/**
 * Wrapper that runs the whole compression pipeline: count bytes, build the tree,
 * derive the code table into {@code mapCode}, then pack the input with that table.
 * The node list and the tree are printed along the way.
 *
 * @param bs the bytes to compress
 * @return the packed bytes; an empty array for empty input
 */
private static byte[] huffmanZip(byte[] bs) {
    // mapCode is static: drop codes left over from an earlier call.
    mapCode.clear();
    if (bs.length == 0) {
        // Nothing to encode, and no tree can be built from zero nodes.
        return new byte[0];
    }
    // Build the Huffman tree.
    List<HNode> nodes = getNodes(bs);
    System.out.println(nodes);
    HNode root = createHuffmanTree(nodes);
    System.out.println("==================");
    // NOTE(review): no static preOrder(HNode) is visible in this section; presumably it is
    // defined elsewhere in the file. If not, root.preOrder() is the visible equivalent.
    preOrder(root);
    getCode(root, "", stringBuilder);
    return zip(bs, mapCode);
}