JAVA实现哈夫曼编码、译码、压缩、解压

本文以"hello world!"进行哈夫曼编码、译码、压缩、解压
创建节点类 利用java集合类排序,实现Comparable接口,方便节点排序,此方法表示从小到大排序,若要从大到小排序,加一个负号即可。

/**
 * Orders nodes by ascending weight.
 *
 * @param o the node to compare against
 * @return a negative value, zero, or a positive value when this node's weight is
 *         smaller than, equal to, or greater than {@code o.weight}
 */
public int compareTo(HNode o){
		// Integer.compare avoids the int overflow that "this.weight - o.weight" can hit.
		// Swap the two arguments to sort from largest to smallest instead.
		return Integer.compare(this.weight, o.weight);
	}
/**
 * A node of the Huffman tree.
 *
 * <p>Nodes order themselves by ascending {@code weight} so that a list of nodes can be
 * sorted with {@code Collections.sort}.
 */
class HNode implements Comparable<HNode> {
	/** Byte value carried by this node; stays {@code null} when built with {@link #HNode(int)}. */
	Byte data;
	/** Weight of the node. */
	int weight;
	/** Left child, or {@code null} if none. */
	HNode left;
	/** Right child, or {@code null} if none. */
	HNode right;

	/**
	 * Creates a node with a weight and a byte value.
	 *
	 * @param weight weight of the node
	 * @param data   byte value carried by the node; may be {@code null}
	 */
	public HNode(int weight,Byte data) {
		this.weight = weight;
		this.data = data;
	}

	/**
	 * Creates a node that carries only a weight; {@code data} is left {@code null}.
	 *
	 * @param i weight of the node
	 */
	public HNode(int i) {
		this.weight = i;
	}

	@Override
	public String toString() {
		return "HNode [data=" + data + ", weight=" + weight + "]";
	}

	/**
	 * Compares two nodes by weight, smallest first.
	 *
	 * @param o the node to compare against
	 * @return negative, zero, or positive as this weight is less than, equal to, or
	 *         greater than {@code o.weight}
	 */
	@Override
	public int compareTo(HNode o){
		// Integer.compare cannot overflow, unlike "this.weight - o.weight".
		return Integer.compare(this.weight, o.weight);
	}

	/** Prints this node, then its left subtree, then its right subtree (pre-order). */
	public void preOrder () {
		System.out.println(this);
		if(this.left != null) {
			this.left.preOrder();
		}
		if(this.right != null) {
			this.right.preOrder();
		}
	}
}

接收字节数组,统计每个字节出现的次数,并为每个不同的字节生成一个节点,返回节点列表

	/**
	 * Counts how often each byte value occurs and wraps every distinct byte in a node.
	 *
	 * @param bs the bytes to analyse
	 * @return one {@code HNode} per distinct byte, with the occurrence count as its weight
	 *         and the byte as its data; empty when {@code bs} is empty
	 */
	private static List<HNode> getNodes(byte[] bs){
		// Occurrence count per byte value.
		Map<Byte, Integer> counts = new HashMap<>();
		for (byte b : bs) {
			Integer count = counts.get(b);
			if (count == null) { // first time this byte is seen
				counts.put(b, 1);
			} else {
				counts.put(b, count + 1);
			}
		}
		// Turn every (byte, count) pair into a node.
		List<HNode> nodes = new ArrayList<>(counts.size());
		for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
			nodes.add(new HNode(entry.getValue(), entry.getKey()));
		}
		return nodes;
	}

生成哈夫曼树
1、找到所有节点集合中权值最小的两个,分别做二叉树的左右节点
2、将二叉树的父节点加入集合
3、构建二叉树
4、从集合中删除1中找到的两个节点
5、循环1-4步骤,直到集合只剩一个元素,此节点为哈夫曼树的根节点,将该元素返回

/**
 * Builds a Huffman tree by repeatedly merging the two lightest nodes.
 *
 * <p>The list is modified in place: on return it holds only the root. The remaining
 * nodes are printed after every merge.
 *
 * @param nodes the nodes to merge; must contain at least one element
 * @return the root of the tree (the single input node when only one was given)
 * @throws IllegalArgumentException if {@code nodes} is {@code null} or empty
 */
public static HNode createHuffmanTree(List<HNode> nodes) {
		if (nodes == null || nodes.isEmpty()) {
			throw new IllegalArgumentException("nodes must contain at least one node");
		}
		// Repeat until a single node (the root) is left.
		while(nodes.size() > 1) {
			// Sort ascending by weight so the two lightest nodes sit at the front.
			Collections.sort(nodes);
			// Take the two lightest nodes out of the list; after the first removal
			// the second-lightest node has moved to index 0.
			HNode left = nodes.remove(0);
			HNode right = nodes.remove(0);
			// Their parent carries the combined weight and no data.
			HNode parent = new HNode(left.weight + right.weight,null);
			parent.left = left;
			parent.right = right;
			nodes.add(parent);
			System.out.println(nodes);
		}
		 return nodes.get(0);
	}

哈夫曼编码;
将哈夫曼编码表放到 Map 里。在生成哈夫曼编码时,要确定某一节点的路径(例如 0-1-1-0-1),故此定义一个 StringBuilder 用来方便拼接路径,避免产生一堆中间对象
参数:node 传入节点 code 路径 左 0 右 1 stringBuilder 以前走过的路径

	/** Huffman code table filled by {@code getCode}: byte value to its bit string. */
	static Map<Byte, String> mapCode = new HashMap<>();
	/** Prefix builder handed to {@code getCode} as the starting path. */
	static StringBuilder stringBuilder = new StringBuilder();

	/**
	 * Walks the tree and records the code of every node that carries data in {@code mapCode}.
	 *
	 * <p>Entries are added to {@code mapCode} and never removed here, so codes from an
	 * earlier tree stay in the table unless a later call overwrites them.
	 *
	 * @param node          the node to process; {@code null} is ignored
	 * @param code          the step taken to reach {@code node}: "0" for left, "1" for
	 *                      right, "" for the root
	 * @param stringBuilder the path walked before this step; it is copied, not modified
	 */
	private static void getCode(HNode node,String code,StringBuilder stringBuilder ) {
		if (node == null) {
			return;
		}
		// Copy so that sibling branches do not see each other's path.
		StringBuilder path = new StringBuilder(stringBuilder);
		path.append(code);
		if(node.data == null) {  // no data: treat as an internal node and recurse
			getCode(node.left,"0",path);
			getCode(node.right,"1",path);
		}else { // node carries a byte: record its code
			// A tree whose root is the only leaf would otherwise get the empty code
			// and produce no bits at all; give it the one-bit code "0" instead.
			String bits = path.length() == 0 ? "0" : path.toString();
			mapCode.put(node.data,bits);
		}
	}

按位压缩:将字符串"hello world!"对应的byte[]转换成按位压缩后的Byte[]
1、遍历bytes数组,将每一个字符对应的哈夫曼编码存放在stringBuilder里面
2、将"10011110001010…"转换成byte[] by 数组 考虑最后一组不足8位的情况
3、申请index 记录第几个byte
4、遍历stringBuilder按8位截取放在by里面
5、返回by

/**
 * Packs the Huffman codes of {@code bytes} into a byte array, eight bits per byte.
 *
 * <p>The codes of all input bytes are concatenated into one string of binary digits
 * (which is also printed), then cut into groups of eight digits. Each group is parsed
 * as a base-2 number and narrowed to a byte. The final group may be shorter than eight
 * digits and is stored as its plain numeric value.
 *
 * @param bytes   the data to compress
 * @param mapCode code table mapping a byte value to its binary-digit string
 * @return the packed bytes, {@code ceil(totalBits / 8)} in length
 */
private static byte[] zip(byte[] bytes,Map mapCode) {
		// Concatenate the code of every input byte.
		StringBuilder bits = new StringBuilder();
		for (int k = 0; k < bytes.length; k++) {
			bits.append(mapCode.get(bytes[k]));
		}
		System.out.println(bits.toString() + "========================");

		final int totalBits = bits.length();
		// Round up so a trailing partial group still gets its own byte.
		byte[] packed = new byte[(totalBits + 7) / 8];
		int slot = 0;
		for (int start = 0; start < totalBits; start += 8) {
			// The last group simply ends where the bit string ends.
			int end = Math.min(start + 8, totalBits);
			String group = bits.substring(start, end);
			packed[slot++] = (byte) Integer.parseInt(group, 2);
		}
		return packed;
 	}

哈夫曼译码:
1、得到huffmanBytes对应的二进制字符串 “0010101000…”
2、将byte[] 转换成二进制字符串 判断是不是最后一个字节
3、字符串按照指定的哈夫曼编码进行解码:将 Map 的键值对调,例如 a -> 110 变为 110 -> a
4、创建一个集合list 存放byte 遍历字符串进行匹配
5、list 存放"hello world!" 将list的数据放在byte[] 数组

/**
 * Converts one compressed byte into its string of binary digits.
 *
 * @param flag {@code true} to always return exactly eight digits, left-padded with
 *             zeros; {@code false} to return the digits without leading zeros
 *             ({@code decode} passes {@code false} for the last byte only)
 * @param b    the byte to convert, read as an unsigned 8-bit value
 * @return the binary digits of {@code b}; never more than eight characters
 */
private static String byteToString(Boolean flag,byte b) {
		// Keep only the low eight bits. Without the mask a negative byte is
		// sign-extended and toBinaryString would return 32 digits.
		int temp = b & 0xFF;
		if(flag) {
			// Setting bit 8 forces a nine-digit result ("1" + eight digits);
			// dropping that leading "1" leaves the zero-padded byte.
			return Integer.toBinaryString(temp | 0x100).substring(1);
		}
		return Integer.toBinaryString(temp);
	}

/**
 * Decodes Huffman-compressed bytes back into the original bytes.
 *
 * <p>Every byte is expanded to binary digits with {@code byteToString}: eight digits for
 * all bytes except the last, which is expanded without padding. The digit string is
 * then read left to right, extending the current candidate one digit at a time until
 * it matches a code in the table.
 *
 * @param huffmanCode  code table mapping a byte value to its binary-digit string
 * @param huffmanBytes the compressed bytes
 * @return the decoded bytes
 * @throws IllegalArgumentException if the remaining digits match no code in the table
 */
private static byte[] decode(Map<Byte, String> huffmanCode ,byte[] huffmanBytes) {
		// Rebuild the binary-digit string, e.g. "0010101000...".
		StringBuilder bits = new StringBuilder();
		for (int i = 0; i < huffmanBytes.length; i++) {
			boolean isLast = (i == huffmanBytes.length - 1);
			// Only the last byte is expanded without zero padding.
			bits.append(byteToString(!isLast, huffmanBytes[i]));
		}

		// Invert the table so a code looks up its byte: a -> 110 becomes 110 -> a.
		Map<String, Byte> byCode = new HashMap<>();
		for (Map.Entry<Byte, String> entry : huffmanCode.entrySet()) {
			byCode.put(entry.getValue(), entry.getKey());
		}

		List<Byte> decoded = new ArrayList<>();
		int start = 0;
		while (start < bits.length()) {
			Byte symbol = null;
			int end = start;
			// Grow the candidate by one digit until it matches a code or the digits run out.
			while (symbol == null && end < bits.length()) {
				end++;
				symbol = byCode.get(bits.substring(start, end));
			}
			if (symbol == null) {
				// Previously this ran past the end of the string and failed with
				// StringIndexOutOfBoundsException.
				throw new IllegalArgumentException(
						"bits from position " + start + " do not match any Huffman code: "
								+ bits.substring(start));
			}
			decoded.add(symbol);
			start = end;
		}

		// Copy the decoded values into a primitive array.
		byte[] result = new byte[decoded.size()];
		for (int i = 0; i < result.length; i++) {
			result[i] = decoded.get(i);
		}
 		return result;
	}

测试:

/**
 * Demo: compresses "hello world!", prints the compressed bytes, then decodes them
 * again and prints the recovered text.
 *
 * @param args unused
 */
public static void main(String[] args) {
		final String text = "hello world!";
		final byte[] original = text.getBytes();

		// First pass: show the compressed bytes.
		byte[] preview = huffmanZip(original);
		System.out.println(Arrays.toString(preview));

		// Second pass: the result that is actually decoded below.
		byte[] compressed = huffmanZip(original);

		// Sample conversion of a single byte to binary digits.
		String sampleBits = byteToString(true, (byte) -1);
		System.out.println(sampleBits);

		byte[] restored = decode(mapCode, compressed);
		System.out.println(new String(restored));
	}
	/**
	 * Convenience wrapper that runs the whole compression pipeline: counts byte
	 * frequencies, builds the Huffman tree, derives the code table into {@code mapCode}
	 * and packs the input with it. The node list and the tree are printed along the way.
	 *
	 * @param bs the bytes to compress
	 * @return the compressed bytes
	 */
	private static byte[] huffmanZip( byte[] bs) {
		// One node per distinct byte, weighted by its frequency.
		List leaves = getNodes(bs);
		System.out.println(leaves);

		// Merge the nodes into a Huffman tree.
		HNode treeRoot = createHuffmanTree(leaves);
		System.out.println("==================");
		// NOTE(review): a static preOrder(HNode) is not defined in the visible source;
		// confirm that it exists in the enclosing class.
		preOrder(treeRoot);

		// Fill the shared code table, starting from the root with an empty step.
		getCode(treeRoot, "", stringBuilder);

		byte[] packed = zip(bs, mapCode);
		return packed;
	}

你可能感兴趣的:(学习总结)