Java实现哈夫曼编码和解码

题目:将一个字符串进行哈夫曼编码;编码过程中,会得到每个字符的编码,通过已知的每个字符的编码对之前的编码进行解码。

分析:

首先是哈夫曼编码算法,引用李泽年写的《多媒体技术教程》中对哈夫曼编码算法的描述:

Initialization: Put all symbols on a list sorted according to their frequency counts.
Repeat until the list has only one symbol left:
From the list pick two symbols with the lowest frequency counts. Form a Huffman subtree that has these two symbols as child nodes and create a parent node.
Assign the sum of the children's frequency counts to the parent and insert it into the list such that the order is maintained.
Delete the children from the list.
Assign a codeword for each leaf based on the path from the root.
我的代码是基于这段算法描述实现的。实际上,我看的是中文版,但是没有找到该书的中文电子版,只好把英文版粘过来了。不过,好在英文版的也不复杂。


接下来是解码。虽然解码过程很简单,但是却是本文存在的理由。我在网上看了一些文章,都忽略了一个问题:编码和解码过程中都有的东西是什么?也就是,依靠什么东西来解码?本文的答案是“每个字符的编码”,它在编码的过程中生成,和字符串编码一起传到解码端用于解码。你也可以说是“每个字符出现的次数”或者“哈夫曼树”,不管是“每个字符出现的次数”还是“哈夫曼树”,你都需要通过它们得到每个字符的编码之后才能进行解码。

下面是Java代码:

package com.liyuncong.algorithms.algorithms_huffman;

/**
 * A node of a binary tree: an optional left child, an optional right child
 * and a {@link Data} payload. All three references start out as {@code null}.
 */
public class Node {
	private Node leftChild;
	private Node rightChild;
	private Data data;

	/** @return the payload stored in this node, or {@code null} if none was set */
	public Data getData() {
		return this.data;
	}

	/** @param data the payload to store in this node */
	public void setData(Data data) {
		this.data = data;
	}

	/** @return the left child, or {@code null} if there is none */
	public Node getLeftChild() {
		return this.leftChild;
	}

	/** @param leftChild the node to attach as the left child */
	public void setLeftChild(Node leftChild) {
		this.leftChild = leftChild;
	}

	/** @return the right child, or {@code null} if there is none */
	public Node getRightChild() {
		return this.rightChild;
	}

	/** @param rightChild the node to attach as the right child */
	public void setRightChild(Node rightChild) {
		this.rightChild = rightChild;
	}

	/**
	 * Renders this node together with both subtrees; the children are
	 * rendered recursively through their own {@code toString()}.
	 */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("Node [leftChild=");
		text.append(leftChild);
		text.append(", data=").append(data);
		text.append(", rightChild=").append(rightChild);
		text.append("]");
		return text.toString();
	}

}

package com.liyuncong.algorithms.algorithms_huffman;

/**
 * Payload of a tree node: a character and its frequency count.
 * Both fields default to zero until set.
 */
public class Data {
	private char c;
	private int frequency;

	/** @return the stored character */
	public char getC() {
		return this.c;
	}

	/** @param c the character to store */
	public void setC(char c) {
		this.c = c;
	}

	/** @return the stored frequency count */
	public int getFrequency() {
		return this.frequency;
	}

	/** @param frequency the frequency count to store */
	public void setFrequency(int frequency) {
		this.frequency = frequency;
	}

	/** Renders the character and the frequency count in a single line. */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("Data [c=");
		text.append(c);
		text.append(", frequency=").append(frequency);
		text.append("]");
		return text.toString();
	}

}

package com.liyuncong.algorithms.algorithms_huffman;

import java.util.Map;

/**
 * Pairs an encoded string with the per-character code table that was used
 * to produce it, so both can be handed to a decoder together.
 */
public class EncodeResult {
	/** The string after encoding. */
	private final String encode;
	/** The character-to-code table. */
	private final Map<Character, String> letterCode;

	/**
	 * @param encode     the string after encoding
	 * @param letterCode the character-to-code table; the reference is stored as-is, not copied
	 */
	public EncodeResult(String encode, Map<Character, String> letterCode) {
		this.letterCode = letterCode;
		this.encode = encode;
	}

	/** @return the character-to-code table passed to the constructor */
	public Map<Character, String> getLetterCode() {
		return this.letterCode;
	}

	/** @return the string after encoding */
	public String getEncode() {
		return this.encode;
	}
}

package com.liyuncong.algorithms.algorithms_huffman;

/**
 * Contract for an encoder/decoder pair that exchanges data through
 * {@link EncodeResult}.
 */
public interface HuffmanAlgorithm {

	/**
	 * Encodes a string.
	 *
	 * @param str the text to encode
	 * @return the encoded text together with the per-character code table
	 */
	EncodeResult encode(String str);

	/**
	 * Decodes a previously produced result back into text.
	 *
	 * @param decodeResult the encoded text together with the per-character code table
	 * @return the decoded text
	 */
	String decode(EncodeResult decodeResult);
}

package com.liyuncong.algorithms.algorithms_huffman;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Huffman encoder/decoder working on strings of '0' and '1' characters.
 *
 * <p>Encoding counts the characters of the input, builds a Huffman tree from
 * a list kept in descending frequency order, derives one code per character
 * from the root-to-leaf paths ('0' for a left edge, '1' for a right edge) and
 * concatenates the codes. Decoding needs only the encoded string and the
 * character-to-code table carried by {@link EncodeResult}.
 */
public class HuffmanAlgorithmImpl1 implements HuffmanAlgorithm {

	/**
	 * Orders nodes by descending frequency, so the two rarest nodes always
	 * sit at the tail of a sorted list.
	 */
	private static final Comparator<Node> BY_FREQUENCY_DESC = new Comparator<Node>() {
		@Override
		public int compare(Node first, Node second) {
			int firstFrequency = first.getData().getFrequency();
			int secondFrequency = second.getData().getFrequency();
			if (firstFrequency > secondFrequency) {
				return -1;
			}
			if (firstFrequency < secondFrequency) {
				return 1;
			}
			return 0;
		}
	};

	/**
	 * Huffman-encodes a string.
	 *
	 * @param str the text to encode; must not be {@code null}
	 * @return the encoded text and the code table used; for an empty input
	 *         both the encoded text and the code table are empty
	 * @throws NullPointerException if {@code str} is {@code null}
	 */
	public EncodeResult encode(String str) {
		if (str == null) {
			throw new NullPointerException("str must not be null");
		}
		// An empty input has no symbols, so there is no tree to build.
		if (str.length() == 0) {
			return new EncodeResult("", new HashMap<Character, String>());
		}
		List<Node> letterList = init(str);
		Node rootNode = createTree(letterList);
		Map<Character, String> letterCode = getLetterCode(rootNode);
		return encode(letterCode, str);
	}

	/**
	 * Encodes a string with an already known code table.
	 *
	 * @param letterCode the character-to-code table
	 * @param letters    the text to encode
	 * @return the concatenated codes together with {@code letterCode}
	 * @throws IllegalArgumentException if {@code letters} contains a
	 *         character that has no code in {@code letterCode}
	 */
	public EncodeResult encode(Map<Character, String> letterCode, String letters) {
		StringBuilder encoded = new StringBuilder();
		for (int i = 0, length = letters.length(); i < length; i++) {
			Character character = letters.charAt(i);
			String code = letterCode.get(character);
			if (code == null) {
				// Appending a missing code would silently emit the text "null".
				throw new IllegalArgumentException("No code defined for character '"
						+ character + "' at index " + i);
			}
			encoded.append(code);
		}
		return new EncodeResult(encoded.toString(), letterCode);
	}

	/**
	 * Builds one leaf node per distinct character of the input and returns
	 * the leaves sorted by descending frequency.
	 */
	private List<Node> init(String letters) {
		Map<Character, Integer> counts = new HashMap<Character, Integer>();
		for (int i = 0; i < letters.length(); i++) {
			Character character = letters.charAt(i);
			Integer seen = counts.get(character);
			counts.put(character, seen == null ? 1 : seen + 1);
		}
		List<Node> letterList = new ArrayList<Node>(counts.size());
		for (Map.Entry<Character, Integer> entry : counts.entrySet()) {
			Data data = new Data();
			data.setC(entry.getKey());
			data.setFrequency(entry.getValue());
			Node node = new Node();
			node.setData(data);
			letterList.add(node);
		}
		// Collections.sort is stable, so equal frequencies keep their map iteration order.
		Collections.sort(letterList, BY_FREQUENCY_DESC);
		return letterList;
	}

	/**
	 * Builds the Huffman tree by repeatedly merging the two rarest nodes.
	 * The list is consumed: on return it holds only the root.
	 *
	 * @param letterList non-empty list sorted by descending frequency
	 * @return the root of the tree
	 */
	private Node createTree(List<Node> letterList) {
		while (letterList.size() > 1) {
			// The rarest node becomes the right child, the second rarest the left child.
			Node nodeRight = letterList.remove(letterList.size() - 1);
			Node nodeLeft = letterList.remove(letterList.size() - 1);
			Data data = new Data();
			data.setFrequency(nodeRight.getData().getFrequency()
					+ nodeLeft.getData().getFrequency());
			Node nodeParent = new Node();
			nodeParent.setLeftChild(nodeLeft);
			nodeParent.setRightChild(nodeRight);
			nodeParent.setData(data);
			insertSorted(letterList, nodeParent);
		}
		return letterList.get(0);
	}

	/**
	 * Inserts a node into a list sorted by descending frequency, placing it
	 * after every node whose frequency is greater than or equal to its own.
	 */
	private void insertSorted(List<Node> letterList, Node node) {
		int frequency = node.getData().getFrequency();
		int index = letterList.size();
		// Walk from the tail past every strictly rarer node.
		while (index > 0
				&& letterList.get(index - 1).getData().getFrequency() < frequency) {
			index--;
		}
		letterList.add(index, node);
	}

	/** Tells whether a node has no children. */
	private boolean isLeaf(Node node) {
		return node.getLeftChild() == null && node.getRightChild() == null;
	}

	/**
	 * Collects the code of every character in the tree.
	 *
	 * @param rootNode the root of the Huffman tree
	 * @return the character-to-code table
	 */
	private Map<Character, String> getLetterCode(Node rootNode) {
		Map<Character, String> letterCode = new HashMap<Character, String>();
		// A tree made of a single leaf has no edges, so its character gets the fixed code "1".
		if (isLeaf(rootNode)) {
			letterCode.put(rootNode.getData().getC(), "1");
			return letterCode;
		}
		getLetterCode(rootNode, "", letterCode);
		return letterCode;
	}

	/**
	 * Pre-order walk of the tree that records the path to each leaf.
	 *
	 * @param node       the subtree to walk; {@code null} is ignored
	 * @param prefix     the path from the root to {@code node}
	 * @param letterCode the table that receives the codes
	 */
	private void getLetterCode(Node node, String prefix,
			Map<Character, String> letterCode) {
		if (node == null) {
			return;
		}
		if (isLeaf(node)) {
			letterCode.put(node.getData().getC(), prefix);
			return;
		}
		getLetterCode(node.getLeftChild(), prefix + "0", letterCode);
		getLetterCode(node.getRightChild(), prefix + "1", letterCode);
	}

	/**
	 * Decodes an encoded string using the code table that accompanies it.
	 *
	 * @param decodeResult the encoded text together with its code table
	 * @return the decoded text
	 * @throws IllegalArgumentException if some part of the encoded text
	 *         does not match any code in the table
	 */
	public String decode(EncodeResult decodeResult) {
		Map<String, Character> decodeMap = getDecoder(decodeResult.getLetterCode());
		String encode = decodeResult.getEncode();
		int maxCodeLength = 0;
		for (String code : decodeMap.keySet()) {
			maxCodeLength = Math.max(maxCodeLength, code.length());
		}

		StringBuilder decoded = new StringBuilder();
		int length = encode.length();
		// [start, end) is the candidate code; trying the shortest candidate
		// first relies on no code being a prefix of another.
		int start = 0;
		int end = 1;
		while (start < length) {
			if (end > length || end - start > maxCodeLength) {
				throw new IllegalArgumentException(
						"Encoded text matches no code starting at index " + start);
			}
			String candidate = encode.substring(start, end);
			if (decodeMap.containsKey(candidate)) {
				decoded.append(decodeMap.get(candidate));
				start = end;
				end = start + 1;
			} else {
				end++;
			}
		}
		return decoded.toString();
	}

	/**
	 * Inverts the character-to-code table into a code-to-character table.
	 *
	 * @param letterCode the character-to-code table
	 * @return the code-to-character table
	 */
	private Map<String, Character> getDecoder(Map<Character, String> letterCode) {
		Map<String, Character> decodeMap = new HashMap<String, Character>();
		for (Map.Entry<Character, String> entry : letterCode.entrySet()) {
			decodeMap.put(entry.getValue(), entry.getKey());
		}
		return decodeMap;
	}
}

package com.liyuncong.algorithms.algorithms_huffman;

import static org.junit.Assert.*;

import org.junit.Test;

/**
 * Tests for {@link HuffmanAlgorithmImpl1}.
 */
public class HuffmanAlgorithmImpl1Test {

	/**
	 * The encoded text must be a bit string of optimal length. For "abcdda"
	 * the frequencies are a=2, b=1, c=1, d=2, and every Huffman tree for
	 * them costs 12 bits however ties are broken.
	 */
	@Test
	public void testEncodeString() {
		HuffmanAlgorithmImpl1 huffmanImpl1 = new HuffmanAlgorithmImpl1();
		EncodeResult result = huffmanImpl1.encode("abcdda");
		String encoded = result.getEncode();
		assertEquals(4, result.getLetterCode().size());
		assertTrue(encoded.matches("[01]+"));
		assertEquals(12, encoded.length());
	}

	/** Decoding an encoded string must give back the original text. */
	@Test
	public void testDecode() {
		HuffmanAlgorithmImpl1 huffmanImpl1 = new HuffmanAlgorithmImpl1();
		EncodeResult result = huffmanImpl1.encode("abcdda");
		String decode = huffmanImpl1.decode(result);
		assertEquals("abcdda", decode);
	}

	/** A text made of one repeated character must also survive a round trip. */
	@Test
	public void testDecodeSingleSymbol() {
		HuffmanAlgorithmImpl1 huffmanImpl1 = new HuffmanAlgorithmImpl1();
		EncodeResult result = huffmanImpl1.encode("aaaa");
		assertEquals("aaaa", huffmanImpl1.decode(result));
	}

}

源码放在github上:

https://github.com/l294265421/algorithms-huffman.git


你可能感兴趣的:(java,算法,哈夫曼编码)