题目:将一个字符串进行哈夫曼编码;编码过程中,会得到每个字符的编码,通过已知的每个字符的编码对之前的编码进行解码。
分析:
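首先是哈夫曼编码算法。李泽年等人所著的《多媒体技术教程》中对哈夫曼编码算法的描述,大意如下:(1)初始化:统计每个字符出现的次数,把所有字符按出现次数排序后放入一个列表;(2)重复以下步骤,直到列表中只剩下一个节点:从列表中取出出现次数最小的两个节点,创建一个以它们为子节点的父节点,父节点的出现次数等于两个子节点出现次数之和,把父节点插入列表并保持列表有序,同时把这两个子节点从列表中删除;(3)根据从根节点到每个叶子节点的路径,为该叶子节点对应的字符分配编码(例如左分支记为0,右分支记为1)。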
接下来是解码。虽然解码过程很简单,但它却是本文存在的理由。我在网上看了一些文章,它们都忽略了一个问题:编码和解码过程中共有的东西是什么?也就是说,依靠什么东西来解码?本文的答案是“每个字符的编码”,它在编码的过程中生成,和字符串的编码结果一起传到解码端用于解码。你也可以说是“每个字符出现的次数”或者“哈夫曼树”,但不管是“每个字符出现的次数”还是“哈夫曼树”,你都需要先通过它们得到“每个字符的编码”,之后才能进行解码。
下面是Java代码:
package com.liyuncong.algorithms.algorithms_huffman;

/**
 * A node of a binary tree: an optional left child, an optional right child
 * and a {@link Data} payload. All three references start out as {@code null}.
 */
public class Node {

    private Node leftChild;

    private Node rightChild;

    private Data data;

    /** @return the left child, or {@code null} if none has been set */
    public Node getLeftChild() {
        return leftChild;
    }

    /** @param leftChild the node to attach as left child (may be {@code null}) */
    public void setLeftChild(Node leftChild) {
        this.leftChild = leftChild;
    }

    /** @return the right child, or {@code null} if none has been set */
    public Node getRightChild() {
        return rightChild;
    }

    /** @param rightChild the node to attach as right child (may be {@code null}) */
    public void setRightChild(Node rightChild) {
        this.rightChild = rightChild;
    }

    /** @return the payload of this node, or {@code null} if none has been set */
    public Data getData() {
        return data;
    }

    /** @param data the payload to store in this node */
    public void setData(Data data) {
        this.data = data;
    }

    /**
     * Renders this node and, recursively, both of its subtrees.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Node [leftChild=");
        text.append(leftChild);
        text.append(", data=").append(data);
        text.append(", rightChild=").append(rightChild);
        return text.append("]").toString();
    }
}
package com.liyuncong.algorithms.algorithms_huffman; public class Data { private char c = 0; private int frequency = 0; public char getC() { return c; } public void setC(char c) { this.c = c; } public int getFrequency() { return frequency; } public void setFrequency(int frequency) { this.frequency = frequency; } @Override public String toString() { return "Data [c=" + c + ", frequency=" + frequency + "]"; } }
package com.liyuncong.algorithms.algorithms_huffman;

import java.util.Map;

/**
 * Result of encoding a string: the encoded text plus the per-character code
 * table that was used to produce it. Both references are fixed at construction
 * time; the map itself is stored as given (no copy is made).
 */
public class EncodeResult {

    /** The encoded form of the whole input string. */
    private final String encode;

    /** Character-to-code pairs. */
    private final Map<Character, String> letterCode;

    /**
     * @param encode     the encoded form of the input string
     * @param letterCode the code assigned to each character
     */
    public EncodeResult(String encode, Map<Character, String> letterCode) {
        this.encode = encode;
        this.letterCode = letterCode;
    }

    /** @return the encoded form of the input string */
    public String getEncode() {
        return encode;
    }

    /** @return the character-to-code table passed to the constructor */
    public Map<Character, String> getLetterCode() {
        return letterCode;
    }
}
package com.liyuncong.algorithms.algorithms_huffman;

/**
 * Contract for a Huffman coder: turn a string into an {@link EncodeResult}
 * and turn an {@link EncodeResult} back into a string.
 */
public interface HuffmanAlgorithm {

    /**
     * Encodes a string.
     *
     * @param str the text to encode
     * @return the encoded text together with the per-character code table
     */
    EncodeResult encode(String str);

    /**
     * Decodes a previously produced result.
     *
     * @param decodeResult the encoded text and the code table needed to decode it
     * @return the decoded text
     */
    String decode(EncodeResult decodeResult);
}
package com.liyuncong.algorithms.algorithms_huffman;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Huffman coder that represents the encoded output as a string of '0' and '1'
 * characters and ships the per-character code table alongside it so that the
 * receiver can decode without rebuilding the tree.
 */
public class HuffmanAlgorithmImpl1 implements HuffmanAlgorithm {

    /**
     * Orders nodes from highest to lowest frequency, so the two rarest nodes
     * always sit at the end of a sorted list.
     */
    private static final Comparator<Node> BY_FREQUENCY_DESCENDING = new Comparator<Node>() {
        @Override
        public int compare(Node first, Node second) {
            int a = first.getData().getFrequency();
            int b = second.getData().getFrequency();
            // Explicit comparison instead of subtraction: no overflow possible.
            return (a > b) ? -1 : ((a < b) ? 1 : 0);
        }
    };

    /**
     * Builds a Huffman tree for {@code str}, derives the code of every
     * character and encodes the string with it.
     *
     * @param str the text to encode; must not be {@code null}
     * @return the encoded text and the code table; for an empty input both the
     *         encoded text and the table are empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override
    public EncodeResult encode(String str) {
        // An empty input has no characters to build a tree from; without this
        // guard createTree would index into an empty list.
        if (str.length() == 0) {
            return new EncodeResult("", new HashMap<Character, String>());
        }
        List<Node> letterList = init(str);
        Node rootNode = createTree(letterList);
        Map<Character, String> letterCode = getLetterCode(rootNode);
        return encode(letterCode, str);
    }

    /**
     * Encodes {@code letters} with an already known code table.
     *
     * @param letterCode the code assigned to each character
     * @param letters    the text to encode
     * @return the concatenated codes together with {@code letterCode}
     * @throws IllegalArgumentException if {@code letters} contains a character
     *                                  that has no code in {@code letterCode}
     */
    public EncodeResult encode(Map<Character, String> letterCode, String letters) {
        StringBuilder encoded = new StringBuilder();
        for (int i = 0, length = letters.length(); i < length; i++) {
            char character = letters.charAt(i);
            String code = letterCode.get(character);
            if (code == null) {
                // Appending a null String would silently emit the text "null".
                throw new IllegalArgumentException(
                        "No code for character '" + character + "' at index " + i);
            }
            encoded.append(code);
        }
        return new EncodeResult(encoded.toString(), letterCode);
    }

    /**
     * Counts how often each character occurs and creates one leaf node per
     * distinct character. The returned list is sorted by descending frequency.
     */
    private List<Node> init(String letters) {
        Map<Character, Integer> counts = new HashMap<Character, Integer>();
        for (int i = 0; i < letters.length(); i++) {
            Character character = letters.charAt(i);
            Integer seen = counts.get(character);
            counts.put(character, seen == null ? 1 : seen + 1);
        }

        List<Node> letterList = new ArrayList<Node>(counts.size());
        for (Map.Entry<Character, Integer> entry : counts.entrySet()) {
            Data data = new Data();
            data.setC(entry.getKey());
            data.setFrequency(entry.getValue());
            Node node = new Node();
            node.setData(data);
            letterList.add(node);
        }
        sort(letterList);
        return letterList;
    }

    /**
     * Sorts the nodes by descending frequency (smallest last). The sort is
     * stable, so nodes with equal frequency keep their relative order.
     */
    private void sort(List<Node> letterList) {
        Collections.sort(letterList, BY_FREQUENCY_DESCENDING);
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two rarest nodes.
     * The list is consumed in the process: on return it contains only the
     * root. Callers that still need the leaves must pass a copy.
     *
     * @param letterList non-empty list of nodes sorted by descending frequency
     * @return the root of the tree
     */
    private Node createTree(List<Node> letterList) {
        while (letterList.size() != 1) {
            int size = letterList.size();
            // The smaller of the two rarest nodes becomes the right child.
            Node nodeRight = letterList.get(size - 1);
            Node nodeLeft = letterList.get(size - 2);

            Data data = new Data();
            data.setFrequency(nodeRight.getData().getFrequency()
                    + nodeLeft.getData().getFrequency());

            Node nodeParent = new Node();
            nodeParent.setLeftChild(nodeLeft);
            nodeParent.setRightChild(nodeRight);
            nodeParent.setData(data);

            // Replace the two merged nodes with their parent and restore order.
            letterList.set(size - 2, nodeParent);
            letterList.remove(size - 1);
            sort(letterList);
        }
        return letterList.get(0);
    }

    /**
     * Collects the code of every character in the tree.
     *
     * @param rootNode root of the Huffman tree
     * @return all character/code pairs
     */
    private Map<Character, String> getLetterCode(Node rootNode) {
        Map<Character, String> letterCode = new HashMap<Character, String>();
        // A tree consisting of a single leaf would yield an empty code, so the
        // only character is given the one-bit code "1".
        if (rootNode.getLeftChild() == null && rootNode.getRightChild() == null) {
            letterCode.put(rootNode.getData().getC(), "1");
            return letterCode;
        }
        getLetterCode(rootNode, "", letterCode);
        return letterCode;
    }

    /**
     * Walks the tree in pre-order; a step to the left appends '0' to the path,
     * a step to the right appends '1'. The path to a leaf is its code.
     *
     * @param rooNode    current node, may be {@code null}
     * @param suffix     path from the root to {@code rooNode}
     * @param letterCode receives the character/code pairs
     */
    private void getLetterCode(Node rooNode, String suffix,
            Map<Character, String> letterCode) {
        if (rooNode == null) {
            return;
        }
        if (rooNode.getLeftChild() == null && rooNode.getRightChild() == null) {
            letterCode.put(rooNode.getData().getC(), suffix);
            return;
        }
        getLetterCode(rooNode.getLeftChild(), suffix + "0", letterCode);
        getLetterCode(rooNode.getRightChild(), suffix + "1", letterCode);
    }

    /**
     * Decodes the encoded text using the code table carried by
     * {@code decodeResult}.
     *
     * @param decodeResult the encoded text and its code table
     * @return the decoded text
     * @throws IllegalArgumentException if the encoded text ends with bits that
     *                                  do not form a complete code
     */
    @Override
    public String decode(EncodeResult decodeResult) {
        Map<String, Character> decodeMap = getDecoder(decodeResult.getLetterCode());
        String encode = decodeResult.getEncode();
        StringBuilder decodeStr = new StringBuilder();

        // Matching the shortest candidate first is safe because no Huffman
        // code is a prefix of another one.
        int start = 0;
        for (int end = 1; end <= encode.length(); end++) {
            Character character = decodeMap.get(encode.substring(start, end));
            if (character != null) {
                decodeStr.append(character);
                start = end;
            }
        }
        if (start != encode.length()) {
            throw new IllegalArgumentException(
                    "Encoded text has undecodable trailing bits starting at index " + start);
        }
        return decodeStr.toString();
    }

    /**
     * Inverts the character/code table into a code/character table.
     *
     * @param letterCode the code assigned to each character
     * @return the character belonging to each code
     */
    private Map<String, Character> getDecoder(Map<Character, String> letterCode) {
        Map<String, Character> decodeMap = new HashMap<String, Character>();
        Set<Map.Entry<Character, String>> pairs = letterCode.entrySet();
        for (Map.Entry<Character, String> pair : pairs) {
            decodeMap.put(pair.getValue(), pair.getKey());
        }
        return decodeMap;
    }
}
package com.liyuncong.algorithms.algorithms_huffman;

import static org.junit.Assert.*;

import org.junit.Test;

/**
 * Tests for {@link HuffmanAlgorithmImpl1}. The checks are independent of how
 * ties between equally frequent characters are broken.
 */
public class HuffmanAlgorithmImpl1Test {

    /** Sample text: 'a' and 'd' occur twice, 'b' and 'c' once. */
    private static final String SAMPLE = "abcdda";

    @Test
    public void testEncodeString() {
        HuffmanAlgorithmImpl1 huffmanImpl1 = new HuffmanAlgorithmImpl1();
        EncodeResult result = huffmanImpl1.encode(SAMPLE);

        // One code per distinct character.
        assertEquals(4, result.getLetterCode().size());
        // The output consists of bits only.
        assertTrue(result.getEncode().matches("[01]+"));
        // Every optimal prefix code for frequencies {2, 2, 1, 1} needs
        // 12 bits in total, whichever way ties are resolved.
        assertEquals(12, result.getEncode().length());
    }

    @Test
    public void testDecode() {
        HuffmanAlgorithmImpl1 huffmanImpl1 = new HuffmanAlgorithmImpl1();
        EncodeResult result = huffmanImpl1.encode(SAMPLE);

        // Decoding must reproduce the original text.
        assertEquals(SAMPLE, huffmanImpl1.decode(result));
    }
}
https://github.com/l294265421/algorithms-huffman.git