完全二叉树也可以用数组表示的。
/* Compressor.java -- compress the bytes of an array using the Huffman method. */

/**
 * Binary min-heap stored in a caller-supplied array.
 *
 * <p>Array layout of the complete binary tree: the root is at index 0, the
 * children of node {@code i} are at {@code 2*i+1} and {@code 2*i+2}, and its
 * parent is at {@code (i-1)/2}. Every node compares less than or equal to its
 * children (min-heap).
 *
 * <p>The heap works directly on the array passed to the constructor; it does
 * not copy it. Because {@link #pop()} parks each removed element just past the
 * end of the live heap, popping everything leaves the backing array sorted in
 * descending order.
 */
class BinaryHeap<T extends Comparable<T>> {
    /** Backing storage, shared with the caller. */
    T[] arr;
    /** Capacity, i.e. the length of {@link #arr}. */
    int cap;
    /** Number of live elements; the heap occupies {@code [0, last)}. */
    int last;

    /**
     * Creates an empty heap that uses {@code _arr} as its storage.
     *
     * @param _arr backing array; its length is the heap capacity
     */
    public BinaryHeap(T[] _arr) {
        cap = _arr.length;
        arr = _arr;
        last = 0;
    }

    /** Prints the live elements in array order on one line, each preceded by a space. */
    void dump() {
        for (int i = 0; i < last; ++i) {
            System.out.print(" " + arr[i]);
        }
        System.out.println();
    }

    /**
     * Inserts {@code val} into the heap.
     *
     * <p>Walks from the new leaf position towards the root, shifting each
     * larger ancestor down one level until the slot for {@code val} is found.
     *
     * @param val element to insert
     * @throws IllegalStateException if the heap already holds {@code cap} elements
     */
    public void push(T val) {
        if (last >= cap) {
            throw new IllegalStateException("heap is full (capacity " + cap + ")");
        }
        int i = last;
        while (i > 0) {
            int parent = (i - 1) / 2;
            if (val.compareTo(arr[parent]) >= 0) {
                break;
            }
            arr[i] = arr[parent];
            i = parent;
        }
        arr[i] = val;
        ++last;
    }

    /**
     * Removes and returns the smallest element.
     *
     * <p>The removed element is left at index {@code last-1} (the slot that
     * just dropped out of the live heap), which is what produces the in-place
     * descending order once the heap has been drained.
     *
     * @return the minimum element, or {@code null} if the heap is empty
     */
    public T pop() {
        if (last < 1) {
            return null;
        }
        T ret = arr[0];
        arr[0] = arr[last - 1];
        arr[last - 1] = ret; /* R1: park the minimum at the tail for the in-place reverse order */

        /* Sift the new root down; the parked tail element is excluded from the heap. */
        int i = 0;
        int j = 2 * i + 1;
        while (j < last - 1) {
            /* pick the right child when it exists (inside the shrunken heap) and is smaller */
            if (j + 1 < last - 1 && arr[j].compareTo(arr[j + 1]) > 0) {
                j = j + 1;
            }
            if (arr[i].compareTo(arr[j]) <= 0) {
                break;
            }
            /* If this swap is ever optimized into a one-way shift, R1 must be kept. */
            T tmp = arr[i];
            arr[i] = arr[j];
            arr[j] = tmp;
            i = j;
            j = 2 * i + 1;
        }
        --last;
        return ret;
    }
}

/** Builds a Huffman code for the byte values of an array and prints it. */
public class Compressor {
    /** Huffman tree node: a leaf carries a byte value, an internal node carries the summed count. */
    static class Node implements Comparable<Node> {
        int val;
        int cnt;
        Node left, right;

        public Node(int _val, int _cnt, Node _l, Node _r) {
            val = _val;
            cnt = _cnt;
            left = _l;
            right = _r;
        }

        /** Orders nodes by ascending count. */
        @Override
        public int compareTo(Node b) {
            return Integer.compare(this.cnt, b.cnt);
        }

        @Override
        public String toString() {
            return "" + this.val + ":" + this.cnt;
        }
    }

    /**
     * Prints one {@code value:code} line per leaf below {@code root}, depth first,
     * appending "0" for a left edge and "1" for a right edge.
     *
     * @param root subtree to print; must not be {@code null}
     * @param code bit string accumulated on the path from the tree root
     */
    static void dumpCode(Node root, String code) {
        if (null == root.left && null == root.right) {
            System.out.println(root.val + ":" + code);
            return;
        }
        if (null != root.left) {
            dumpCode(root.left, code + "0");
        }
        if (null != root.right) {
            dumpCode(root.right, code + "1");
        }
    }

    /**
     * Counts the byte values in {@code data}, builds the Huffman tree with a
     * min-heap and prints first the heap contents ({@code value:count}) and then
     * the resulting code table ({@code value:bits}). Byte values are treated as
     * unsigned (0..255).
     *
     * <p>Empty input prints only the (empty) heap line. Input with a single
     * distinct byte value assigns that value the one-bit code "0".
     *
     * @param data bytes to analyse; must not be {@code null}
     */
    public static void huffmanEncoding(byte[] data) {
        int[] cnt = new int[256];
        for (int i = 0; i < data.length; ++i) {
            /* mask to 0..255: a Java byte is signed and would index negatively otherwise */
            cnt[data[i] & 0xFF]++;
        }

        /* Collect one leaf per distinct value at the front of arr. */
        Node[] arr = new Node[256];
        int diff = 0;
        for (int i = 0; i < cnt.length; ++i) {
            if (0 == cnt[i]) {
                continue;
            }
            arr[diff] = new Node(i, cnt[i], null, null);
            ++diff;
        }

        /*
         * Heapify in place: the heap shares arr, and push(arr[i]) only touches
         * indices <= i, so leaves that have not been pushed yet are not disturbed.
         */
        BinaryHeap<Node> bh = new BinaryHeap<Node>(arr);
        for (int i = 0; i < diff; ++i) {
            bh.push(arr[i]);
        }
        bh.dump();

        if (diff == 0) {
            return; /* nothing to encode */
        }
        if (diff == 1) {
            /* A lone leaf would get an empty code; hang it under a root so it gets "0". */
            Node only = bh.pop();
            dumpCode(new Node(255, only.cnt, only, null), "");
            return;
        }

        /* Merge the two rarest subtrees until a single tree remains. */
        Node z = null;
        for (int i = 0; i < diff - 1; ++i) {
            Node x = bh.pop();
            Node y = bh.pop();
            z = new Node(255, x.cnt + y.cnt, x, y);
            bh.push(z);
        }
        dumpCode(z, "");
    }

    public static void main(String[] arg) {
        byte[] data = {48, 49, 50, 50, 50, 51, 51, 52, 52, 52};
        huffmanEncoding(data);
    }

    /** Heap demo: pushes the array into a heap built on itself, pops everything, then prints the array. */
    public static void main_(String[] arg) {
        Integer[] arr = {5, 10, 50, 11, 20, 52, 55, 25, 22};
        int x, i, len = arr.length;
        BinaryHeap<Integer> bh = new BinaryHeap<Integer>(arr);
        for (i = 0; i < len; ++i) {
            bh.push(arr[i]);
            bh.dump();
        }
        while (len-- > 0) {
            x = bh.pop();
            System.out.print(x + ", ");
        }
        System.out.println();
        for (i = 0; i < bh.cap; ++i) {
            System.out.print(bh.arr[i] + ", ");
        }
    }
}
/*
$ javac -encoding UTF-8 Compressor.java && java Compressor
 48:1 49:1 50:3 51:2 52:3
51:00
48:010
49:011
50:10
52:11
*/
/*
上面的编码过程有个缺点,就是要读两遍原始数据。
第一遍是为了统计频率并建表。对于实时流,在实际的编码标准(如 MPEG-1/2、H.264)中,这张表是规范根据普遍情形统计后预先规定好的。运用 Huffman 编码其实是最后一步,在此之前,针对变换后的系数块还做了其他压缩工作:MPEG[1] 使用 (run, level),其中 run 是非零值 level 之前零的个数;H.264 使用 CAVLC (TotalCoeff, TrailingOnes)[2],在毕厚杰的书里面,把 8x8 Bytes 压缩到了 25 bits。
编解码时虽说只需要查表,实际上并没有这么简单:要查的表不止一张,并且码字的位宽可达 16 bit。对于[2],采用了三层分级表,我也没有完全理解。
[1]http://www.bretl.com/mpeghtml/huffRL.HTM
[2]ffmpeg/h264_cavlc.c: decode_residual()/get_vlc2()
*/