SICP读书笔记-huffman编码的实现

阅读更多

huffman 编码是一种变长前缀式编码,通过利用被编码消息中符号的出现频率(频率出现越高的用越少的码),可以有效的节约空间。在 SICP 的2.3.4节通过实现一个huffman编码树来阐述通过表和数据抽象去操作集合和数的例子。

 

构建 huffman 编码树


  • huffman 树以表的方式来表示,将树分为 叶子节点*和 *非叶子节点

('leaf symbol weight) : 叶子节点,包含标示叶子的符号'leaf, 实际字符 symbol,权重 weight
(left right symbols weight): 非叶子节点, 包含左子树 left, 右子树 right, 实际字符表(它孩子节点的符号汇总表), 权重 weight (它孩子节点的权重之和).

  • 构建过程

将符号和其对应的频率如 (A 4) (B 2) (C 3) (D 1) *变换为叶子节点的有序表(按权重升序), 然后反复归并集合中具有最小权重的2个元素,直到集合中只剩下一个元素,那么这个元素就是我们所需要的 *huffman 树.

(define (generate-huffman-tree pairs)
 (define (successive-merge entry-set)
  (cond ((null? entry-set) '())
        ((null? (cdr entry-set)) (car entry-set))
        (else
         (successive-merge (adjoin-set
                            (make-code-tree (car entry-set) (cadr entry-set))
                            (cddr entry-set))))))
 (successive-merge (make-leaf-set pairs)))

 


通过huffman编码和解码


  • 编码通过针对消息中的每个字符,遍历 huffman 数,如果往左则增加一个0,往右为1,到达叶子节点时得到的2进制序列就是该字符的编码。以下是针对单个符号的编码算法:
;编码单个字符
(define (encode-symbol symbol tree)
 (if (leaf? tree)
     '()
     (let ((code-br-pair (encode-branch symbol tree)))
          (cons
           (car code-br-pair)
           (encode-symbol symbol (cadr code-br-pair))))))

;根据字符是在左树还是右树进行编码
(define (encode-branch symbol tree)
 (let (
       (left (left-branch tree))
       (right (right-branch tree))
      )
  (cond ((member? symbol (symbols left)) (list 0 left))
        ((member? symbol (symbols right)) (list 1 right))
        (else (error "symbol not int left or right branch - " symbol)))))
解码以一串二进制序列和对应的 huffman 数为参数,逐个根据二进制序列中的值决定遍历树的走向,0向左走,1向右走,到达叶子节点则该叶子节点的symbol即为解码的符号,继续剩下的序列,直到序列为空。
(define (decode bits tree)
 (define (decode-l bits current-branch)
  (if (null? bits)
      '()
      (let ((next-branch (choose-branch (car bits) current-branch)))
       (if (leaf? next-branch)
           (cons (symbol-leaf next-branch)
                 (decode-l (cdr bits) tree))
           (decode-l (cdr bits) next-branch)))))
 (decode-l bits tree))

(define (choose-branch bit branch)
 (cond ((= bit 0) (left-branch branch))
       ((= bit 1) (right-branch branch))
       (else (error "bad bit -- CHOOSE-BRANCH" bit))))

 


完整的代码如下


  • 树的构建 huffman-tree.scm
;构建如((A 4) (B 2) (C 1) (D 1))的符号和权重的序对列表,构建huffman树
(define (generate-huffman-tree pairs)
 (define (successive-merge entry-set)
  (cond ((null? entry-set) '())
        ((null? (cdr entry-set)) (car entry-set))
        (else
         (successive-merge (adjoin-set
                            (make-code-tree (car entry-set) (cadr entry-set))
                            (cddr entry-set))))))
 (successive-merge (make-leaf-set pairs)))

;定义树叶子的表示法
(define (make-leaf symbol weight)
 (list 'leaf symbol weight))
;判断是否是叶子节点
(define (leaf? object)
 (eq? (car object) 'leaf))
;获取叶子节点的符号
(define (symbol-leaf x) (cadr x))
;获取叶子节点的权重
(define (weight-leaf x) (caddr x))
;获取树的符号表
(define (symbols tree)
 (if (leaf? tree)
     (list (symbol-leaf tree))
     (caddr tree)))
;获取树的权重
(define (weight tree)
 (if (leaf? tree)
     (weight-leaf tree)
     (cadddr tree)))
;获取树的左子树
(define (left-branch tree) (car tree))
;获取树的右子树
(define (right-branch tree) (cadr tree))
;树表示为1个具有4个元素的表:左节点,右节点,符号列表,权重
(define (make-code-tree left right)
 (list left
       right
       (append (symbols left) (symbols right))
       (+ (weight left) (weight right))))
;根据权重,构建叶子和树的有序标,方便归并一对最小项
(define (adjoin-set x set)
 (cond ((null? set) (list x))
        ((> (weight x) (weight (car set))) (cons (car set) (adjoin-set x (cdr set))))
        (else (cons x set))))
;构造叶子的初始排序集合
(define (make-leaf-set pairs)
 (if (null? pairs)
     '()
     (let ((pair (car pairs)))
      (adjoin-set (make-leaf (car pair) (cadr pair)) (make-leaf-set (cdr pairs))))))

 

  • 编码和解码 huffman-code.scm
(load "huffman-tree.scm")
;编码消息
(define (encode message tree)
 (if (null? message)
     '()
     (append
      (encode-symbol (car message) tree)
      (encode (cdr message) tree))))

;编码单个字符
(define (encode-symbol symbol tree)
 (if (leaf? tree)
     '()
     (let ((code-br-pair (encode-branch symbol tree)))
          (cons
           (car code-br-pair)
           (encode-symbol symbol (cadr code-br-pair))))))

;根据字符是在左树还是右树进行编码
(define (encode-branch symbol tree)
 (let (
       (left (left-branch tree))
       (right (right-branch tree))
      )
  (cond ((member? symbol (symbols left)) (list 0 left))
        ((member? symbol (symbols right)) (list 1 right))
        (else (error "symbol not int left or right branch - " symbol)))))
;包含关系判断
(define (member? item set)
    (not (equal? (member item set) false)))
;解码消息
(define (decode bits tree)
 (define (decode-l bits current-branch)
  (if (null? bits)
      '()
      (let ((next-branch (choose-branch (car bits) current-branch)))
       (if (leaf? next-branch)
           (cons (symbol-leaf next-branch)
                 (decode-l (cdr bits) tree))
           (decode-l (cdr bits) next-branch)))))
 (decode-l bits tree))

(define (choose-branch bit branch)
 (cond ((= bit 0) (left-branch branch))
       ((= bit 1) (right-branch branch))
       (else (error "bad bit -- CHOOSE-BRANCH" bit))))

 

  • 测试代码 huffman-use.scm
(load "huffman-code.scm")
;定义一个特定的文本串
(define message '(a a b a c a c b b d d d d d d))
;定义初始的叶子节点
(define leaf-set (list '(a 4) '(b 3) '(c 2) '(d 6)))
;根据叶子节点,生成对应的huffman树
(define huffman (generate-huffman-tree leaf-set))
;encode
(define bits (encode message huffman))
(display bits)
;decode
(define msg (decode bits huffman))
(newline)
(display msg)

 

  • 测试结果如下
(1 0 1 0 1 1 1 1 0 1 1 0 1 0 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0)
(a a b a c a c b b d d d d d d)

 

你可能感兴趣的:(huffman,霍夫曼编码,lisp,scheme,SICP)