Python Huffman编码及解码

Huffman编码及解码

# coding:utf-8

#Tree-Node Type
class Node:
    """A Huffman-tree node holding a frequency and links to its relatives."""

    def __init__(self,freq):
        # Children and parent start unset; createHuffmanTree wires them up.
        self.left = None
        self.right = None
        self.father = None
        # Weight of this node: a symbol count for a leaf, the sum of both
        # children's weights for an internal node.
        self.freq = freq

    def isLeft(self):
        """Return True when this node is the left child of its parent.

        A node without a parent (such as the root) is nobody's left child,
        so False is returned instead of failing on the missing parent.
        """
        parent = self.father
        # Identity, not equality: the question is whether the parent's left
        # slot holds this very object.
        return parent is not None and parent.left is self
#create nodes: build one leaf node per frequency
def createNodes(freqs):
    """Return a list with a fresh Node for every frequency, in input order."""
    leaves = []
    for weight in freqs:
        leaves.append(Node(weight))
    return leaves

#create Huffman-Tree: merge the leaf nodes bottom-up into a single tree
def createHuffmanTree(nodes):
    """Build the Huffman tree over ``nodes`` and return its root.

    The two lowest-frequency nodes are repeatedly merged under a new parent
    whose frequency is their sum, until a single node remains.  The ``left``,
    ``right`` and ``father`` links of the nodes are updated in place; the
    ``nodes`` sequence itself is not modified.

    Args:
        nodes: sequence of leaf nodes, each exposing a ``freq`` attribute.

    Returns:
        The root node, or None when ``nodes`` is empty.  With a single node
        that node itself is the root.
    """
    import heapq

    if not nodes:
        # Nothing to merge; there is no tree to return.
        return None

    # Heap entries are (freq, insertion order, node).  The insertion order
    # breaks frequency ties exactly like a stable sort of a queue that gets
    # new parents appended at its end, and it also guarantees that two nodes
    # are never compared with each other directly.
    heap = [(node.freq, order, node) for order, node in enumerate(nodes)]
    heapq.heapify(heap)
    next_order = len(heap)

    while len(heap) > 1:
        node_left = heapq.heappop(heap)[2]
        node_right = heapq.heappop(heap)[2]
        node_father = Node(node_left.freq + node_right.freq)
        node_father.left = node_left
        node_father.right = node_right
        node_left.father = node_father
        node_right.father = node_father
        heapq.heappush(heap, (node_father.freq, next_order, node_father))
        next_order += 1

    root = heap[0][2]
    # The root has no parent, even if the node carried a stale link.
    root.father = None
    return root
#Huffman编码
def huffmanEncoding(nodes,root):
    codes = [''] * len(nodes)
    for i in range(len(nodes)):
        node_tmp = nodes[i]
        while node_tmp != root:
            if node_tmp.isLeft():
                codes[i] = '0' + codes[i]
            else:
                codes[i] = '1' + codes[i]
            node_tmp = node_tmp.father
    return codes

# 解压缩huffman文件
def decode_huffman(input_string,  char_store, freq_store):
    #input_string 哈夫曼编码
    #char_store 字符集合 
    #freq_store 字符转编码01序列
    encode = ''
    decode = ''
    for index in range(len(input_string)):
        encode = encode + input_string[index]
        for item in zip(char_store, freq_store):
            if encode == item[1]:
                decode = decode + item[0]
                encode = ''
    return decode;           

#Get the Huffman encoding of a string
def getHuffmanCode(string):
    """Huffman-encode ``string``.

    Args:
        string: the text to encode.

    Returns:
        A two-element list ``[encoded, code_table]`` where ``encoded`` is
        the '0'/'1' string for the whole input and ``code_table`` maps each
        distinct character to its code.  Empty input yields ``['', {}]``.
    """
    # Count how often every character occurs.
    counts = {}
    for char in string:
        counts[char] = counts.get(char, 0) + 1
    if not counts:
        # Nothing to encode, and no tree can be built from zero symbols.
        return ['', {}]

    # Order the symbols by frequency, then by character, so the resulting
    # code table is deterministic.
    chars_freqs = sorted(counts.items(), key=lambda kv: (kv[1], kv[0]))

    # Build the tree over one leaf per symbol and read off each leaf's code.
    nodes = createNodes([freq for _, freq in chars_freqs])
    root = createHuffmanTree(nodes)
    codes = huffmanEncoding(nodes, root)

    # codes[i] belongs to chars_freqs[i], since both follow the leaf order.
    code_by_char = {}
    for (char, _), code in zip(chars_freqs, codes):
        code_by_char[char] = code

    encoded = ''.join(code_by_char[char] for char in string)
    return [encoded, code_by_char]

getHuffmanCode(string):获取string字符串的01编码,以及各个字符与其相对应的01序列(字典)
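decode_huffman(input_string, char_store, freq_store):解码。参数来自 getHuffmanCode 的返回值:input_string 为01编码串,char_store 与 freq_store 分别为返回字典的键(字符)和值(对应的01序列)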

你可能感兴趣的:(python)