# Implementation idea / algorithm description:
# 1. Read the weights line by line and store each weight in a Huffman node,
#    then put the nodes into an array. After sorting, take out the two
#    smallest nodes and merge them, put the merged node back into the array
#    and sort again. Repeat these steps; the single node finally left in the
#    array is the required Huffman tree.
# 2. Once the tree is built, a pre-order traversal yields the required codes;
#    the codes are then arranged in reverse order. A new class, Block, stores
#    each character together with its weight and code. The characters and
#    weights are initialised first and sorted in ascending order, the codes
#    are sorted by length from longest to shortest, and the codes are then
#    assigned one-to-one. After that the file is opened, the original text is
#    read in and scanned line by line, and the Block array is traversed to
#    replace every character; finally the result is written to the encoded
#    file.
# 3. Decoding likewise walks the Block array in a loop and translates the
#    codes matched by regular expressions back into characters.
# 4. When printing characters, a loop counter is enough to output a fixed
#    number of characters per line.
# 5. When printing the Huffman tree, traverse it level by level, format each
#    level as a string, and output the levels one after another.
#
# Python implementation:
import re  # regular-expression support


# Huffman tree node class.
class Hafftree:
    """A single node of a Huffman tree.

    Args:
        weight: The weight stored in this node.
    """

    def __init__(self, weight):
        # All links start out empty; the constructor only records the weight.
        self.leftchild = None   # left child node, or None
        self.rightchild = None  # right child node, or None
        self.parent = None      # parent node, or None
        self.weight = weight
# Sorting helper: orders a sub-range of the node array by weight.
def sort(list, start, number):
    """Sort ``list[start:number]`` in place by ascending ``weight``.

    The sort is stable, so nodes with equal weights keep their relative
    order. Elements outside ``[start, number)`` are left untouched.

    Args:
        list: Mutable sequence of objects exposing a ``weight`` attribute.
        start: Index of the first element to sort.
        number: End index (exclusive) of the range to sort.
    """
    # Nothing to do for an empty range or a start index past the data.
    if start >= len(list) or start >= number:
        return
    # Legacy guard kept from the original: a literal 0 in the first slot
    # means "no nodes here", so the range is left alone.
    if list[start] == 0:
        return
    # The original hand-written bubble sort compared list[i]/list[i+1]
    # instead of using the inner index, so list[start] was never ordered.
    list[start:number] = sorted(list[start:number], key=lambda node: node.weight)
# Pre-allocate the Huffman node array with 20 slots. Each slot gets its own
# placeholder node (``[Hafftree(0)] * 20`` would alias one node 20 times).
li = [Hafftree(0) for _ in range(20)]


# Initialisation: create one Huffman node per weight and store it in ``li``.
def init(number, infofile):
    """Load node weights from *infofile* into the global array ``li``.

    The first run of digits on each line is taken as that line's weight.
    Lines containing no digits (for example blank lines) are skipped. Nodes
    are stored in ``li`` starting at index 0; ``li`` grows if the file holds
    more weights than there are pre-allocated slots.

    Args:
        number: Unused; kept so existing calls keep working.
        infofile: Path of the text file holding one weight per line.

    Returns:
        The number of nodes stored in ``li``.
    """
    weight_pa = re.compile(r"\d+")
    count = 0
    # The context manager closes the file even if parsing fails.
    with open(infofile) as file:
        for message in file:
            match = weight_pa.search(message)
            if match is None:
                continue
            node = Hafftree(int(match.group()))
            if count < len(li):
                li[count] = node
            else:
                li.append(node)
            count += 1
    return count
# Root of the Huffman tree; a placeholder node until buildtree() replaces it.
firsttree = Hafftree(0)


# Build a Huffman tree from the node array and publish its root.
def buildtree(list, number):
    """Merge the first *number* nodes of *list* into one Huffman tree.

    On each step the remaining range ``list[i:number]`` is ordered with the
    module's ``sort`` helper, the two nodes at the front of that range are
    joined under a new parent whose weight is their sum, and the parent is
    written back at index ``i + 1``. After ``number - 1`` merges the root is
    at ``list[number - 1]`` and is stored in the global ``firsttree``.

    Args:
        list: Mutable sequence of tree nodes; it is modified in place.
        number: How many leading entries of *list* take part in the tree.

    Raises:
        ValueError: If *number* is less than 1 (there is nothing to build).
    """
    global firsttree
    if number < 1:
        # Without this check list[number - 1] would silently pick an
        # unrelated element from the end of the array.
        raise ValueError("buildtree needs at least one node, got {!r}".format(number))
    for i in range(number - 1):
        sort(list, i, number)
        left = list[i]
        right = list[i + 1]
        newnode = Hafftree(left.weight + right.weight)
        newnode.leftchild = left
        newnode.rightchild = right
        # Keep the upward links consistent with the downward ones.
        left.parent = newnode
        right.parent = newnode
        list[i + 1] = newnode
    firsttree = list[number - 1]
# Codes collected by pre_order(), one per leaf, in left-to-right leaf order.
code = []
# Empty starting prefix that the driver passes to pre_order().
string = ""


# Pre-order traversal used to generate the Huffman codes.
def pre_order(root, string):
    """Append the code of every leaf below *root* to the global ``code`` list.

    Going to a left child adds ``"0"`` to the prefix and going to a right
    child adds ``"1"``. A leaf reached with an empty prefix (a tree that
    consists of a single node) is given the code ``"0"`` so that it never
    ends up with an empty, unusable code.

    Args:
        root: Node to start from; ``None`` is accepted and ignored.
        string: Code prefix accumulated on the way down to *root*.

    Returns:
        Always 0 (kept from the original implementation).
    """
    if root is None:
        return 0
    if root.leftchild is None and root.rightchild is None:
        code.append(string or "0")
        return 0
    pre_order(root.leftchild, string + "0")
    pre_order(root.rightchild, string + "1")
    return 0
# Block stores a symbol (key), its weight and its Huffman code.
class Block:
    """One entry of the code table.

    Args:
        weight: Weight of the symbol.
        key: The symbol itself (a single character).
    """

    def __init__(self, weight, key):
        self.weight = weight
        self.key = key
        self.code = ""  # assigned later by Coding()


# Code table filled by Coding() and handed to Decode() by the caller.
list = []


def _parse_blocks(infofile):
    """Return one Block per usable line of *infofile*, in file order."""
    weight_pa = re.compile(r"\d+")
    key_pa = re.compile(r"\w")
    blocks = []
    with open(infofile) as file:
        for message in file:
            weight = weight_pa.search(message)
            if weight is None:
                # No digits on this line (e.g. a blank line): nothing to read.
                continue
            # A digit is also a word character, so this search cannot fail.
            key = key_pa.search(message)
            blocks.append(Block(int(weight.group()), key.group()))
    return blocks


def _decode_line(message, table):
    """Decode one line of '0'/'1' characters with the code->symbol *table*."""
    decoded = []
    pending = ""
    for ch in message:
        if ch in "01":
            pending += ch
            if pending in table:
                decoded.append(table[pending])
                pending = ""
        else:
            # Any other character (such as the newline) is copied through;
            # bits that did not form a complete code are kept verbatim.
            decoded.append(pending)
            pending = ""
            decoded.append(ch)
    decoded.append(pending)
    return "".join(decoded)


# Encode a text file with the generated Huffman codes.
def Coding(infofile, decodefile, encodefile):
    """Build the code table from *infofile* and encode *decodefile*.

    The global ``code`` list is sorted in place from the longest code to the
    shortest, the symbols read from *infofile* are sorted by ascending
    weight, and the two are paired position by position, so lighter symbols
    receive longer codes. The global ``list`` is replaced with the resulting
    blocks. Every character of *decodefile* that is a known symbol is then
    replaced by its code; other characters are copied unchanged.

    Args:
        infofile: Path of the file with one symbol and weight per line. The
            first word character on a line is the symbol and the first run
            of digits is its weight.
        decodefile: Path of the plain-text file to encode.
        encodefile: Path of the file that receives the encoded text.

    Raises:
        IndexError: If *infofile* lists more symbols than there are codes.
    """
    # Both sorts are stable, so ties keep their original relative order.
    code.sort(key=len, reverse=True)
    blocks = _parse_blocks(infofile)
    blocks.sort(key=lambda block: block.weight)
    if len(blocks) > len(code):
        raise IndexError(
            "{} symbols but only {} codes".format(len(blocks), len(code))
        )
    for block, bits in zip(blocks, code):
        block.code = bits

    # Replace the table's contents in place so repeated calls do not pile up
    # stale entries and existing references to the table stay valid.
    list[:] = blocks

    table = {}
    for block in blocks:
        table.setdefault(block.key, block.code)

    # One pass per line; replacing symbols one after another could rewrite
    # the '0'/'1' characters produced by an earlier replacement.
    with open(decodefile) as source, open(encodefile, "w") as target:
        for message in source:
            target.write("".join(table.get(ch, ch) for ch in message))


# Decode an encoded file back into text.
def Decode(encodefile, resultfile, list):
    """Decode *encodefile* into *resultfile* using the code table *list*.

    The input is read bit by bit; as soon as the bits collected so far equal
    one of the codes, the matching symbol is emitted. This respects code
    boundaries, which plain search-and-replace of code strings does not.
    Characters other than ``'0'`` and ``'1'`` are copied unchanged.

    Args:
        encodefile: Path of the encoded input file.
        resultfile: Path of the file that receives the decoded text.
        list: Sequence of Block objects; a ``None`` entry marks the end of
            the table. Blocks with an empty code are ignored.
    """
    table = {}
    for block in list:
        if block is None:
            break
        if block.code:
            table.setdefault(block.code, block.key)

    with open(encodefile) as source, open(resultfile, "w") as target:
        for message in source:
            target.write(_decode_line(message, table))
# Print the Huffman tree in a formatted way, using a level-order traversal.
def Printtree(firsttree):
    """Print the weights of the tree rooted at *firsttree*, one level per line.

    Each printed line lists the weights of one level from left to right,
    every weight followed by a single space. Nothing is printed when
    *firsttree* is ``None``.

    Args:
        firsttree: Root node of the tree, or ``None``.
    """
    level = [firsttree] if firsttree is not None else []
    while level:
        print("".join(str(node.weight) + " " for node in level))
        # Gather the next level, left to right, skipping missing children.
        level = [
            child
            for node in level
            for child in (node.leftchild, node.rightchild)
            if child is not None
        ]
# Test code: build the tree from test.txt, encode test2.txt into test3.txt,
# decode test3.txt into result.txt and print the tree.
init(3, "test.txt")
buildtree(li, 3)
pre_order(firsttree, string)
Coding("test.txt", "test2.txt", "test3.txt")
Decode("test3.txt", "result.txt", list)
Printtree(firsttree)

# Test results as recorded by the original author:
#   Original text: aaaabbbbbbbcccccccccc
#   Encoded text:  10101010000000011111111111111111111
#   Printed tree:
#     F:\python3.5.2\python.exe F:/untitled2/Haffman.py
#     21
#     10 11
#     4 7
#     Process finished with exit code 0
# NOTE(review): in the recorded encoding the symbols appear to map to
# a -> "10", b -> "0", c -> "11", i.e. the most frequent symbol did not get
# the shortest code -- confirm against the current output before relying on it.