一、实验目的
对一幅 BMP 格式的灰度图像进行霍夫曼编码和译码
二、函数代码实现
新建Huffman.py文件,编写调用的功能函数
#coding: utf-8
from PIL import Image
#利用python的PIL模块处理图像
class node:
    """A node of the Huffman tree.

    Attributes:
        left: Left child node, or ``None``.
        right: Right child node, or ``None``.
        parent: Parent node, or ``None``.
        weight: Weight of the node (defaults to 0).
        code: Value carried by the node (defaults to ``None``).
    """

    def __init__(self, right=None, left=None, parent=None, weight=0, code=None):
        # NOTE: ``right`` comes before ``left`` in the signature; the order is
        # kept as-is so that existing positional callers keep working.
        self.left = left
        self.right = right
        self.parent = parent
        self.weight = weight  # weight of the node
        self.code = code  # value of the node

    def __repr__(self):
        return f"{type(self).__name__}(weight={self.weight!r}, code={self.code!r})"
def picture_convert(filename, newfilename):
    """Convert an image file to grayscale, save the result and return it.

    After the conversion every pixel is represented by a single gray value.

    Args:
        filename: Path of the image to open.
        newfilename: Path the grayscale image is saved to.

    Returns:
        The converted mode ``'L'`` image.
    """
    # convert() produces a new image object, so the source file handle can be
    # released as soon as the conversion is done.
    with Image.open(filename) as source:
        picture = source.convert('L')
    picture.save(newfilename)
    return picture
def pixel_number_caculate(list):
    """Count how many times each pixel value occurs.

    Args:
        list: Iterable of hashable pixel values. (The name shadows the
            builtin ``list``; it is kept for interface compatibility.)

    Returns:
        A dict mapping each distinct value to its number of occurrences,
        with keys in order of first appearance.
    """
    pixel_number = {}
    for value in list:
        # A value seen for the first time starts from 0, then is incremented.
        pixel_number[value] = pixel_number.get(value, 0) + 1
    return pixel_number
def node_construct(pixel_number):
    """Build one tree node per (value, weight) entry.

    Args:
        pixel_number: Iterable of indexable entries where ``entry[0]`` is the
            pixel value and ``entry[1]`` is its weight (occurrence count).

    Returns:
        A list of ``node`` objects in the same order as the input, each with
        ``weight=entry[1]`` and ``code=str(entry[0])``.
    """
    return [node(weight=entry[1], code=str(entry[0])) for entry in pixel_number]
def tree_construct(listnode):
    """Build the Huffman tree from a list of leaf nodes.

    The two lowest-weight nodes are repeatedly merged under a new parent
    whose weight is the sum of both, until a single node remains.

    Args:
        listnode: List of nodes exposing ``weight``, ``left``, ``right`` and
            ``parent`` attributes. The list itself is not modified, but the
            ``parent`` attribute of its nodes is set.

    Returns:
        A list containing only the root node, or an empty list when
        ``listnode`` is empty.
    """
    # sorted() makes a copy, so the caller's list keeps its order and content.
    listnode = sorted(listnode, key=lambda item: item.weight)
    # "> 1" (rather than "!= 1") lets an empty input fall through instead of
    # failing on listnode[0].
    while len(listnode) > 1:
        # Merge the two nodes with the smallest weights.
        low_node0, low_node1 = listnode[0], listnode[1]
        new_change_node = node()
        new_change_node.weight = low_node0.weight + low_node1.weight
        new_change_node.left = low_node0
        new_change_node.right = low_node1
        low_node0.parent = new_change_node
        low_node1.parent = new_change_node
        del listnode[:2]
        listnode.append(new_change_node)
        # Stable sort: nodes of equal weight keep their relative order.
        listnode.sort(key=lambda item: item.weight)
    return listnode
def Huffman_Coding(picture):
    """Huffman-encode the pixels of an image and write the bit string to disk.

    Prints the image size, the code word of every pixel value and the whole
    coding table, then writes the concatenated code words of all pixels to
    ``coding_result.txt`` in the current directory.

    Args:
        picture: Image object exposing ``size`` (width, height) and
            ``load()``; pixels are read as ``im[x, y]``.
    """
    # Image width and height.
    width = picture.size[0]
    height = picture.size[1]
    im = picture.load()
    print("灰度图宽为" + str(width) + "像素")
    print("灰度图高为" + str(height) + "像素")

    # Flatten the 2-D pixel matrix into a 1-D list, column by column.
    pixels = [im[i, j] for i in range(width) for j in range(height)]

    # Count the occurrences of every value and sort ascending by count.
    pixel_number = pixel_number_caculate(pixels)
    pixel_number = sorted(pixel_number.items(), key=lambda item: item[1])

    # One leaf per pixel value, then the tree; keep the root.
    node_list = node_construct(pixel_number)
    head = tree_construct(node_list)[0]

    # Build the coding table by walking from every leaf up to the root:
    # a left branch contributes "1", a right branch contributes "0".
    coding_table = {}
    for leaf in node_list:
        bits = []
        current = leaf
        while current is not head:
            bits.append("1" if current.parent.left is current else "0")
            current = current.parent
        # Bits were collected leaf-to-root; the code word reads root-to-leaf.
        # When the image has a single gray level the leaf is the root and the
        # walk yields no bits; use "0" so the output stays decodable.
        coding_table[leaf.code] = "".join(reversed(bits)) or "0"

    # Print the code word of every pixel value.
    for key in coding_table.keys():
        print("信源像素点" + key + "编码后的码字力:" + coding_table[key])
    # Print the coding table.
    print("编码表为:", coding_table)

    # Encode the pixels in the same column-by-column order and save the
    # resulting bit string as text.
    coding_result = "".join(coding_table[str(pixel)] for pixel in pixels)
    with open('coding_result.txt', 'w') as file:
        file.write(coding_result)
def Decoding(width, height, coding_table, coding_result):
    """Rebuild a grayscale image from a Huffman bit string and save it.

    The bit string is read one character at a time; as soon as the characters
    read so far equal a code word of the table, the matching pixel value is
    emitted and reading starts over. The decoded values fill a new mode
    ``'L'`` image column by column, which is saved as ``decode.bmp``.

    Args:
        width: Width of the image to rebuild, in pixels.
        height: Height of the image to rebuild, in pixels.
        coding_table: Dict mapping a pixel value (anything ``int()`` accepts)
            to its code word string.
        coding_result: The encoded string to decode.

    Raises:
        IndexError: If fewer than ``width * height`` pixels are decoded.
    """
    # Invert the table once so each lookup is a dict access instead of a scan.
    # setdefault keeps the first key for a given code word, which is the key
    # a front-to-back scan of the table would find.
    symbol_for_code = {}
    for key, code_word in coding_table.items():
        symbol_for_code.setdefault(code_word, key)

    code_read_now = ''  # characters read since the last emitted pixel
    new_pixel = []
    for bit in coding_result:
        code_read_now = code_read_now + bit
        if code_read_now in symbol_for_code:
            new_pixel.append(symbol_for_code[code_read_now])
            code_read_now = ''

    # Build the new image and assign the pixel values column by column.
    decode_image = Image.new('L', (width, height))
    k = 0
    for i in range(width):
        for j in range(height):
            decode_image.putpixel((i, j), int(new_pixel[k]))
            k += 1
    decode_image.save('decode.bmp')
    print("译码已经完成:图片存储为decode.bmp")
三、霍夫曼编码
编写code.py 文件,调用Huffman.py文件中的函数
#coding: utf-8
"""Convert test.bmp to grayscale (saved as new.bmp) and Huffman-encode it."""
from PIL import Image
from Huffman import Huffman_Coding, picture_convert

# Guarded so that importing this module does not trigger the encoding run.
if __name__ == "__main__":
    picture = picture_convert('test.bmp', 'new.bmp')
    Huffman_Coding(picture)
test.bmp
运行结果:
四、霍夫曼解码
编写decode.py文件,将编码表粘贴到coding_table全局变量
#coding: utf-8
from Huffman import *
coding_table = {'11': '001101000011111', '12': '001101000011110', '235': '00110100001110', '234': '00000010110111', '13': '00000010110110', '238': '0110100001101', '237': '0110100001100', '240': '0100100001101', '233': '0100100001100', '244': '0011101001111', '232': '0011101001110', '236': '0011101001011', '243': '0011101001010', '242': '0011010000110', '239': '0011010000101', '241': '0011010000100', '231': '0001001100111', '245': '0001001100110', '246': '0000001011010', '191': '011010000111', '190': '010110000101', '14': '010110000100', '230': '010010000111', '247': '010010000101', '193': '010010000100', '195': '001110100110', '248': '001110100100', '192': '001101000111', '189': '001101000110', '194': '001000110011', '198': '001000110010', '196': '000100110010', '197': '000001101111', '187': '000001101110', '200': '000000101111', '188': '000000101110', '15': '000000101100', '199': '01110010111', '249': '01110010110', '201': '01101000010', '202': '01011000011', '186': '00110100010', '229': '00110100000', '203': '00101000011', '250': '00101000010', '16': '00100011000', '205': '00010011000', '204': '00000110110', '67': '00000000101', '206': '00000000100', '44': '0111011011', '66': '0111011010', '185': '0111001010', '65': '0111000011', '46': '0111000010', '78': '0111000001', '50': '0111000000', '69': '0110110111', '73': '0110110110', '68': '0110110101', '57': '0110110100', '53': '0110101111', '72': '0110101110', '52': '0110101101', '74': '0110101100', '64': '0110101011', '45': '0110101010', '17': '0110100011', '80': '0110100010', '79': '0110100000', '54': '0110011111', '77': '0110011110', '60': '0110011001', '55': '0110011000', '48': '0110010111', '75': '0110010110', '71': '0110010001', '63': '0110010000', '76': '0110001011', '70': '0110001010', '61': '0101111101', '47': '0101111100', '43': '0101100111', '82': '0101100110', '49': '0101100011', '42': '0101100010', '81': '0101100000', '56': '0101000111', '51': '0101000110', '40': '0100110111', '39': '0100110110', '62': 
'0100110011', '83': '0100110010', '59': '0100100011', '184': '0100100010', '85': '0100100000', '207': '0100001111', '58': '0100001110', '251': '0011111101', '38': '0011111100', '41': '0011101111', '183': '0011101110', '84': '0011101000', '87': '0010101011', '86': '0010101010', '18': '0010100000', '37': '0010001101', '35': '0001110101', '36': '0001110100', '209': '0001100111', '208': '0001100110', '182': '0001001101', '88': '0000011111', '228': '0000011110', '211': '0000011010', '34': '0000011001', '19': '0000011000', '180': '0000001111', '210': '0000001110', '252': '0000001010', '89': '0000000011', '33': '011111011', '212': '011111010', '178': '011101100', '179': '011100100', '20': '011010100', '32': '011001110', '181': '011001101', '176': '011001010', '90': '011001001', '21': '011000100', '175': '010111111', '177': '010110010', '173': '010100111', '174': '010100110', '31': '010100010', '22': '010011010', '215': '010011000', '23': '010000110', '25': '001111111', '213': '001111011', '24': '001111010', '172': '001110110', '27': '001110101', '214': '001110011', '30': '001110010', '26': '001101001', '91': '001100001', '217': '001100000', '28': '001011001', '216': '001011000', '253': '001010100', '29': '001010001', '227': '001001101', '127': '001001100', '126': '001000111', '218': '000111011', '171': '000110010', '170': '000101101', '125': '000101100', '122': '000100111', '124': '000011111', '128': '000011110', '169': '000010111', '123': '000010110', '226': '000001110', '92': '000001011', '225': '000001010', '121': '000001001', '168': '000001000', '129': '000000110', '165': '000000100', '130': '000000011', '162': '000000010', '166': '000000000', '167': '01111111', '220': '01111110', '163': '01111100', '120': '01110111', '164': '01110101', '131': '01110100', '132': '01110011', '135': '01110001', '133': '01101111', '219': '01101110', '134': '01101100', '156': '01101001', '136': '01100011', '119': '01100001', '158': '01100000', '137': '01011110', '160': '01011101', '93': 
'01011100', '159': '01010101', '161': '01010100', '155': '01010010', '157': '01010000', '138': '01001111', '139': '01001110', '224': '01001001', '221': '01000111', '118': '01000110', '140': '01000101', '110': '01000100', '113': '01000010', '111': '01000001', '141': '01000000', '142': '00111110', '112': '00111100', '154': '00111000', '222': '00110111', '254': '00110110', '108': '00110101', '115': '00110011', '94': '00110010', '114': '00110001', '109': '00101111', '117': '00101110', '150': '00101101', '106': '00101011', '143': '00101001', '116': '00100111', '152': '00100010', '223': '00100001', '149': '00100000', '145': '00011100', '153': '00011000', '107': '00010111', '95': '00010010', '146': '00001110', '147': '00001101', '144': '00001100', '148': '00001010', '105': '00001001', '151': '00001000', '104': '0101101', '96': '0101011', '103': '0100101', '97': '0010010', '102': '0001111', '100': '0001101', '101': '0001010', '98': '0001000', '99': '011110', '255': '1'}
file = input("请箱入逬行霍夫曼译码的文件名:")
# Only the first line of the file is decoded; the file is closed as soon as
# that line has been read.
# NOTE(review): presumably this is the coding_result.txt written by
# Huffman_Coding -- confirm. An empty file now yields an empty string here
# instead of an IndexError.
with open(file, 'r', encoding='ISO-8859-1') as f:
    coding_result = f.readline().strip('\n')
width = int(input("请输入需要还原的图片的宽:"))
height = int(input("请输入需要还原的图片的高:"))
Decoding(width, height, coding_table, coding_result)
运行结果:
(完)
之前的报错,根据评论区的修改已解决:
此外,还有另一套可用代码,在github的这个目录下
https://github.com/Wfzsec/Huffman-Coding
NameError: name 'raw_input' is not defined
未定义名称“raw_input”
原因是:raw_input 是 Python 2.x 版本的输入函数,在 3.x 版本中该函数已不存在,调用时会报"未定义"的错误。在 3.x 版本中应该用 input() 代替 raw_input()。
那么将“raw_input”改成“input”就可以解决了。
Python中的花括号{}:
代表dict字典数据类型,字典是Python中唯一内建的映射类型。字典中的值没有特殊的顺序(自 Python 3.7 起,字典会保持键的插入顺序),但都是存储在一个特定的键(key)下。键可以是数字、字符串甚至是元组。
>>> dic = {'jon':'boy','lili"':'girl'}
>>> dic
{'jon': 'boy', 'lili"': 'girl'}
python2和python3中运算符的区别
查看代码中是否含有/,
python3的/结果含有浮点数!
python2中的/等价于python3的//
在python3中,//表示取整除 - 返回商的整数部分(向下取整)
TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
AttributeError: 'dict' object has no attribute 'iteritems'
Python3中:iteritems变为items
附:
https://www.cnblogs.com/-wenli/p/12434263.html
https://www.cnblogs.com/tr1ple/p/7226150.html
为了偷懒,对老师给出的PDF文档进行了OCR文字识别
标点符号在识别中极其容易出错,需要逐一对比修正
鸣谢:
>>在线文字识别转换
>>免费在线OCR