生命短暂而信息无…缩写是无法避免的灾难,而缩写创造者的任务就是要尽力做好工作。尽管缩写有害,但总比什么也不做要强
分别用区域编码和阈值编码方法实现图像压缩,用8×8DCT变换,保留50%的系数(区域编码保留前50%个系数,阈值编码保留50%的大系数),并对解码图像进行比较
块变换编码的过程如下所示
其中,分块部分采用8×8的块大小,前向变换部分采用DCT变换;在比特分配中,有两种截断系数的方法可用于压缩,分别是区域编码和阈值编码。
当两种编码方式均保留前50%系数时,根据个人理解,得到的矩阵是这样的
将两个矩阵分别作用于DCT变换后的图像进行压缩,再进行解码,同时计算两幅解码图像与原图像之间的RMS值作为客观评价,得到的结果如下所示
从图中可以看到,在两种编码结果中,区域编码的结果较好;但按照课本上的理论分析,应该是阈值编码的效果较好。目前还不确定是自己的代码出了问题,还是理解上出了问题,一直没有找到合适的原因,如果有想法,欢迎在下方留言。
实现的代码如下所示:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# 生成DCT矩阵
def generate_dct_matrix(block_size):
    """Build the square DCT basis matrix of the given size.

    Entry ``[i, j]`` is ``a_i * cos((2j + 1) * i * pi / (2 * block_size))``
    where ``a_0 = sqrt(1 / block_size)`` and ``a_i = sqrt(2 / block_size)``
    for every other row.

    Args:
        block_size: Number of rows and columns of the matrix.

    Returns:
        A ``(block_size, block_size)`` float array.
    """
    basis = np.zeros((block_size, block_size))
    for freq in range(block_size):
        # The first row uses a smaller normalisation factor than the rest.
        norm = np.sqrt((1 if freq == 0 else 2) / block_size)
        for pos in range(block_size):
            angle = ((2 * pos + 1) * freq * np.pi) / (2 * block_size)
            basis[freq, pos] = norm * np.cos(angle)
    return basis
# 按照指定块大小进行DCT变换
def block_process(image, dct_mat):
    """Apply the forward transform ``dct_mat @ block @ dct_mat.T`` blockwise.

    The image is walked in square tiles whose side equals the number of rows
    of ``dct_mat``; each tile is converted to float32 before being transformed.

    Args:
        image: 2-D array to transform.
        dct_mat: Square transform matrix; its row count is the tile size.

    Returns:
        A float32 array with the same shape as ``image`` holding the
        transformed tiles.
    """
    tile = dct_mat.shape[0]
    n_rows, n_cols = image.shape
    transformed = np.zeros((n_rows, n_cols), dtype=np.float32)
    row_starts = np.arange(0, n_rows, tile)
    col_starts = np.arange(0, n_cols, tile)
    for top in row_starts:
        bottom = top + tile
        for left in col_starts:
            right = left + tile
            pixels = np.asarray(image[top:bottom, left:right], dtype=np.float32)
            transformed[top:bottom, left:right] = dct_mat @ pixels @ dct_mat.T
    return transformed
# 编码
def encode(image, encode_method):
    """Multiply every tile of ``image`` elementwise by a fixed mask.

    Args:
        image: 2-D array, walked in square tiles whose side equals the
            number of rows of ``encode_method``.
        encode_method: Square mask (or weight) array applied to each tile.

    Returns:
        A float32 array with the same shape as ``image`` containing the
        masked tiles.
    """
    mask = encode_method
    tile = mask.shape[0]
    n_rows, n_cols = image.shape
    masked = np.zeros((n_rows, n_cols), dtype=np.float32)
    row_starts = np.arange(0, n_rows, tile)
    col_starts = np.arange(0, n_cols, tile)
    for top in row_starts:
        for left in col_starts:
            window = (slice(top, top + tile), slice(left, left + tile))
            coeffs = np.asarray(image[window], dtype=np.float32)
            masked[window] = coeffs * mask
    return masked
# 逆DCT变换
def inv_block_process(image, dct_mat):
    """Apply the inverse transform ``dct_mat.T @ block @ dct_mat`` blockwise.

    The input is walked in square tiles whose side equals the number of rows
    of ``dct_mat``; each tile is converted to float32 before being transformed.

    Args:
        image: 2-D array of transform coefficients.
        dct_mat: Square transform matrix; its row count is the tile size.

    Returns:
        A float32 array with the same shape as ``image`` holding the
        inverse-transformed tiles.
    """
    tile = dct_mat.shape[0]
    n_rows, n_cols = image.shape
    restored = np.zeros((n_rows, n_cols), dtype=np.float32)
    row_starts = np.arange(0, n_rows, tile)
    col_starts = np.arange(0, n_cols, tile)
    for top in row_starts:
        bottom = top + tile
        for left in col_starts:
            right = left + tile
            coeffs = np.asarray(image[top:bottom, left:right], dtype=np.float32)
            restored[top:bottom, left:right] = dct_mat.T @ coeffs @ dct_mat
    return restored
# 计算Rms
def rms(predictions, targets):
    """Return the root-mean-square difference between two arrays.

    Both inputs are converted to float64 before subtracting. Without that,
    unsigned integer inputs (e.g. two uint8 images) wrap around modulo 256
    during the subtraction and squaring and give a meaningless result.

    Args:
        predictions: Array-like of values to evaluate.
        targets: Array-like of reference values, broadcastable against
            ``predictions``.

    Returns:
        The square root of the mean squared difference, as a float64 scalar.
    """
    diff = (np.asarray(predictions, dtype=np.float64)
            - np.asarray(targets, dtype=np.float64))
    return np.sqrt(np.mean(diff ** 2))
# 区域编码
def area_encode(block_size=8):
    """Build the zonal-coding mask that keeps the low-index half of a block.

    A position ``(i, j)`` is kept (mask value 1) when it lies strictly above
    the main anti-diagonal (``i + j <= block_size - 2``), or on the
    anti-diagonal itself within its upper half (``i < block_size // 2``).
    For the default 8x8 block this keeps 28 + 4 = 32 of the 64 positions.

    Args:
        block_size: Side length of the square mask. Defaults to 8, which
            reproduces the original fixed 8x8 mask.

    Returns:
        A ``(block_size, block_size)`` float64 array of zeros and ones.
    """
    rows, cols = np.indices((block_size, block_size))
    diagonal = rows + cols
    above_diagonal = diagonal <= block_size - 2
    # Only the upper half of the anti-diagonal is kept so that the total is
    # half of the block rather than half plus a full diagonal.
    upper_half_of_diagonal = (diagonal == block_size - 1) & (rows < block_size // 2)
    return (above_diagonal | upper_half_of_diagonal).astype(np.float64)
# 阈值编码
def threshold_encode(dct_mat):
    """Build a threshold-coding mask that keeps the larger half of the values.

    The threshold is the median of the absolute values of ``dct_mat``.
    Entries whose magnitude is strictly below it are dropped (mask value 0);
    all others are kept (mask value 1). Entries equal to the median are kept,
    so more than half of the positions can survive when there are ties.

    The mask is derived from whatever array is passed in. To threshold image
    content, pass the coefficients of the block being coded.

    Args:
        dct_mat: Array of values to threshold. Any shape is accepted; the
            original implementation only worked for 8x8 input.

    Returns:
        A float64 array of zeros and ones with the same shape as ``dct_mat``.
    """
    magnitudes = np.abs(np.asarray(dct_mat))
    thres = np.median(magnitudes)
    return np.where(magnitudes < thres, 0.0, 1.0)
def _threshold_encode_blocks(dct_img, block_size):
    """Threshold-code each block of a blockwise DCT image with its own mask."""
    height, width = dct_img.shape
    encoded = np.zeros((height, width), dtype=np.float32)
    for row in range(0, height, block_size):
        for col in range(0, width, block_size):
            block = dct_img[row:row + block_size, col:col + block_size]
            # The mask must come from this block's coefficients: threshold
            # coding keeps the largest coefficients wherever they occur.
            encoded[row:row + block_size, col:col + block_size] = (
                block * threshold_encode(block)
            )
    return encoded


def main(img_path):
    """Compare zonal and threshold coding of an image using 8x8 block DCT.

    The image is loaded as greyscale and transformed blockwise. About half of
    the coefficients in each block are kept, once with the fixed zonal mask
    and once with a per-block threshold mask. Both results are decoded and
    shown in a 3x3 figure together with their absolute error images and RMS
    errors against the source.

    Args:
        img_path: Path of the image file to load. Both image dimensions must
            be multiples of 8 for the blockwise transforms to work.
    """
    block_size = 8
    src = np.array(Image.open(img_path).convert("L"))
    dct = generate_dct_matrix(block_size=block_size)
    dct_img = block_process(src, dct)
    inv_dct_img = inv_block_process(dct_img, dct)

    # Zonal coding: one fixed mask shared by all blocks.
    area = area_encode()
    dct_img_area = encode(dct_img, area)
    inv_dct_img_area = inv_block_process(dct_img_area, dct)
    rms_area = np.abs(inv_dct_img_area - src)
    rms_area_num = rms(inv_dct_img_area, src)

    # Threshold coding: the mask is computed from each block's coefficients.
    # Previously it was computed from the DCT basis matrix `dct`, which gave
    # one image-independent mask and not threshold coding at all.
    dct_img_thre = _threshold_encode_blocks(dct_img, block_size)
    inv_dct_img_thre = inv_block_process(dct_img_thre, dct)
    rms_thre = np.abs(inv_dct_img_thre - src)
    rms_thre_num = rms(inv_dct_img_thre, src)

    img_list = [src, dct_img, inv_dct_img, dct_img_area, inv_dct_img_area, rms_area,
                dct_img_thre, inv_dct_img_thre, rms_thre]
    img_name = ["原图像", "经过DCT变换", "经过逆DCT变换", "区域编码", "区域编码结果", "RMS=" + str(rms_area_num),
                "阈值编码", "阈值编码结果", "RMS=" + str(rms_thre_num)]

    # SimHei is selected so the Chinese titles render; the unicode_minus
    # setting keeps minus signs displayable with that font.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    _, axs = plt.subplots(3, 3, figsize=(12, 12))
    for i in range(3):
        for j in range(3):
            axs[i][j].imshow(img_list[i * 3 + j], cmap='gray', vmin=0, vmax=255)
            axs[i][j].set_title(img_name[i * 3 + j])
            axs[i][j].axes.get_xaxis().set_visible(False)
            axs[i][j].axes.get_yaxis().set_visible(False)
    plt.tight_layout()
    plt.show()
# Script entry point: run the comparison on 'man.jpg', a path resolved
# relative to the current working directory.
if __name__ == "__main__":
    main('man.jpg')