【信息论与编码】【北京航空航天大学】实验二、哈夫曼编码【C语言实现】(下)图像编码压缩

实验2 哈夫曼编码(下)(图像编码)

实验简介: 本次实验为实验1:哈夫曼编码的后续补充,实验1见博客:实验一、哈夫曼编码【C语言实现】(上)

说明: 实验代码仅作为学习使用,欢迎转载、改进,禁止用于任何商业用途。

一、实验内容

“不同格式的压缩”:

1、问题: 使用画图软件或者其他工具进行一些简单的艺术创作(推荐使用三四种颜色,不要太多,尽量使用较大的分辨率(例如3840 x 2160)),分别将图片保存为 bmp 和 jpeg 格式,尝试使用你在必选模组实现的程序编码图片,对比地解释为什么文件体积会发生这种变化。类似地,尝试编码一个可执行文件(.exe格式),并尝试解释文件体积的变化。

2、C语言代码实现

整个项目分为以下5个源代码文件:

(1)pq.h
文件说明:优先队列头文件。
文件内容:

//pq.h:优先队列
#ifndef _PRIORITY_QUEUE_H
#define _PRIORITY_QUEUE_H

// ============= KeyValue: an integer key paired with an opaque value =============
// _key is the integer the priority queue compares; _value is a caller-supplied
// pointer that is stored and handed back unchanged.
typedef struct key_value_struct
{
      int _key;
      void *_value;
} KeyValue;

// Allocates a pair holding key and value.
KeyValue *key_value_new(int key, void *value);
// Frees kv; when freevalue is non-NULL it is first called on kv->_value.
void key_value_free(KeyValue *kv, void (*freevalue)(void *));

// ============= PriorityQueue: array-backed heap of KeyValue pointers =============
// Ordering modes accepted by priority_queue_new().
#define PRIORITY_MAX 1   // comparison result is negated, so larger keys rank first
#define PRIORITY_MIN 2   // smaller keys rank first
typedef struct priority_queue_struct
{
      KeyValue **_nodes;   // element storage; the children of slot i live at 2i+1 and 2i+2
      int _size;           // number of elements currently stored
      int _capacity;       // number of slots allocated in _nodes

      int _priority;       // PRIORITY_MAX or PRIORITY_MIN
} PriorityQueue;


// Public priority-queue API.
// Creates an empty queue; priority is PRIORITY_MAX or PRIORITY_MIN.
PriorityQueue *priority_queue_new(int priority);
// Frees every stored pair with key_value_free(pair, freevalue), then the queue itself.
void priority_queue_free(PriorityQueue *pq, void (*freevalue)(void *));
// Returns the element in slot 0 without removing it, or NULL when the queue is empty.
const KeyValue *priority_queue_top(PriorityQueue *pq);
// Removes and returns the element in slot 0, or NULL when the queue is empty.
// The caller becomes responsible for freeing the returned pair.
KeyValue *priority_queue_dequeue(PriorityQueue *pq);
// Inserts kv into the queue; the queue stores the pointer, not a copy.
void priority_queue_enqueue(PriorityQueue *pq, KeyValue *kv);
// Returns the number of stored elements.
int priority_queue_size(PriorityQueue *pq);
// Returns non-zero when the queue holds no elements.
int priority_queue_empty(PriorityQueue *pq);
// Prints the stored keys, then dequeues and prints every element (the queue ends up empty).
void priority_queue_print(PriorityQueue *pq);
#endif

(2)pq.c

文件说明:优先队列函数实现。
文件内容:

//pq.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pq.h"


// Private helpers (file scope only).
// Doubles the capacity of the slot array.
static void priority_queue_realloc(PriorityQueue *pq);
// Removes slot 0 from the heap (called by priority_queue_dequeue).
static void priority_queue_adjust_head(PriorityQueue *pq);
// Accounts for a newly appended element and moves an element toward the root
// (called by priority_queue_enqueue).
static void priority_queue_adjust_tail(PriorityQueue *pq);
// Compares the keys stored at two slots, honoring the queue's priority mode.
static int priority_queue_compare(PriorityQueue *pq, int pos1, int pos2);
// Exchanges the pointers stored at two slots.
static void priority_queue_swap(KeyValue **nodes, int pos1, int pos2);


//Functions of KeyValue Struct
// Constructor for a key/value pair.
// Stores key and the value pointer as given (the value is not copied).
// Returns the new pair, or NULL when memory cannot be allocated.
// Release the result with key_value_free().
KeyValue *key_value_new(int key, void *value)
{
      KeyValue *pkv = malloc(sizeof *pkv);
      if(pkv == NULL)
            return NULL; // do not write through a failed allocation
      pkv->_key = key;
      pkv->_value = value;
      return pkv;
}


// Releases a key/value pair.
// A NULL kv is ignored. When freevalue is non-NULL it is invoked on the stored
// value before the pair itself is freed; pass NULL to leave the value alone.
void key_value_free(KeyValue *kv, void (*freevalue)(void *))
{
      if(kv == NULL)
            return;

      if(freevalue != NULL)
            freevalue(kv->_value);

      free(kv);
}


//Functions of PriorityQueue Struct
// Constructor for a priority queue.
// priority selects the ordering mode (PRIORITY_MAX or PRIORITY_MIN).
// Returns an empty queue with room for 11 elements, or NULL when memory
// cannot be allocated. Release the result with priority_queue_free().
PriorityQueue *priority_queue_new(int priority)
{
      PriorityQueue *pq = malloc(sizeof *pq);
      if(pq == NULL)
            return NULL;

      pq->_capacity = 11; //default initial value
      pq->_size = 0;
      pq->_priority = priority;

      pq->_nodes = malloc(sizeof *pq->_nodes * pq->_capacity);
      if(pq->_nodes == NULL)
      {
            // do not leak the half-built queue
            free(pq);
            return NULL;
      }
      return pq;
}


// Destroys a priority queue.
// A NULL pq is ignored. Every pair still stored is released through
// key_value_free(pair, freevalue), then the slot array and the queue are freed.
void priority_queue_free(PriorityQueue *pq, void (*freevalue)(void *))
{
      int slot = 0;

      if(pq == NULL)
            return;

      while(slot < pq->_size)
      {
            key_value_free(pq->_nodes[slot], freevalue);
            slot++;
      }
      free(pq->_nodes);
      free(pq);
}


// Peeks at the head of the queue.
// Returns the pair stored in slot 0 without removing it, or NULL when the
// queue is empty.
const KeyValue *priority_queue_top(PriorityQueue *pq)
{
      return (pq->_size > 0) ? pq->_nodes[0] : NULL;
}


// Removes the head of the queue.
// Returns the pair that was in slot 0, or NULL when the queue is empty.
// The returned pair is no longer tracked by the queue.
KeyValue *priority_queue_dequeue(PriorityQueue *pq)
{
      KeyValue *head;

      if(pq->_size <= 0)
            return NULL;

      // Capture slot 0 first: adjust_head moves it out of the live range.
      head = pq->_nodes[0];
      priority_queue_adjust_head(pq);
      return head;
}


// Adds a pair to the queue.
// Logs the key to stdout, stores kv in the first free slot, lets
// priority_queue_adjust_tail() account for it (that helper increments _size),
// and grows the slot array once it is full so the next insert has room.
void priority_queue_enqueue(PriorityQueue *pq, KeyValue *kv)
{
      const int freeSlot = pq->_size;

      printf("add key:%d\n", kv->_key);
      pq->_nodes[freeSlot] = kv;
      priority_queue_adjust_tail(pq);

      if(pq->_capacity <= pq->_size)
            priority_queue_realloc(pq);
}


// Reports how many pairs the queue currently holds.
int priority_queue_size(PriorityQueue *pq)
{
      const int stored = pq->_size;
      return stored;
}


// Emptiness test: returns 1 when the queue holds no pairs, 0 otherwise.
int priority_queue_empty(PriorityQueue *pq)
{
      return !(pq->_size > 0);
}


// Debug dump of the queue to stdout.
// First prints the keys in storage order, then dequeues every element and
// prints its key in dequeue order.
// NOTE: this drains the queue, and the dequeued pairs are not freed here.
void priority_queue_print(PriorityQueue *pq)
{
      int slot = 0;

      printf("data in the pq->_nodes\n");
      while(slot < pq->_size)
      {
            printf("%d ", pq->_nodes[slot]->_key);
            ++slot;
      }
      printf("\n");

      printf("dequeue all data\n");
      for(;;)
      {
            KeyValue *item;

            if(priority_queue_empty(pq))
                  break;
            item = priority_queue_dequeue(pq);
            printf("%d ", item->_key);
      }
      printf("\n");
}


// Doubles the capacity of the slot array.
// The result of realloc() goes into a temporary so the existing array is not
// lost when the call fails. This helper returns void and its caller writes
// into the array on the next insert, so running out of memory cannot be
// reported upward; the failure is made explicit by aborting instead of
// continuing with an unusable array.
static void priority_queue_realloc(PriorityQueue *pq)
{
      const int newCapacity = pq->_capacity * 2;
      KeyValue **grown = realloc(pq->_nodes, sizeof *grown * (size_t)newCapacity);

      if(grown == NULL)
      {
            fprintf(stderr, "priority queue: out of memory growing to %d slots\n", newCapacity);
            abort();
      }
      pq->_nodes = grown;
      pq->_capacity = newCapacity;
}


// Removes slot 0 from the heap and restores heap order (sift-down).
// Precondition: _size > 0 (priority_queue_dequeue checks this before calling).
// The head is swapped with the last element, _size is decremented, and the
// element now in slot 0 is moved down while a child ranks ahead of it.
//
// Fix: the previous loop bound, i < (_size - 1) / 2, only visited nodes that
// have two children, so a node with just a left child (for example the root
// of a two-element heap) was never compared with it. The loop now runs while
// a left child exists and only considers the right child when it is in range.
// Because this changes the order in which elements are dequeued, data encoded
// with a tree built by the old ordering must be decoded by the same build.
static void priority_queue_adjust_head(PriorityQueue *pq)
{
      int i, left, right, best;

      priority_queue_swap(pq->_nodes, 0, pq->_size - 1);
      pq->_size--;

      i = 0;
      for(;;)
      {
            left = i * 2 + 1;
            if(left >= pq->_size)
                  break; // slot i has no children

            right = left + 1;
            best = left;
            if(right < pq->_size && priority_queue_compare(pq, left, right) > 0)
                  best = right; // the right child ranks ahead of the left one

            if(priority_queue_compare(pq, i, best) <= 0)
                  break; // parent already ranks ahead of both children

            priority_queue_swap(pq->_nodes, i, best);
            i = best;
      }
}


// Accounts for an element the caller has just stored at index _size and
// restores heap order (sift-up).
// Increments _size, then moves the new element toward the root while it ranks
// ahead of its parent.
//
// Fix: the previous code started the sift-up at _size - 1 (taken before the
// increment), which is the element that was last BEFORE the insertion, so the
// newly stored element was never compared with its parent. The walk now
// starts at the new element's own slot.
// Because this changes the order in which elements are dequeued, data encoded
// with a tree built by the old ordering must be decoded by the same build.
static void priority_queue_adjust_tail(PriorityQueue *pq)
{
      int child, parent;

      child = pq->_size; // slot the caller just filled
      pq->_size++;
      while(child > 0)
      {
            parent = (child - 1) / 2;
            if(priority_queue_compare(pq, parent, child) <= 0)
                  break; // parent already ranks ahead of (or ties with) the child

            priority_queue_swap(pq->_nodes, child, parent);
            child = parent;
      }
}


// Compares the keys stored at slots pos1 and pos2.
// In PRIORITY_MIN mode the result is positive when the key at pos1 is larger,
// negative when it is smaller and zero when the keys are equal; in
// PRIORITY_MAX mode the sign is reversed.
// The result is computed from comparisons rather than a subtraction, so it
// cannot overflow for keys of large magnitude; it is always -1, 0 or 1
// (callers in this file only test whether it is > 0).
static int priority_queue_compare(PriorityQueue *pq, int pos1, int pos2)
{
      const int key1 = pq->_nodes[pos1]->_key;
      const int key2 = pq->_nodes[pos2]->_key;
      int r = (key1 > key2) - (key1 < key2);

      if(pq->_priority == PRIORITY_MAX)
            r = -r;
      return r;
}


// Exchanges the pair pointers stored at slots pos1 and pos2 of nodes.
static void priority_queue_swap(KeyValue **nodes, int pos1, int pos2)
{
      KeyValue *first = nodes[pos1];
      KeyValue *second = nodes[pos2];

      nodes[pos1] = second;
      nodes[pos2] = first;
}

(3)compress.h

文件说明:压缩函数相关头文件。
文件内容:

//compress.h
#ifndef _FILE_COMPRESSION_H
#define _FILE_COMPRESSION_H


// Huffman tree node.
// A leaf has both child pointers NULL and carries a byte value in _ch;
// an internal node carries the combined count of its two children.
typedef struct HaffumanTreeNode
{
      char _ch;                                 // byte value
      int _count;                               // occurrence count (frequency)
      struct HaffumanTreeNode *_left, *_right;  // children taken on bit '0' / bit '1'
} HTN;


// FileCompress struct: state shared by the steps of one compress or decompress run.
#define BITS_PER_CHAR 8     // number of bits packed into each output byte
#define MAX_CHARS 256       // number of distinct byte values
#define FILE_BUF_SIZE 8192  // size in bytes of the buffers used for file I/O

typedef struct FileCompressStruct
{
      HTN *_haffuman;               // root of the Huffman tree (NULL until one is built)
      unsigned int _charsCount;     // number of distinct byte values that occur
      unsigned int _total;          // total number of bytes in the uncompressed file
      char *_dictionary[MAX_CHARS]; // per byte value: its code as a string of '0'/'1', or NULL
      int _statistic[MAX_CHARS];    // per byte value: number of occurrences
} FCS;

// Public API.
// Allocates a zero-initialized compression context.
FCS *fcs_new();
// Compresses inFileName into outFileName (byte statistics header followed by packed code bits).
void fcs_compress(FCS *fcs, const char *inFileName, const char *outFileName);
// Restores the file stored in inFileName (as written by fcs_compress) into outFileName.
void fcs_decompress(FCS *fcs, const char *inFileName, const char *outFileName);
// Frees the tree, the code strings and the context itself; NULL is ignored.
void fcs_free(FCS *fcs);

#endif

(4)compress.c

文件说明:压缩相关的函数实现。
文件内容:

//compress.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compress.h"
#include "pq.h"


// Single-bit masks indexed by bit position within a byte, most significant
// bit first: mask[0] selects 10000000, mask[7] selects 00000001.
static const unsigned char mask[8] =
{
      1u << 7, 1u << 6, 1u << 5, 1u << 4,
      1u << 3, 1u << 2, 1u << 1, 1u << 0
};


//static functions of HTN 哈夫曼树结点相关函数
// Constructor for a tree node.
// Returns a node with no children that stores ch and count, or NULL when
// memory cannot be allocated.
static HTN *htn_new(char ch, int count)
{
      HTN *htn = malloc(sizeof *htn);
      if(htn == NULL)
            return NULL; // do not write through a failed allocation
      htn->_left = NULL;
      htn->_right = NULL;
      htn->_ch = ch;
      htn->_count = count;
      return htn;
}


// Prints the subtree rooted at htn in pre-order, one node per line as
// "<byte value>:<count>", indented by two spaces per level of depth.
static void htn_print_recursive(HTN *htn, int depth)
{
      int pad;

      if(htn == NULL)
            return;

      for(pad = depth; pad > 0; --pad)
            printf("  ");
      printf("%d:%d\n", htn->_ch, htn->_count);

      htn_print_recursive(htn->_left, depth + 1);
      htn_print_recursive(htn->_right, depth + 1);
}


// Prints the whole tree, starting with the root at indentation level 0.
static void htn_print(HTN *htn)
{
      const int rootDepth = 0;
      htn_print_recursive(htn, rootDepth);
}


// Frees the subtree rooted at htn: both children first, then the node itself.
// A NULL htn is ignored.
static void htn_free(HTN *htn)
{
      if(htn == NULL)
            return;

      htn_free(htn->_left);
      htn_free(htn->_right);
      free(htn);
}
 

//static functions of FCS 文件压缩相关函数
// First pass over the input: counts how often each byte value occurs.
// Adds to fcs->_statistic (per-value counts), fcs->_charsCount (number of
// distinct values) and fcs->_total (number of bytes read).
// Reports on stderr and leaves fcs untouched when the file cannot be opened.
static void fcs_generate_statistic(FCS *fcs, const char *inFileName)
{
      unsigned char chunk[FILE_BUF_SIZE];
      int got, pos;
      FILE *in = fopen(inFileName, "rb");

      if(in == NULL)
      {
            fprintf(stderr, "can't open file:%s\n", inFileName);
            return;
      }

      for(;;)
      {
            got = fread(chunk, 1, FILE_BUF_SIZE, in);
            if(got <= 0)
                  break;

            fcs->_total += got;
            for(pos = 0; pos < got; ++pos)
            {
                  // a count going from 0 to 1 means a value seen for the first time
                  if(fcs->_statistic[chunk[pos]]++ == 0)
                        fcs->_charsCount++;
            }
      }
      fclose(in);
}



// Builds the Huffman tree from fcs->_statistic and stores its root in
// fcs->_haffuman.
// Every byte value with a non-zero count becomes a leaf queued by its count;
// the two elements dequeued first are then repeatedly merged under a new
// parent whose count is their sum, until a single element is left.
// fcs->_haffuman is not assigned when no byte value has a non-zero count.
static void fcs_create_haffuman_tree(FCS *fcs)
{
      int symbol;
      PriorityQueue *queue = priority_queue_new(PRIORITY_MIN);

      for(symbol = 0; symbol < MAX_CHARS; ++symbol)
      {
            HTN *leaf;

            if(fcs->_statistic[symbol] == 0)
                  continue;
            leaf = htn_new((char)symbol, fcs->_statistic[symbol]);
            priority_queue_enqueue(queue, key_value_new(fcs->_statistic[symbol], leaf));
      }

      while(!priority_queue_empty(queue))
      {
            KeyValue *first = priority_queue_dequeue(queue);
            KeyValue *second = priority_queue_dequeue(queue);
            HTN *zeroBranch, *oneBranch, *merged;
            int sum;

            if(second == NULL)
            {
                  // only one element was left: it is the finished tree
                  fcs->_haffuman = first->_value;
                  key_value_free(first, NULL);
                  continue;
            }

            zeroBranch = (HTN *)first->_value;
            oneBranch = (HTN *)second->_value;
            sum = zeroBranch->_count + oneBranch->_count;
            // the pairs are only wrappers; the nodes live on inside the tree
            key_value_free(first, NULL);
            key_value_free(second, NULL);

            merged = htn_new(0, sum);
            merged->_left = zeroBranch;
            merged->_right = oneBranch;
            priority_queue_enqueue(queue, key_value_new(sum, merged));
      }
      priority_queue_free(queue, NULL);
}



// Walks the tree and records the code of every leaf.
// path holds the branch characters taken so far ('0' for left, '1' for
// right) and depth is how many of them are valid. At a leaf the first depth
// characters of path are copied into a newly allocated NUL-terminated string
// stored at dictionary[byte value of the leaf].
static void fcs_generate_dictionary_recursively(HTN *htn, char *dictionary[], char path[], int depth)
{
      if(htn == NULL)
            return;

      if(htn->_left == NULL && htn->_right == NULL)
      {
            char *bits = (char *)malloc(sizeof(char) * (depth + 1));

            memcpy(bits, path, depth);
            bits[depth] = '\0';
            dictionary[(unsigned char)htn->_ch] = bits;
      }

      if(htn->_left != NULL)
      {
            path[depth] = '0';
            fcs_generate_dictionary_recursively(htn->_left, dictionary, path, depth + 1);
      }
      if(htn->_right != NULL)
      {
            path[depth] = '1';
            fcs_generate_dictionary_recursively(htn->_right, dictionary, path, depth + 1);
      }
}



// Fills fcs->_dictionary with the code string of every byte value that has a
// leaf in fcs->_haffuman.
//
// Fix: the scratch path buffer used to hold 32 characters, but the recursion
// writes one character per tree level. A tree with up to MAX_CHARS leaves can
// be up to MAX_CHARS - 1 levels deep, so deeper trees overflowed the buffer.
// It is now sized for the deepest possible tree.
static void fcs_generate_dictionary(FCS *fcs)
{
      char path[MAX_CHARS];
      fcs_generate_dictionary_recursively(fcs->_haffuman, fcs->_dictionary, path, 0);
}



// Debug helper: prints "<byte value>:<code>" on stdout for every byte value
// that has a code in fcs->_dictionary.
static void fcs_print_dictionary(FCS *fcs)
{
      int symbol = 0;

      while(symbol < MAX_CHARS)
      {
            const char *bits = fcs->_dictionary[symbol];

            if(bits != NULL)
                  printf("%d:%s\n", symbol, bits);
            ++symbol;
      }
}



// Writes the statistics header that the decompressor needs to rebuild the
// tree: one line with the number of distinct byte values, then one
// "<byte value> <count>" line for every value with a non-zero count.
//
// Fix: _charsCount is an unsigned int, so it is now printed with %u instead
// of %d; the bytes written are unchanged for every value it can take.
static void fcs_write_statistic(FCS *fcs, FILE *pf)
{
      int i;
      fprintf(pf, "%u\n", fcs->_charsCount);
      for(i = 0; i < MAX_CHARS; ++i)
            if(fcs->_statistic[i] != 0)
                  fprintf(pf, "%d %d\n", i, fcs->_statistic[i]);
}


// Second pass of compression: writes the statistics header followed by the
// packed code bits of every input byte, then prints the compression ratio.
// Bits are packed most significant bit first; a final partial byte is padded
// with zero bits.
//
// Fixes relative to the earlier version:
//  - the code length was held in a plain char, which cannot represent every
//    possible code length; it is now a size_t;
//  - a byte with no dictionary entry (for example when the file changed
//    between the two passes) is reported instead of being passed to strlen();
//  - write failures are detected and reported;
//  - the ratio is computed in floating point, so it no longer wraps around
//    when the output is larger than the input, and an empty input prints 0
//    instead of dividing by zero.
static void fcs_do_compress(FCS *fcs, const char *inFileName, const char* outFileName)
{
      int i, ret;
      size_t j, len;
      const char *dictEntry;
      unsigned int bytes;
      unsigned char bitBuf;
      int bitPos;
      int writeFailed;
      double ratio;

      unsigned char inBuf[FILE_BUF_SIZE];
      FILE *pfIn, *pfOut;
      pfIn = fopen(inFileName, "rb");
      if(!pfIn)
      {
            fprintf(stderr, "can't open file:%s\n", inFileName);
            return;
      }
      pfOut = fopen(outFileName, "wb");
      if(!pfOut)
      {
            fclose(pfIn);
            fprintf(stderr, "can't open file:%s\n", outFileName);
            return;
      }
      fcs_write_statistic(fcs, pfOut);
      bitBuf = 0x00;
      bitPos = 0;
      bytes = 0;
      writeFailed = 0;
      while((ret = fread(inBuf, 1, FILE_BUF_SIZE, pfIn)) > 0)
      {
            for(i = 0; i < ret; ++i)
            {
                  dictEntry = fcs->_dictionary[inBuf[i]];
                  if(dictEntry == NULL)
                  {
                        // this byte value was not counted by the statistics pass
                        fprintf(stderr, "no code for byte %d in file:%s\n", inBuf[i], inFileName);
                        fclose(pfIn);
                        fclose(pfOut);
                        return;
                  }
                  len = strlen(dictEntry);
                  for(j = 0; j < len; ++j)
                  {
                        if(dictEntry[j] == '1')
                              bitBuf |= mask[bitPos];
                        bitPos++;

                        if(bitPos == BITS_PER_CHAR)
                        {
                              if(fwrite(&bitBuf, 1, sizeof(bitBuf), pfOut) != sizeof(bitBuf))
                                    writeFailed = 1;
                              bitBuf = 0x00;
                              bitPos = 0;
                              bytes++;
                        }
                  }
            }
      }
      if(bitPos != 0)
      {
            // flush the last, partially filled byte
            if(fwrite(&bitBuf, 1, sizeof(bitBuf), pfOut) != sizeof(bitBuf))
                  writeFailed = 1;
            bytes++;
      }
      fclose(pfIn);
      if(fclose(pfOut) != 0)
            writeFailed = 1;
      if(writeFailed)
            fprintf(stderr, "can't write file:%s\n", outFileName);

      ratio = 0.0;
      if(fcs->_total > 0)
            ratio = ((double)fcs->_total - (double)bytes) * 100.0 / (double)fcs->_total;
      printf("The compression ratio is:%f%%\n", ratio);
}



// Reads the statistics header written by fcs_write_statistic() and leaves pf
// positioned on the first byte of the packed data.
// Fills fcs->_charsCount, fcs->_statistic and adds every count to fcs->_total.
//
// Fixes relative to the earlier version:
//  - the formats used to end in "\n". In a scanf format any whitespace
//    directive consumes ALL following whitespace, so when the packed data
//    happened to start with bytes such as 0x09-0x0D or 0x20 they were
//    swallowed as part of the header and the decoded output was corrupted.
//    The numbers are now read without a trailing whitespace directive and
//    exactly one newline (the terminator of the last header line) is consumed;
//  - the results of fscanf() are checked;
//  - the byte value read from the file is range-checked before it is used as
//    an index into _statistic.
// On a malformed header a message is printed to stderr and parsing stops.
static void fcs_read_statistic(FCS *fcs, FILE *pf)
{
      int i, charsCount = 0;
      int ch;
      int num;

      if(fscanf(pf, "%d", &charsCount) != 1 || charsCount < 0 || charsCount > MAX_CHARS)
      {
            fprintf(stderr, "invalid statistic header: bad entry count\n");
            return;
      }
      fcs->_charsCount = charsCount;
      for(i = 0; i < charsCount; ++i)
      {
            // the leading %d skips the newline that ended the previous line
            if(fscanf(pf, "%d %d", &ch, &num) != 2 || ch < 0 || ch >= MAX_CHARS)
            {
                  fprintf(stderr, "invalid statistic header: bad entry %d\n", i);
                  return;
            }
            fcs->_statistic[ch] = num;
            fcs->_total += num;
      }

      // Consume only the single newline that terminates the header; whatever
      // follows (including whitespace-valued bytes) belongs to the packed data.
      ch = fgetc(pf);
      if(ch != '\n' && ch != EOF)
            ungetc(ch, pf);
}


// Decodes the packed data that follows the header in pfIn and writes the
// restored bytes to outFileName.
// Each input bit selects the right ('1') or left ('0') child; on reaching a
// leaf its byte is written and decoding restarts at the root. fcs->_total is
// counted down by one per byte written, so the padding bits in the last
// packed byte do not produce extra output.
//
// Fixes relative to the earlier version:
//  - no tree (empty original file): an empty output file is produced instead
//    of dereferencing a NULL root when packed bytes are present;
//  - a tree consisting of a single leaf (the original contained only one
//    distinct byte value): that leaf's code has length zero, so the packed
//    data carries no bits for it; the byte is now written _total times
//    instead of producing no output or following a NULL child;
//  - decoding stops as soon as all _total bytes have been written, and a
//    message is printed when the packed data ends before that;
//  - unused locals removed.
static void fcs_do_decompress(FCS *fcs, FILE *pfIn, const char *outFileName)
{
      int i, j, ret;
      unsigned char ch;
      HTN *root, *htn;
      unsigned char buf[FILE_BUF_SIZE];
      FILE *pfOut;

      pfOut = fopen(outFileName, "wb");
      if(!pfOut)
      {
            fprintf(stderr, "can't open file:%s\n", outFileName);
            return;
      }

      root = fcs->_haffuman;
      if(root == NULL)
      {
            // nothing was counted, so there is nothing to decode
            fclose(pfOut);
            return;
      }
      if(root->_left == NULL && root->_right == NULL)
      {
            // single distinct byte value: repeat it _total times
            while(fcs->_total > 0)
            {
                  fwrite(&root->_ch, 1, sizeof(char), pfOut);
                  fcs->_total--;
            }
            fclose(pfOut);
            return;
      }

      htn = root;
      while(fcs->_total > 0 && (ret = fread(buf, 1, FILE_BUF_SIZE, pfIn)) > 0)
      {
            for(i = 0; i < ret && fcs->_total > 0; ++i)
            {
                  ch = buf[i];
                  for(j = 0; j < BITS_PER_CHAR && fcs->_total > 0; ++j)
                  {
                        htn = (ch & mask[j]) ? htn->_right : htn->_left;
                        if(htn->_left == NULL && htn->_right == NULL) //leaf
                        {
                              fwrite(&htn->_ch, 1, sizeof(char), pfOut);
                              fcs->_total--;
                              htn = root;
                        }
                  }
            }
      }
      if(fcs->_total > 0)
            fprintf(stderr, "compressed data ended early: %u bytes missing\n", fcs->_total);
      fclose(pfOut);
}


//FCS functions
// Allocates a compression context with all counters zero, no tree and no
// code strings.
// Returns NULL when memory cannot be allocated. Release with fcs_free().
FCS *fcs_new(void)
{
      FCS *fcs = malloc(sizeof *fcs);
      if(fcs == NULL)
            return NULL; // do not write through a failed allocation
      fcs->_charsCount = 0;
      fcs->_total = 0;
      memset(fcs->_statistic, 0, sizeof(fcs->_statistic));
      memset(fcs->_dictionary, 0, sizeof(fcs->_dictionary));
      fcs->_haffuman = NULL;
      return fcs;
}



// Releases a compression context: the Huffman tree (when one was built),
// every code string in the dictionary, and the context itself.
// A NULL fcs is ignored.
void fcs_free(FCS *fcs)
{
      int symbol;

      if(fcs == NULL)
            return;

      if(fcs->_haffuman != NULL)
            htn_free(fcs->_haffuman);

      symbol = 0;
      while(symbol < MAX_CHARS)
      {
            free(fcs->_dictionary[symbol]);
            symbol++;
      }
      free(fcs);
}



// Compresses inFileName into outFileName.
// Runs the pipeline in order: count byte frequencies, build the Huffman
// tree, derive the code strings, then encode the file. Progress messages go
// to stdout.
void fcs_compress(FCS *fcs, const char *inFileName, const char *outFileName)
{
      printf("To compress file: %s ...\n", inFileName);

      fcs_generate_statistic(fcs, inFileName);
      fcs_create_haffuman_tree(fcs);
      fcs_generate_dictionary(fcs);
      fcs_do_compress(fcs, inFileName, outFileName);

      printf("The compressed data of file: %s stored at %s!\n", inFileName, outFileName);
}



// Restores the file stored in inFileName into outFileName.
// Reads the statistics header, rebuilds the Huffman tree and code strings
// from it, then decodes the rest of the input. Progress messages go to
// stdout; a message is printed on stderr when the input cannot be opened.
void fcs_decompress(FCS *fcs, const char *inFileName, const char *outFileName)
{
      FILE *packed;

      printf("To decompress file: %s ...\n", inFileName);

      packed = fopen(inFileName, "rb");
      if(packed == NULL)
      {
            fprintf(stderr, "can't open file: %s\n", inFileName);
            return;
      }

      fcs_read_statistic(fcs, packed);
      fcs_create_haffuman_tree(fcs);
      fcs_generate_dictionary(fcs);
      fcs_do_decompress(fcs, packed, outFileName);
      fclose(packed);

      printf("The decompressed data of file: %s stored at %s\n", inFileName, outFileName);
}

(5)main.c

文件说明: 自定义了压缩后的文件后缀名:.hfm(哈夫曼Huffman的简写)
文件内容:

//main.c
#include <stdlib.h>
#include "compress.h"

// Run configuration: which steps main() performs and which files it uses.
const int DO_COMPRESS = 1;
const int DO_DECOMPRESS = 1;
const char *InFile = "tree.jpg"; // The file to compress
const char *CompressedFile = "tree.hfm"; // Where the compressed data is written
const char *OutFile = "tree_decompressed.jpg"; // Where the decompressed copy is written

// Entry point.
// Compresses InFile into CompressedFile when DO_COMPRESS is set, then
// decompresses CompressedFile into OutFile when DO_DECOMPRESS is set.
// Each step uses its own context. Finally runs the shell command "pause"
// and returns 0.
int main(int argc, char **argv)
{
      FCS *job;

      // 1. compress the file
      if(DO_COMPRESS)
      {
            job = fcs_new();
            fcs_compress(job, InFile, CompressedFile);
            fcs_free(job);
      }

      // 2. decompress the compressed file
      if(DO_DECOMPRESS)
      {
            job = fcs_new();
            fcs_decompress(job, CompressedFile, OutFile);
            fcs_free(job);
      }

      system("pause");

      return 0;
}

3、图片压缩测试

(1)、测试文件1:tree.jpg
文件说明:手绘的简笔画,内容为一棵树,格式为 .jpg

main.c中输入的相关字段:

const char *InFile = "tree.jpg"; //The file to compress 要压缩的文件
const char *CompressedFile = "tree.hfm"; //Compressed data of the file 压缩后的文件
const char *OutFile = "tree_decompressed.jpg"; //The decompressed file of the data 解压缩后的文件

运行时截图:

【信息论与编码】【北京航空航天大学】实验二、哈夫曼编码【C语言实现】(下)图像编码压缩_第1张图片

压缩前的文件大小:276,764字节
压缩后的文件大小:242,500字节
压缩后体积减小的百分比:约12.4%

(2)、测试文件2:plane.bmp

文件说明:手绘的简笔画,内容为一架飞机,格式为 .bmp

main.c中输入的相关字段:

const char *InFile = "plane.bmp"; //The file to compress 要压缩的文件
const char *CompressedFile = "plane.hfm"; //Compressed data of the file 压缩后的文件
const char *OutFile = "plane_decompressed.bmp"; //The decompressed file of the data 解压缩后的文件

运行时截图:

【信息论与编码】【北京航空航天大学】实验二、哈夫曼编码【C语言实现】(下)图像编码压缩_第2张图片

压缩前的文件大小:24,883,254字节
压缩后的文件大小:3,775,181字节
压缩后体积减小的百分比:约84.8%

4、可执行文件压缩测试

(1)、测试文件1:test.exe
文件说明:某一C语言源程序编译生成的可执行文件,格式为 .exe

main.c中输入的相关字段:

const char *InFile = "test.exe"; //The file to compress 要压缩的文件
const char *CompressedFile = "test.hfm"; //Compressed data of the file 压缩后的文件
const char *OutFile = "test_decompressed.exe"; //The decompressed file of the data 解压缩后的文件

运行时截图:

【信息论与编码】【北京航空航天大学】实验二、哈夫曼编码【C语言实现】(下)图像编码压缩_第3张图片

压缩前的文件大小:72,383字节
压缩后的文件大小:49,895字节
压缩后体积减小的百分比:约31.1%

(2)、测试文件2:bfv.exe
文件说明:主机游戏《战地风云5》的可执行文件,格式为 .exe

main.c中输入的相关字段:

const char *InFile = "bfv.exe"; //The file to compress 要压缩的文件
const char *CompressedFile = "bfv.hfm"; //Compressed data of the file 压缩后的文件
const char *OutFile = "bfv_decompressed.exe"; //The decompressed file of the data 解压缩后的文件

运行时截图:

【信息论与编码】【北京航空航天大学】实验二、哈夫曼编码【C语言实现】(下)图像编码压缩_第4张图片

压缩前的文件大小:238,028,800字节
压缩后的文件大小:45,326,336字节
压缩后体积减小的百分比:约81.0%

问题: 在待压缩文件体积过大时,解压缩时可能会出现问题。具体原因仍在研究当中。
至此,本次实验结束。

你可能感兴趣的:(C语言,信息论与编码,c语言,开发语言)