用huffman编码一个字符串

一、霍夫曼编码


实验目的:

(1).利用霍夫曼编码给一个字符串编码


实验原理

如何构造一棵霍夫曼树:

(1).构造一个优先队列,从小到大排列每一个元素。

(2).2个权值最小的队列结点出队,构造一个新的结点满足新结点的权值是原来2个结点权值之和。

(3).把新的结点压入最小优先队列中。

(4).重复2.3知道队列为空,这是就构造了一棵霍夫曼树。


实验步骤:

(1).先任意输入一个字符串,求出每一个字符出现的频数,存在数组freqs里面

(2).把频数数组freqs作为参数传递给create_huffman_codes函数构造霍夫曼树。

(3).打印出每一个字符,及它对应的十进制编码和二进制编码。


具体实现程序:


#include  
#include  
#include  
 
#define BYTES 256 
 
struct huffcode { 
  int nbits; 
  int code; 
}; 
typedef struct huffcode huffcode_t; 
 
struct huffheap { 
  int *h; 
  int n, s, cs; 
  long *f; 
}; 
typedef struct huffheap heap_t; 
 
/* heap handling funcs */ 
static heap_t *_heap_create(int s, long *f) 
{ 
  heap_t *h; 
  h = malloc(sizeof(heap_t)); 
  h->h = malloc(sizeof(int)*s); 
  h->s = h->cs = s; 
  h->n = 0; 
  h->f = f; 
  return h; 
} 
 
static void _heap_destroy(heap_t *heap) 
{ 
  free(heap->h); 
  free(heap); 
} 
 
#define swap_(I,J) do { int t_; t_ = a[(I)];	\ 
    a[(I)] = a[(J)]; a[(J)] = t_; } while(0) 
static void _heap_sort(heap_t *heap) 
{ 
  int i=1, j=2; /* gnome sort */ 
  int *a = heap->h; 
		 
  while(i < heap->n) { /* smaller values are kept at the end */ 
    if ( heap->f[a[i-1]] >= heap->f[a[i]] ) { 
      i = j; j++; 
    } else { 
      swap_(i-1, i); 
      i--; 
      i = (i==0) ? j++ : i; 
    } 
  } 
} 
#undef swap_ 
 
static void _heap_add(heap_t *heap, int c) 
{ 
  if ( (heap->n + 1) > heap->s ) { 
    heap->h = realloc(heap->h, heap->s + heap->cs); 
    heap->s += heap->cs; 
  } 
  heap->h[heap->n] = c; 
  heap->n++; 
  _heap_sort(heap); 
} 
 
static int _heap_remove(heap_t *heap) 
{ 
  if ( heap->n > 0 ) { 
    heap->n--; 
    return heap->h[heap->n]; 
  } 
  return -1; 
} 
 
/* huffmann code generator */ 
huffcode_t **create_huffman_codes(long *freqs) 
{ 
  huffcode_t **codes; 
  heap_t *heap; 
  long efreqs[BYTES*2]; 
  int preds[BYTES*2]; 
  int i, extf=BYTES; 
  int r1, r2; 
				 
  memcpy(efreqs, freqs, sizeof(long)*BYTES); 
  memset(&efreqs[BYTES], 0, sizeof(long)*BYTES); 
					 
  heap = _heap_create(BYTES*2, efreqs); 
  if ( heap == NULL ) return NULL; 
						 
  for(i=0; i < BYTES; i++) if ( efreqs[i] > 0 ) _heap_add(heap, i); 
						   
  while( heap->n > 1 ) 
    { 
      r1 = _heap_remove(heap); 
      r2 = _heap_remove(heap); 
      efreqs[extf] = efreqs[r1] + efreqs[r2]; 
      _heap_add(heap, extf); 
      preds[r1] = extf; 
      preds[r2] = -extf; 
      extf++; 
    } 
  r1 = _heap_remove(heap); 
  preds[r1] = r1; 
  _heap_destroy(heap); 
								   
  codes = malloc(sizeof(huffcode_t *)*BYTES); 
									 
  int bc, bn, ix; 
  for(i=0; i < BYTES; i++) { 
    bc=0; bn=0; 
    if ( efreqs[i] == 0 ) { codes[i] = NULL; continue; } 
    ix = i; 
    while( abs(preds[ix]) != ix ) { 
      bc |= ((preds[ix] >= 0) ? 1 : 0 ) << bn; 
      ix = abs(preds[ix]); 
      bn++; 
    } 
    codes[i] = malloc(sizeof(huffcode_t)); 
    codes[i]->nbits = bn; 
    codes[i]->code = bc; 
  } 
  return codes; 
} 
 
void free_huffman_codes(huffcode_t **c) 
{ 
  int i; 
	   
  for(i=0; i < BYTES; i++) free(c[i]); 
  free(c); 
} 
 
#define MAXBITSPERCODE 100 
 
void inttobits(int c, int n, char *s) 
{ 
  s[n] = 0; 
  while(n > 0) { 
    s[n-1] = (c%2) + '0'; 
    c >>= 1; n--; 
  } 
} 
 
const char *test = "hellohellohello"; 
 
int main() 
{ 
  huffcode_t **r; 
  int i; 
  char strbit[MAXBITSPERCODE]; 
  const char *p; 
  long freqs[BYTES]; 
			   
  memset(freqs, 0, sizeof freqs); 
				 
  p = test; 
  while(*p != '\0') freqs[*p++]++; 
					 
  r = create_huffman_codes(freqs); 
					   
  for(i=0; i < BYTES; i++) { 
    if ( r[i] != NULL ) { 
      inttobits(r[i]->code, r[i]->nbits, strbit); 
      printf("%c (%d) %s\n", i, r[i]->code, strbit); 
    } 
  } 
						 
  free_huffman_codes(r); 
						   
  return 0; 
}

测试字符串是hellohellohello”

运行结果:

e(1) 01

h(0) 000

l(1) 1

o(1) 001


如果要用huffman编码来实现文本压缩的话只要把每一个字符与它对应的码字构造一个字典放在压缩文件中,再把所有字符的码字输入到压缩文件中,这样就能实现压缩,如果要解压的话在字典中查一下码字对应的字符就能解压了,暂时不太熟悉文本操作,什么时候时间多了再去完善一下。

你可能感兴趣的:(用huffman编码一个字符串)