Linux下实现Huffman编码压缩算法

 

//stack.h

/*************************************************************
    FileName : stack.h 
    FileFunc : 定义栈头文件  
    Version  : V0.1  
    Author   : Sunrier  
    Date     : 2012-07-09 09:33:48 
    Descp    : Linux下栈头文件 
*************************************************************/
#ifndef   __STACK_H__
#define   __STACK_H__

#ifdef __cplusplus
extern "C" {
#endif

#include "tree.h"

/* Number of elements a freshly initialised stack can hold. */
#define STACK_SIZE 128
/* Number of element slots added each time push() finds the stack full. */
#define STACK_INCREMENT_SIZE 128

/* The stack stores pointers to tree nodes. */
typedef pTree ElemType;

/*
 * Array-backed LIFO stack.  Elements live in bottom[0..top]; the array is
 * allocated by init_stack() and enlarged by push().
 */
typedef struct stack
{
	ElemType *bottom;	/* heap array holding the elements, index 0 is the oldest */
	int top;		/* index of the most recently pushed element, -1 when empty */
	int size;		/* current capacity of bottom[], counted in elements */
}sStack,*pStack;

/* Allocate an empty stack with room for STACK_SIZE elements and store it in *p. */
void init_stack(pStack *p);
/* Return 1 when the stack holds no elements, 0 otherwise. */
int isEmpty(pStack p);
/* Return 1 when every allocated slot is in use, 0 otherwise. */
int isFull(pStack p);
/* Push e, enlarging the storage first when the stack is full; returns 1 on success. */
int push(pStack,ElemType e);
/* Remove the top element into *e; returns 1 on success, 0 when the stack is empty. */
int pop(pStack p,ElemType *e);
/*
 * Copy the top element into *e without removing it; returns 1 on success.
 * Note that an empty stack is reported as -1 here, not 0 as in pop().
 */
int getTop(pStack p,ElemType *e);

#ifdef __cplusplus
}
#endif

#endif



 

 

//stack.c

/*************************************************************
    FileName : stack.c 
    FileFunc : 定义实现栈函数 
    Version  : V0.1  
    Author   : Sunrier  
    Date     : 2012-07-09 09:33:29 
    Descp    : Linux下实现栈函数
*************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "stack.h"

/*
 * Create an empty stack (last in, first out) and hand it back through *p.
 *
 * p: receives the newly allocated stack; the call does nothing when p is NULL.
 *
 * The stack starts with room for STACK_SIZE elements.  The function has no
 * way to report failure, so running out of memory is treated as fatal: a
 * message is printed and the process exits rather than writing through a
 * NULL pointer.
 */
void init_stack(pStack *p)
{
	pStack created;

	if( NULL==p )
		return;

	created = malloc(sizeof *created);
	if( NULL!=created )
		created->bottom = malloc(sizeof *created->bottom * STACK_SIZE);

	if( NULL==created || NULL==created->bottom )
	{
		perror("init_stack");
		free(created);
		exit(EXIT_FAILURE);
	}

	created->top = -1;	/* -1 marks the empty stack, see isEmpty() */
	created->size = STACK_SIZE;
	*p = created;
}

/*
 * Tell whether the stack holds no elements.
 * Returns 1 when top is at its empty marker (-1), otherwise 0.
 */
int isEmpty(pStack p)
{
	return ( p->top==-1 ) ? 1 : 0;
}

/*
 * Tell whether every allocated slot is occupied.
 * Returns 1 when top indexes the last slot of the array, otherwise 0.
 */
int isFull(pStack p)
{
	int last_slot = p->size-1;

	return ( p->top==last_slot ) ? 1 : 0;
}

/*
 * Push e onto the stack, enlarging the storage by STACK_INCREMENT_SIZE
 * slots when it is full.
 *
 * Returns 1 on success.  Returns 0, leaving the stack exactly as it was,
 * when p is NULL/uninitialised or the storage cannot be enlarged.
 */
int push(pStack p,ElemType e)
{
	ElemType *grown;
	size_t wanted;

	if( NULL==p || NULL==p->bottom )
		return 0;

	if( isFull(p) )
	{
		wanted = ((size_t)p->size+STACK_INCREMENT_SIZE)*sizeof(ElemType);
		/* keep the old pointer until realloc is known to have succeeded,
		   otherwise a failure would leak the buffer and lose every element */
		grown = realloc(p->bottom,wanted);
		if( NULL==grown )
			return 0;

		p->bottom = grown;
		p->size += STACK_INCREMENT_SIZE;
	}

	p->top++;
	p->bottom[p->top] = e;

	return 1;
}

/*
 * Remove the top element and store it in *e.
 * Returns 1 on success; returns 0 and leaves *e untouched when the stack
 * is empty.
 */
int pop(pStack p,ElemType *e)
{
	int removed = 0;

	if( !isEmpty(p) )
	{
		*e = p->bottom[p->top];
		p->top -= 1;
		removed = 1;
	}

	return removed;
}

/*
 * Copy the top element into *e without removing it.
 * Returns 1 on success; returns -1 and leaves *e untouched when the stack
 * is empty.
 */
int getTop(pStack p,ElemType *e)
{
	int status = -1;

	if( !isEmpty(p) )
	{
		*e = p->bottom[p->top];
		status = 1;
	}

	return status;
}





 

//queue.h

/*************************************************************
    FileName : queue.h 
    FileFunc : 定义队列头文件  
    Version  : V0.1  
    Author   : Sunrier  
    Date     : 2012-07-09 09:51:11 
    Descp    : Linux下队列头文件 
*************************************************************/
#ifndef   __QUEUE_H__
#define   __QUEUE_H__

#ifdef __cplusplus
extern "C" {
#endif

#include "tree.h"

/* The queue stores pointers to tree nodes. */
typedef pTree QueueElem;

/*
 * Singly linked FIFO queue.  The node created by init_queue() acts as the
 * head: push_queue() appends behind the last node and pop_queue() removes
 * the node that follows the head, never the head itself.
 */
typedef struct queue
{
	QueueElem data;		/* payload carried by this node */
	struct queue *next;	/* following node, NULL at the tail */
}sQueue, *pQueue;

/* Allocate a single queue node carrying data, with no successor, and store it in *p. */
void init_queue(pQueue *p, QueueElem data);
/* Append data behind the last node of pq; returns 1 on success, 0 when pq is NULL. */
int push_queue(pQueue pq, QueueElem data);
/*
 * Remove the node following pq and store its payload in *data.
 * Returns 1 on success, 0 when nothing follows pq, -1 when pq is NULL.
 */
int pop_queue(pQueue pq, QueueElem *data);


#ifdef __cplusplus
}
#endif

#endif



 

//queue.c

/*************************************************************
    FileName : queue.c 
    FileFunc : 定义实现队列函数  
    Version  : V0.1  
    Author   : Sunrier  
    Date     : 2012-07-09 09:51:14 
    Descp    : Linux下实现队列函数
*************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "queue.h"

/*
 * Create a single queue node (the queue is first in, first out) carrying
 * data and hand it back through *p.
 *
 * p:    receives the new node; the call does nothing when p is NULL.
 * data: payload stored in the node.
 *
 * The function has no way to report failure, so running out of memory is
 * treated as fatal: a message is printed and the process exits rather than
 * writing through a NULL pointer.
 */
void init_queue(pQueue *p, QueueElem data)
{
	pQueue node;

	if ( NULL==p )
	{
		return;
	}

	node = malloc(sizeof *node);
	if ( NULL==node )
	{
		perror("init_queue");
		exit(EXIT_FAILURE);
	}

	node->data = data;
	node->next = NULL;
	*p = node;
}

/*
 * Enqueue: append a new node carrying ptree behind the last node of pq.
 * The list is walked from pq to find the tail on every call.
 * Returns 1 on success, 0 when pq is NULL.
 */
int push_queue(pQueue pq, QueueElem ptree)
{
	pQueue tail;
	pQueue appended;

	if ( pq==NULL )
	{
		return 0;
	}

	for ( tail=pq; tail->next!=NULL; tail=tail->next )
	{
		/* nothing to do, just advance to the last node */
	}

	init_queue(&appended, ptree);
	tail->next = appended;

	return 1;
}

/*
 * Dequeue: unlink the node that follows pq, store its payload in *data and
 * free the node.  pq itself is never removed.
 * Returns 1 on success, 0 when nothing follows pq, -1 when pq is NULL.
 */
int pop_queue(pQueue pq, QueueElem *data)
{
	pQueue first;

	if ( NULL==pq )
	{
		return -1;
	}

	first = pq->next;
	if ( NULL==first )
	{
		return 0;
	}

	*data = first->data;
	pq->next = first->next;
	free(first);

	return 1;
}





 

 

//tree.h

/*************************************************************
    FileName : tree.h 
    FileFunc : 定义二叉树头文件  
    Version  : V0.1  
    Author   : Sunrier  
    Date     : 2012-07-09 09:58:03 
    Descp    : Linux下二叉树头文件 
*************************************************************/
#ifndef   __TREE_H__
#define   __TREE_H__

#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>

/* Symbol type: one byte of the input. */
typedef unsigned char etype;
/* Type used for occurrence counts. */
typedef int type;

/*
 * Node used first as a linked-list element while byte frequencies are
 * collected, then as a binary-tree node once Huffman() has run.
 */
typedef struct TreeNode
{
	etype data;		/* byte value represented by a leaf */
	type count;		/* occurrences; on a node built by Huffman() the sum of both children */
	struct TreeNode *next;	/* next list element; Huffman() re-points it at the parent node */
	struct TreeNode *left;	/* child taken on a 0 bit; NULL marks a leaf */
	struct TreeNode *right;	/* child taken on a 1 bit */
}Tree,*pTree;

/* Allocate a node with data 0, count 0 and all links NULL; store it in *p. */
void Init_TreeNode(pTree *p);
/* Allocate a node holding data with count 1 and all links NULL; store it in *p. */
void Init_eTreeNode(pTree *p,etype data);
/*
 * Count the bytes read from pr into the list behind proot and write one
 * (byte, count) record per distinct byte to pw.  Returns the number of
 * distinct bytes, 0 when proot is NULL.
 */
int Read_File(pTree proot,FILE *pr,FILE *pw);
/* Unlink and return the lowest-count node of the list behind proot; NULL when the list is empty. */
pTree Get_Frequency(pTree proot);
/* Combine the list behind *proot into a tree, free the list head and store the tree root in *proot. */
void Huffman(pTree *proot);
/*
 * Encode every byte of pr with the tree rooted at proot and write the bits
 * to pw, followed by three ints: n, the number of bits used in the last
 * byte, and the number of code bytes.
 */
void Read_Huffman(pTree proot,int n,FILE *pr,FILE *pw);
/* Read ch (byte, count) records from pr and append one node per record behind proot. */
void Create_Huffman(pTree proot,int ch,FILE *pr);
/*
 * Decode Num_Byte code bytes from pr with the tree rooted at proot and
 * write the recovered bytes to pw; count is the number of bits used in the
 * last code byte.
 */
void ReHuffman(pTree proot,FILE *pr,FILE *pw,int count,int Num_Byte);

#ifdef __cplusplus
}
#endif

#endif



 

 

//tree.c

/*************************************************************
    FileName : tree.c 
    FileFunc : 定义实现二叉树函数  
    Version  : V0.1  
    Author   : Sunrier  
    Date     : 2012-07-09 09:58:00 
    Descp    : Linux下实现二叉树函数
*************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
#include "stack.h"
#include "queue.h"

/*
 * Allocate a blank tree node and hand it back through *p.
 *
 * p: receives the new node; the call does nothing when p is NULL.
 *
 * The node starts with data 0, count 0 and every link NULL.  The function
 * has no way to report failure, so running out of memory is treated as
 * fatal: a message is printed and the process exits rather than writing
 * through a NULL pointer.
 */
void Init_TreeNode(pTree *p)
{
	pTree node;

	if( NULL==p )
		return;

	node = malloc(sizeof *node);
	if( NULL==node )
	{
		perror("Init_TreeNode");
		exit(EXIT_FAILURE);
	}

	node->data = 0;
	node->count = 0;
	node->left = NULL;
	node->right = NULL;
	node->next = NULL;
	*p = node;
}

/*
 * Allocate a tree node for one byte value and hand it back through *p.
 *
 * p:    receives the new node; the call does nothing when p is NULL.
 * data: byte value stored in the node.
 *
 * The node starts with count 1 (the occurrence that caused its creation)
 * and every link NULL.  The function has no way to report failure, so
 * running out of memory is treated as fatal: a message is printed and the
 * process exits rather than writing through a NULL pointer.
 */
void Init_eTreeNode(pTree *p,etype data)
{
	pTree node;

	if( NULL==p )
		return;

	node = malloc(sizeof *node);
	if( NULL==node )
	{
		perror("Init_eTreeNode");
		exit(EXIT_FAILURE);
	}

	node->data = data;
	node->count = 1;
	node->left = NULL;
	node->right = NULL;
	node->next = NULL;
	*p = node;
}

/*
 * Count how often each byte value occurs in the input (static model) and
 * write the resulting frequency table to the output.
 *
 * proot: list head; one node per distinct byte is appended behind it
 *        through the next pointers, in order of first appearance.
 * pr:    input stream, read to end of file.
 * pw:    output stream; receives one (byte, count) record per distinct
 *        byte.  The count is written as a raw int, so the table can only be
 *        read back on a machine with the same int size and byte order.
 *
 * Returns the number of distinct byte values, or 0 when proot is NULL.
 * Every input byte and the final table are also echoed to stdout; a failed
 * table write is reported on stderr.
 */
int Read_File(pTree proot,FILE *pr,FILE *pw)
{
	pTree p,pnew;
	unsigned char ch;
	int total = 0,num = 0;
	int write_failed = 0;

	if( NULL==proot )
		return 0;

	while( fread(&ch,sizeof(unsigned char),1,pr)>0 )
	{
		printf("%c",ch);

		/* look for an existing node for this byte; p stops on its predecessor */
		for(p=proot; p->next!=NULL; p=p->next)
		{
			if(p->next->data==ch)
			{
				(p->next->count)++;
				break;
			}
		}

		/*
		 * p->next is NULL only when the scan ran off the end without a
		 * match, so p is the tail and the byte is new.  The head node's own
		 * data field must not take part in this decision: it is 0, and
		 * comparing against it made NUL bytes at the start of the input
		 * disappear from the table.
		 */
		if( NULL==p->next )
		{
			Init_eTreeNode(&pnew,ch);
			p->next = pnew;
		}
	}

	printf("\n");

	/* emit the table and add up distinct symbols and total occurrences */
	for(p=proot->next; p!=NULL; p=p->next)
	{
		if( 1!=fwrite(&p->data,sizeof(p->data),1,pw) )
			write_failed = 1;
		if( 1!=fwrite(&p->count,sizeof(p->count),1,pw) )
			write_failed = 1;
		printf("%c:%d\n",p->data,p->count);
		total += p->count;
		num++;
	}
	printf("total:%d num:%d\n",total,num);

	if( write_failed )
		fprintf(stderr,"Read_File: writing the frequency table failed\n");

	return num;
}

/*
 * Unlink and return the node with the smallest count from the list that
 * hangs behind proot.  When several nodes share the smallest count the one
 * nearest the head wins.  The returned node keeps whatever its next pointer
 * held while it was in the list.
 *
 * Returns NULL when the list is empty.  The chosen count is echoed to
 * stdout.
 */
pTree Get_Frequency(pTree proot)
{
	pTree *link;			/* address of the pointer that leads to the node under inspection */
	pTree *lowest = &proot->next;	/* same, for the best candidate seen so far */
	pTree picked;

	if( NULL==*lowest )
		return NULL;

	for(link=&proot->next; NULL!=*link; link=&(*link)->next)
	{
		if( (*link)->count<(*lowest)->count )
			lowest = link;
	}

	picked = *lowest;
	*lowest = picked->next;		/* splice the winner out of the list */
	printf("huffman:%d",picked->count);
	return picked;
}

/*
 * Build the Huffman tree from the frequency list behind *proot.
 *
 * The two lowest-count nodes are taken out of the list repeatedly and
 * joined under a new node whose count is their sum; that node goes back to
 * the front of the list.  Both children get their next pointer aimed at the
 * new node.  When fewer than two nodes are left, the list head is freed and
 * *proot is replaced by the last node taken out, which is NULL if the list
 * was empty to begin with.
 *
 * Progress markers ("*" per merge, "----" at the end) go to stdout.
 */
void Huffman(pTree *proot)
{
	pTree smaller = NULL;
	pTree larger = NULL;
	pTree parent = NULL;
	pTree rest = NULL;

	for(;;)
	{
		smaller = Get_Frequency(*proot);
		if( NULL==smaller )
			break;

		larger = Get_Frequency(*proot);
		if( NULL==larger )
			break;

		Init_TreeNode(&parent);
		puts("*");
		parent->left = smaller;
		parent->right = larger;
		parent->count = smaller->count+larger->count;
		smaller->next = parent;
		larger->next = parent;

		/* put the merged node back at the front of the list */
		rest = (*proot)->next;
		(*proot)->next = parent;
		parent->next = rest;
	}

	free(*proot);
	*proot = smaller;
	puts("----");
}

/*
 * Encode the input with the Huffman tree and append the code bits plus a
 * trailer to the output.
 *
 * proot: root of the tree built by Huffman(); nothing is encoded when it is
 *        NULL.
 * n:     number of distinct byte values, stored in the trailer.
 * pr:    input stream, read from its current position to end of file.
 * pw:    output stream.
 *
 * For every input byte the tree is searched breadth-first for the matching
 * leaf; the path from that leaf up to the root is then replayed from the
 * root down, adding a 0 bit for a left branch and a 1 bit for a right
 * branch.  Bits are packed most significant first.  The last byte is padded
 * with zero bits and followed by three raw ints: n, the number of bits used
 * in the last byte, and the number of code bytes written.
 *
 * The work stack and queue are released before returning; a failed write
 * is reported on stderr.
 */
void Read_Huffman(pTree proot,int n,FILE *pr,FILE *pw)
{
	unsigned char bigcode = 0;	/* bits collected for the byte being built */
	unsigned char ch;
	pTree pnew = NULL,pp = NULL,pc = NULL;
	pStack ps = NULL;
	pQueue p = NULL;
	int count = 0,Num_Byte = 1;
	int write_failed = 0;
	int info0,info1,info2;

	init_stack(&ps);
	init_queue(&p,NULL);
	if( NULL==ps || NULL==p )
	{
		fprintf(stderr,"Read_Huffman: out of memory\n");
		if( NULL!=ps )
		{
			free(ps->bottom);
			free(ps);
		}
		free(p);
		return;
	}

	while( NULL!=proot && fread(&ch,sizeof(char),1,pr)>0 )
	{
		push_queue(p,proot);

		while( pop_queue(p,&pnew) )
		{
			if( NULL!=pnew->left )
			{
				/* internal node: keep searching below it */
				push_queue(p,pnew->right);
				push_queue(p,pnew->left);
				continue;
			}

			if( ch!=pnew->data )
				continue;

			/* matching leaf: stack it and all its ancestors (next == parent) */
			while( NULL!=pnew )
			{
				push(ps,pnew);
				pnew = pnew->next;
			}

			/* the root comes off first; each further pop is one step down */
			pop(ps,&pp);
			while( pop(ps,&pc) )
			{
				if( 8==count )
				{
					if( 1!=fwrite(&bigcode,sizeof(char),1,pw) )
						write_failed = 1;
					count = 0;
					bigcode = 0;
					Num_Byte++;
				}

				/* left branch adds a 0 bit, right branch a 1 bit */
				bigcode = (unsigned char)((bigcode<<1) | (pp->left==pc ? 0 : 1));
				count++;
				pp = pc;
			}

			/* drop whatever is still queued for this byte */
			while( pop_queue(p,&pnew) )
				;
		}
	}

	/* left-align the bits of the final, possibly partial, byte */
	bigcode = (unsigned char)(bigcode<<(8-count));
	if( 1!=fwrite(&bigcode,sizeof(char),1,pw) )
		write_failed = 1;
	printf("bigcode=%d\n",bigcode);

	info0 = n;
	info1 = count;
	info2 = Num_Byte;
	if( 1!=fwrite(&info0,sizeof(int),1,pw) )
		write_failed = 1;
	if( 1!=fwrite(&info1,sizeof(int),1,pw) )
		write_failed = 1;
	if( 1!=fwrite(&info2,sizeof(int),1,pw) )
		write_failed = 1;

	if( write_failed )
		fprintf(stderr,"Read_Huffman: writing the compressed data failed\n");

	free(ps->bottom);
	free(ps);
	free(p);	/* the queue is empty here, only its head node is left */
}

/*
 * Rebuild the frequency list from the table at the start of a compressed
 * file.
 *
 * proot: list head; the nodes are chained directly behind it, replacing
 *        whatever its next pointer held.
 * ch:    number of (byte, count) records to read; values <= 0 read nothing.
 * pr:    input stream positioned at the first record.
 *
 * Reading stops early, with a message on stderr, when the stream ends
 * before ch records were found, so no node is ever built from unread data.
 * The record count and the resulting totals are echoed to stdout.
 */
void Create_Huffman(pTree proot,int ch,FILE *pr)
{
	etype data;
	type count;
	pTree p,pnew;
	int total = 0,num = 0;

	if( NULL==proot )
		return ;

	printf("ch=%d\n",ch);
	p = proot;

	while( ch>0 )
	{
		if( 1!=fread(&data,sizeof(etype),1,pr) || 1!=fread(&count,sizeof(type),1,pr) )
		{
			fprintf(stderr,"Create_Huffman: frequency table is truncated\n");
			break;
		}

		Init_eTreeNode(&pnew,data);
		pnew->count = count;
		p->next = pnew;
		p = pnew;
		ch--;
	}

	for(p=proot->next; p!=NULL; p=p->next)
	{
		total += p->count;
		num++;
	}

	printf("total:%d num:%d\n",total,num);

}

/* Split one byte into eight flags, most significant bit first; a set bit keeps its mask value. */
static void ReHuffman_unpack(unsigned char byte,unsigned char bits[8])
{
	int i;

	for(i=0; i<8; i++)
		bits[i] = (unsigned char)(byte & (0x80>>i));
}

/*
 * Decode the code bits of a compressed file back into the original bytes.
 *
 * proot:    root of the tree rebuilt by Huffman(); nothing is done when it
 *           is NULL.
 * pr:       input stream positioned at the first code byte.
 * pw:       output stream receiving the decoded bytes.
 * count:    number of bits used in the last code byte; values outside 0..8
 *           are clamped.
 * Num_Byte: number of code bytes, the partial last one included.
 *
 * A 0 bit descends to the left child and a 1 bit to the right child; on
 * reaching a leaf its byte is written and decoding restarts at the root.
 * The symbol completed by the very last bit is written after the bit loop,
 * because the loop itself only notices a leaf when it looks at the next
 * bit.  When the tree is a single leaf the encoder emits no bits at all, so
 * the byte is written as many times as its count says.
 *
 * Each code byte and each decoded symbol is also echoed to stdout.
 */
void ReHuffman(pTree proot,FILE *pr,FILE *pw,int count ,int Num_Byte)
{
	pTree p = proot;
	unsigned char ch,chcpy[8];
	int n;
	type copies;

	if( NULL==proot )
		return;

	if( NULL==proot->left )
	{
		/* only one distinct byte in the original: there are no code bits to follow */
		for(copies=proot->count; copies>0; copies--)
			fwrite(&proot->data,sizeof(unsigned char),1,pw);
		return;
	}

	if( count<0 )
		count = 0;
	if( count>8 )
		count = 8;	/* chcpy[] has exactly eight entries */

	/* every code byte except the last one carries eight valid bits */
	while( Num_Byte>1 && fread(&ch,sizeof(unsigned char),1,pr)>0 )
	{
		Num_Byte--;
		printf("0x%x\n",ch);
		ReHuffman_unpack(ch,chcpy);

		for(n=0; n<8; n++)
			printf("%d--",chcpy[n]);

		n = 0;
		while( n<8 )
		{
			if( NULL==p->left )
			{
				printf("\n%d\n",n);
				printf("%d\n",chcpy[n]);
				printf("%c\n",p->data);
				fwrite(&p->data,sizeof(unsigned char ),1,pw);
				p = proot;
				continue;
			}

			p = chcpy[n] ? p->right : p->left;
			n++;
		}
	}

	/* last code byte: only its first count bits belong to the data */
	if( 1!=fread(&ch,sizeof(unsigned char),1,pr) )
	{
		ch = 0;
		count = 0;	/* nothing left to read, only flush what is pending */
	}
	ReHuffman_unpack(ch,chcpy);

	for(n=0; n<8; n++)
		printf("%d--",chcpy[n]);

	n = 0;
	while( n<count )
	{
		if( NULL==p->left )
		{
			printf("%c:\n",p->data);
			fwrite(&p->data,sizeof(char),1,pw);
			p = proot;
			continue;
		}

		p = chcpy[n] ? p->right : p->left;
		n++;
	}

	/* the final bit may have completed a symbol that no later bit flushed */
	if( p!=proot && NULL==p->left )
	{
		printf("%c:\n",p->data);
		fwrite(&p->data,sizeof(char),1,pw);
	}

}




 

 

//demo.c

/*************************************************************
    FileName : demo.c 
    FileFunc : 定义实现Huffman算法  
    Version  : V0.1  
    Author   : Sunrier  
    Date     : 2012-07-09 10:52:17 
    Descp    : Linux下实现Huffman算法压缩/解压文件 
*************************************************************/
#include <stdio.h>
#include "tree.h"
#include "queue.h"

/* Largest number of distinct byte values a frequency table can list. */
enum { DEMO_MAX_SYMBOLS = 256 };

/* Print the command-line synopsis to stderr. */
static void demo_usage(const char *prog)
{
	fprintf(stderr,"Usage: \n ");
	fprintf(stderr," Compress : %s c sourcefile destfile \n", prog);
	fprintf(stderr," Decompress : %s d sourcefile destfile \n", prog);
}

/*
 * Close both streams.  Returns 0 on success and 1 when closing the output
 * fails, since that is where buffered data can still be lost.
 */
static int demo_close(FILE *pr,FILE *pw)
{
	int status = 0;

	if( NULL!=pw && 0!=fclose(pw) )
	{
		perror("Closing the destination file failed");
		status = 1;
	}
	if( NULL!=pr )
		fclose(pr);

	return status;
}

/*
 * Entry point: "c sourcefile destfile" compresses, "d sourcefile destfile"
 * decompresses.
 *
 * Compressed layout: one (byte, int count) record per distinct byte, the
 * code bytes, then three ints (record count, bits used in the last code
 * byte, number of code bytes).  Both files are opened in binary mode so the
 * data is not translated on platforms that distinguish text streams.
 *
 * Returns 0 on success and 1 on any usage, open, read or write failure.
 */
int main(int argc,char *argv[])
{
	FILE *pr,*pw;
	pTree proot;
	int num = 0;

	if( argc<4 )
	{
		demo_usage(argv[0]);
		return 1;
	}

	if( 'c'==*argv[1] )/* compress */
	{
		pr = fopen(argv[2],"rb");
		if( NULL==pr )
		{
			perror("Read file failed !\n");
			return 1;
		}

		pw = fopen(argv[3],"wb");
		if( NULL==pw )
		{
			perror("Write file failed !\n");
			fclose(pr);
			return 1;
		}

		Init_TreeNode(&proot);
		num = Read_File(proot,pr,pw);
		Huffman(&proot);

		/* second pass over the source, this time to encode it */
		if( 0!=fseek(pr,0,SEEK_SET) )
		{
			perror("Rewinding the source file failed");
			demo_close(pr,pw);
			return 1;
		}
		Read_Huffman(proot,num,pr,pw);

		return demo_close(pr,pw);
	}

	if( 'd'==*argv[1] )/* decompress */
	{
		int count,Num_Byte,ch;
		long table_bytes;

		pr = fopen(argv[2],"rb");
		if( NULL==pr )
		{
			perror("Read file failed !\n");
			return 1;
		}

		/* the trailer is the last three ints of the file */
		if( 0!=fseek(pr,-(long)(3*sizeof(int)),SEEK_END)
			|| 1!=fread(&ch,sizeof(int),1,pr)
			|| 1!=fread(&count,sizeof(int),1,pr)
			|| 1!=fread(&Num_Byte,sizeof(int),1,pr) )
		{
			fprintf(stderr,"%s: trailer is missing, not a compressed file\n",argv[2]);
			fclose(pr);
			return 1;
		}
		printf("%d/%d/%d\n",ch,count,Num_Byte);

		if( ch<0 || ch>DEMO_MAX_SYMBOLS || count<0 || count>8 || Num_Byte<1 )
		{
			fprintf(stderr,"%s: trailer holds impossible values\n",argv[2]);
			fclose(pr);
			return 1;
		}

		Init_TreeNode(&proot);
		if( 0!=fseek(pr,0,SEEK_SET) )
		{
			perror("Rewinding the source file failed");
			fclose(pr);
			return 1;
		}
		Create_Huffman(proot,ch,pr);
		Huffman(&proot);

		/* the code bytes start right behind the frequency table */
		table_bytes = (long)(sizeof(int)+sizeof(char))*ch;
		if( 0!=fseek(pr,table_bytes,SEEK_SET) )
		{
			perror("Seeking past the frequency table failed");
			fclose(pr);
			return 1;
		}

		pw = fopen(argv[3],"wb");
		if( NULL==pw )
		{
			perror("Write file failed !\n");
			fclose(pr);
			return 1;
		}

		ReHuffman(proot,pr,pw,count,Num_Byte);

		return demo_close(pr,pw);
	}

	demo_usage(argv[0]);
	return 1;
}




 

 

//makefile

#makefile
OBJS = demo
all:$(OBJS)
#CFLAGS = -O -w -ansi
#CFLAGS = -O -Wall -ansi
CFLAGS = -g -Wall -ansi
CC = gcc $(CFLAGS)
#SRCS = *.c
SRCS = demo.c tree.c stack.c queue.c
HDRS = tree.h stack.h queue.h

# Rebuild when any source or header changes.  All sources are always passed
# to the compiler: the automatic variable $? would list only the files newer
# than the target, so an incremental build would link an incomplete program.
demo:$(SRCS) $(HDRS)
	@$(CC) -o $@ $(SRCS)
# Removes every entry of this directory that is not the makefile or a
# .c/.h/.sql/.sh file, so keep nothing else of value here.
clean	:
	@ls | grep -v ^makefile$$ | grep -v [.]c$$ | grep -v [.]h$$ | grep -v [.]sql$$ | grep -v [.]sh$$ | xargs rm -rf
.PHONY: all clean
#makefile



 

[Sunrier@localhost Huffman]$ ls
demo.c  makefile  queue.c  queue.h  stack.c  stack.h  tree.c  tree.h
[Sunrier@localhost Huffman]$ make
[Sunrier@localhost Huffman]$ ls
demo  demo.c  makefile  queue.c  queue.h  stack.c  stack.h  tree.c  tree.h
[Sunrier@localhost Huffman]$ ./demo c demo.c 1
.................................
.................................
.................................
[Sunrier@localhost Huffman]$ ls
1  demo  demo.c  makefile  queue.c  queue.h  stack.c  stack.h  tree.c  tree.h
[Sunrier@localhost Huffman]$
[Sunrier@localhost Huffman]$ ./demo d 1 1.c
.................................
.................................
.................................
[Sunrier@localhost Huffman]$ ls
1  1.c  demo  demo.c  makefile  queue.c  queue.h  stack.c  stack.h  tree.c  tree.h
[Sunrier@localhost Huffman]$

 

注:此程序功能对于大文件的压缩和解压还无法实现

 

 

 

你可能感兴趣的:(Linux下实现Huffman编码压缩算法)