LZW编解码算法实现与分析实验报告

一:编解码原理

1.词典树的结构

尾缀字符(suffix)
母节点(parent)
第一个孩子节点(firstchild)
下一个兄弟节点(nextsibling)

2.编码原理
LZW编解码算法实现与分析实验报告_第1张图片

LZW的编码思想是不断地从字符流中提取新的字符串,通俗地理解为新“词条”,然后用“代号”也就是码字表示这个“词条”。这样一来,对字符流的编码就变成了用码字去替换字符流,生成码字流,从而达到压缩数据的目的。LZW编码是围绕称为词典的转换表来完成的。LZW编码器通过管理这个词典完成输入与输出之间的转换。LZW编码器的输入是字符流,字符流可以是用8位ASCII字符组成的字符串,而输出是用n位(例如12位)表示的码字流。LZW编码算法的步骤如下:
步骤1:将词典初始化为包含所有可能的单字符,当前前缀P初始化为空。
步骤2:当前字符C=字符流中的下一个字符。
步骤3:判断P+C是否在词典中
(1)如果“是”,则用C扩展P,即让P=P+C,返回到步骤2。
(2)如果“否”,则
输出与当前前缀P相对应的码字W;
将P+C添加到词典中;
令P=C,并返回到步骤2

3.解码原理
LZW编解码算法实现与分析实验报告_第2张图片

LZW解码算法开始时,译码词典和编码词典相同,包含所有可能的前缀根。具体解码算法如下:
步骤1:在开始译码时词典包含所有可能的前缀根。
步骤2:令CW:=码字流中的第一个码字。
步骤3:输出当前缀-符串string.CW到字符流。
步骤4:先前码字PW:=当前码字CW。
步骤5:当前码字CW:=码字流的下一个码字。
步骤6:判断当前缀-符串string.CW 是否在词典中。
(1)如果“是”,则把当前缀-符串string.CW输出到字符流。
当前前缀P:=先前缀-符串string.PW。
当前字符C:=当前缀-符串string.CW的第一个字符。
把缀-符串P+C添加到词典。
(2)如果“否”,则当前前缀P:=先前缀-符串string.PW。
当前字符C:=先前缀-符串string.PW的第一个字符(此时CW尚未加入词典,string.CW无法直接查到;这种情况只会在string.CW恰好等于string.PW加上其首字符时出现)。
输出缀-符串P+C到字符流,然后把它添加到词典中。
步骤7:判断码字流中是否还有码字要译。
(1)如果“是”,就返回步骤4。
(2)如果“否”,结束。

二:代码部分

bitio.h

#pragma once
/*
 * Declaration for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */
#ifndef __BITIO__
#define __BITIO__

#include 

/* State needed to read or write a stdio stream one bit at a time. */
typedef struct {
	FILE *fp;	/* underlying byte stream */
	unsigned char mask;	/* single-bit mask selecting the next bit inside rack; 0x80 marks the start of a fresh byte */
	int rack;	/* byte currently being consumed (input) or assembled (output) */
}BITFILE;

/* Open filename in binary mode for bit-wise reading; a NULL filename selects stdin. Returns NULL on failure. */
BITFILE *OpenBitFileInput(char *filename);
/* Open filename in binary mode for bit-wise writing; a NULL filename selects stdout. Returns NULL on failure. */
BITFILE *OpenBitFileOutput(char *filename);
/* Close the stream held by bf and free bf. */
void CloseBitFileInput(BITFILE *bf);
/* Write out a partially filled byte if one is pending, then close the stream and free bf. */
void CloseBitFileOutput(BITFILE *bf);
/* Read one bit (0 or 1), taking the most significant bit of each byte first; prints a message and exits the process when the stream is exhausted. */
int BitInput(BITFILE *bf);
/* Read count bits; the first bit read becomes the most significant bit of the result. */
unsigned long BitsInput(BITFILE *bf, int count);
/* Append one bit to the stream; any non-zero value of bit is written as 1. */
void BitOutput(BITFILE *bf, int bit);
/* Write the low count bits of code, most significant of those bits first. */
void BitsOutput(BITFILE *bf, unsigned long code, int count);
#endif	// __BITIO__

bitio.cpp

/*
 * Definitions for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */

#include 
#include 
#include "bitio.h"

//打开输入文件
BITFILE *OpenBitFileInput(char *filename) {
	BITFILE *bf;
	bf = (BITFILE *)malloc(sizeof(BITFILE));
	if (NULL == bf) return NULL;
	if (NULL == filename)	bf->fp = stdin;
	//else bf->fp = fopen(filename, "rb");
	else 
		fopen_s(&(bf->fp), filename, "rb");
	if (NULL == bf->fp) return NULL;
	bf->mask = 0x80;
	bf->rack = 0;
	return bf;
}

//打开输出文件
BITFILE *OpenBitFileOutput(char *filename) {
	BITFILE *bf;
	bf = (BITFILE *)malloc(sizeof(BITFILE));
	if (NULL == bf) return NULL;
	if (NULL == filename)	bf->fp = stdout;
	//else bf->fp = fopen(filename, "wb");
	else
		fopen_s(&(bf->fp), filename, "wb");
	if (NULL == bf->fp) return NULL;
	bf->mask = 0x80;
	bf->rack = 0;
	return bf;
}

/* Close the stream held by bf, then free bf itself. */
void CloseBitFileInput(BITFILE *bf) {
	FILE *stream = bf->fp;

	fclose(stream);
	free(bf);
}

/*
 * Finish a bit stream opened for writing: write the partially filled byte
 * if one is pending, close the stream and free bf.
 */
void CloseBitFileOutput(BITFILE *bf) {
	FILE *stream = bf->fp;
	int partial_byte_pending = (bf->mask != 0x80);

	if (partial_byte_pending) {
		// Unused low bits of the last byte are still zero.
		fputc(bf->rack, stream);
	}
	fclose(stream);
	free(bf);
}

/*
 * Read the next bit from bf (most significant bit of each byte first).
 *
 * Returns 0 or 1. If a new byte is needed and the stream is exhausted,
 * prints a message to stderr and terminates the process with exit(-1).
 */
int BitInput(BITFILE *bf) {
	unsigned char next_mask;
	int bit;

	// A mask of 0x80 means the previous byte is used up: fetch a new one.
	if (bf->mask == 0x80) {
		bf->rack = fgetc(bf->fp);
		if (bf->rack == EOF) {
			fprintf(stderr, "Read after the end of file reached\n");
			exit(-1);
		}
	}

	bit = ((bf->rack & bf->mask) != 0) ? 1 : 0;

	// Advance to the next lower bit, wrapping back to the MSB.
	next_mask = bf->mask >> 1;
	bf->mask = next_mask ? next_mask : 0x80;
	return bit;
}

/*
 * Read count bits from bf and return them as an unsigned value; the first
 * bit read is the most significant bit of the result.
 *
 * count <= 0 reads nothing and returns 0. If count exceeds the width of
 * unsigned long, all count bits are still consumed and only the last ones
 * read are kept.
 *
 * The value is accumulated by shifting an unsigned long instead of building
 * a mask with `1L << (count - 1)`: that expression is undefined for
 * count <= 0 and overflows a 32-bit signed long when count is 32.
 */
unsigned long BitsInput(BITFILE *bf, int count) {
	unsigned long value = 0UL;

	while (0 < count--) {
		value = (value << 1) | (unsigned long)BitInput(bf);
	}
	return value;
}

/*
 * Append one bit to bf. Any non-zero value of bit is stored as 1. Bits fill
 * each byte from the most significant position down; a completed byte is
 * written to the stream and the buffer is reset.
 */
void BitOutput(BITFILE *bf, int bit) {
	if (bit) {
		bf->rack |= bf->mask;
	}
	bf->mask >>= 1;
	if (bf->mask != 0) {
		return;	// byte not complete yet
	}

	// Eight bits collected: write the byte and start a fresh one.
	fputc(bf->rack, bf->fp);
	bf->rack = 0;
	bf->mask = 0x80;
}

/*
 * Write the low count bits of code to bf, most significant of those bits
 * first.
 *
 * count <= 0 writes nothing. Bit positions beyond the width of unsigned
 * long are written as 0.
 *
 * Bits are extracted with an unsigned right shift instead of a mask built
 * by `1L << (count - 1)`: that expression is undefined for count <= 0 and
 * overflows a 32-bit signed long when count is 32.
 */
void BitsOutput(BITFILE *bf, unsigned long code, int count) {
	// 8-bit bytes are assumed, as in the 0x80 byte mask used by this module.
	const int width = (int)(sizeof(unsigned long) * 8);
	int shift;

	for (shift = count - 1; 0 <= shift; shift--) {
		int bit = (shift < width) ? (int)((code >> shift) & 1UL) : 0;
		BitOutput(bf, bit);
	}
}
#if 0
/*
 * Disabled self-test for the bit I/O layer: copies argv[1] (or stdin) to
 * argv[2] (or stdout) one bit at a time and echoes the bits to stderr in
 * groups of eight.
 *
 * The open calls now match the one-argument prototypes and store the
 * returned handles; previously they passed two arguments and left bfi/bfo
 * unassigned, so the block could not compile when enabled.
 */
int main(int argc, char **argv) {
	BITFILE *bfi, *bfo;
	int bit;
	int count = 0;

	if (1 < argc) {
		if (NULL == (bfi = OpenBitFileInput(argv[1]))) {
			fprintf(stderr, "fail open the file\n");
			return -1;
		}
	}
	else {
		if (NULL == (bfi = OpenBitFileInput(NULL))) {
			fprintf(stderr, "fail open stdin\n");
			return -2;
		}
	}
	if (2 < argc) {
		if (NULL == (bfo = OpenBitFileOutput(argv[2]))) {
			fprintf(stderr, "fail open file for output\n");
			return -3;
		}
	}
	else {
		if (NULL == (bfo = OpenBitFileOutput(NULL))) {
			fprintf(stderr, "fail open stdout\n");
			return -4;
		}
	}
	// The loop has no exit of its own: BitInput() terminates the process
	// when the input is exhausted.
	while (1) {
		bit = BitInput(bfi);
		fprintf(stderr, "%d", bit);
		count++;
		if (0 == (count & 7))fprintf(stderr, " ");
		BitOutput(bfo, bit);
	}
	return 0;
}
#endif

LZW_E.cpp

/*
 * Definition for LZW coding
 *
 * vim: ts=4 sw=4 cindent nowrap
 */
#include 
#include 
#include "bitio.h"
/* Largest code value; codes are read and written as 16-bit quantities (see input/output below). */
#define MAX_CODE 65535

/*
 * Phrase dictionary stored as a tree in an array, indexed by code.
 * Each entry is one phrase: the phrase of its parent followed by suffix.
 * A link value of -1 means "none".
 */
struct 
{
	int suffix;	/* last character of the phrase */
	int parent, firstchild, nextsibling;	/* tree links, each an index into dictionary[] or -1 */
} dictionary[MAX_CODE + 1];
int next_code;	/* index at which the next new phrase will be stored */
int d_stack[MAX_CODE]; // stack for decoding a phrase; DecodeString fills it last character first

/* Read / write one 16-bit code through the bit I/O layer. */
#define input(f) ((int)BitsInput( f, 16))
#define output(f, x) BitsOutput( f, (unsigned long)(x), 16)

int DecodeString(int start, int code);
void InitDictionary(void);
/*
 * Debug helper: print every phrase added after the 256 single-character
 * roots, one per line, as "<code>-><phrase>".
 */
void PrintDictionary(void) 
{
	int code;

	for (code = 256; code < next_code; ++code) {
		// DecodeString leaves the phrase reversed in d_stack.
		int pos = DecodeString(0, code);

		printf("%4d->", code);
		for (pos = pos - 1; pos >= 0; --pos) {
			printf("%c", (char)(d_stack[pos]));
		}
		printf("\n");
	}
}

/*
 * Expand the phrase identified by code into d_stack, beginning at index
 * start. The tree is walked from the phrase's node up to its root, so the
 * characters are stored last-to-first.
 *
 * Returns the index one past the last slot written (equal to start when
 * code is negative).
 */
int DecodeString(int start, int code) {
	int top;

	for (top = start; code >= 0; top++, code = dictionary[code].parent) {
		d_stack[top] = dictionary[code].suffix;
	}
	return top;
}

/*
 * Reset the dictionary to the 256 single-character root phrases.
 * Root i has suffix i, no parent, no children, and is linked to root i + 1
 * (the last root ends the chain). New phrases start at code 256.
 */
void InitDictionary(void) {
	int code = 0;

	while (code < 256) {
		dictionary[code].suffix = code;
		dictionary[code].parent = -1;
		dictionary[code].firstchild = -1;
		dictionary[code].nextsibling = (code < 255) ? code + 1 : -1;
		code++;
	}
	next_code = 256;     // slot for the first newly learned phrase
}
/*
 * Look up the phrase made of the phrase string_code followed by character.
 *
 * Returns the code of that phrase, or -1 if it is not in the dictionary.
 * A negative string_code stands for the empty phrase; the result is then
 * character itself, which is the code of the single-character root.
 */
int InDictionary(int character, int string_code) {
	int child;

	if (string_code < 0) return character;

	// Scan the children of string_code for one carrying this suffix.
	for (child = dictionary[string_code].firstchild;
	     child >= 0;
	     child = dictionary[child].nextsibling) {
		if (dictionary[child].suffix == character) return child;
	}
	return -1;
}

/*
 * Store the phrase "phrase string_code followed by character" at index
 * next_code and advance next_code. The new node is appended at the end of
 * the parent's child list. Nothing happens when string_code is negative.
 * The caller must make sure there is room left in the dictionary.
 */
void AddToDictionary(int character, int string_code)
{
	int tail;

	if (string_code < 0) return;

	// Fill in the new leaf.
	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].firstchild = -1;
	dictionary[next_code].nextsibling = -1;

	// Hook it into the parent's child list.
	tail = dictionary[string_code].firstchild;
	if (tail < 0) {
		// Parent had no children: the new leaf becomes the first.
		dictionary[string_code].firstchild = next_code;
	}
	else {
		// Walk to the last existing child and link after it.
		while (dictionary[tail].nextsibling >= 0) {
			tail = dictionary[tail].nextsibling;
		}
		dictionary[tail].nextsibling = next_code;
	}
	next_code++;
}

/*
 * LZW-compress the whole of fp into bf.
 *
 * Output layout: a 32-bit length of the input, followed by one 16-bit code
 * per emitted phrase.
 *
 * fp: input stream; it is measured with fseek/ftell and rewound first.
 * bf: bit stream opened for writing; the caller closes it.
 */
void LZWEncode(FILE *fp, BITFILE *bf) {
	int character;
	int string_code;
	int index;
	long end_pos;
	unsigned long file_length;

	fseek(fp, 0, SEEK_END);
	end_pos = ftell(fp);                  // length of the source file
	fseek(fp, 0, SEEK_SET);               // rewind to the start of the source
	// If the length cannot be determined, write 0xFFFFFFFF, the same value
	// the implicit -1 conversion produced before; LZWDecode treats it as 0.
	file_length = (end_pos < 0) ? 0xFFFFFFFFUL : (unsigned long)end_pos;
	BitsOutput(bf, file_length, 4 * 8);   // header: original length
	InitDictionary();
	string_code = -1;                     // current phrase is empty
	while (EOF != (character = fgetc(fp))) 
	{
		index = InDictionary(character, string_code);   // -1: phrase+character is not known yet
		if (0 <= index) {	// phrase+character in dictionary: keep extending
			string_code = index;
		}
		else {	// phrase+character not in dictionary
			output(bf, string_code);
			if (MAX_CODE > next_code) {	// free space in dictionary
				AddToDictionary(character, string_code);
			}
			string_code = character;    // restart the phrase at this character
		}
	}
	// Emit the pending phrase. For an empty input there is none; writing
	// string_code == -1 would put a meaningless 0xFFFF code in the output.
	if (0 <= string_code) {
		output(bf, string_code);
	}
}

/*
 * Decode an LZW stream produced by LZWEncode from bf and write the
 * recovered bytes to fp.
 *
 * The stream starts with a 32-bit original length (0xFFFFFFFF is treated
 * as 0), followed by 16-bit codes. Decoding stops once that many bytes have
 * been written. A code that cannot be valid at its position is reported on
 * stderr and ends decoding early.
 */
void LZWDecode(BITFILE *bf, FILE *fp) {
	int character = 0;	// first character of the previously decoded phrase
	int new_code, last_code;
	int phrase_length;
	unsigned long file_length;

	file_length = BitsInput(bf, 4 * 8);   // read the original file length
	// Compare against the 32-bit pattern actually read; `-1 == file_length`
	// only matched where unsigned long is 32 bits wide.
	if (0xFFFFFFFFUL == file_length) file_length = 0;
	InitDictionary();
	last_code = -1;
	while (0 < file_length) 
	{
		new_code = input(bf);
		if (new_code >= next_code)
		{ // this is the case CSCSC( not in dict)
			// Only the code about to be created may be referenced, and
			// only when a previous phrase exists to build it from.
			if (new_code > next_code || 0 > last_code) {
				fprintf(stderr, "invalid code %d in compressed stream\n", new_code);
				return;
			}
			d_stack[0] = character;
			phrase_length = DecodeString(1, last_code);
		}
		else
		{
			phrase_length = DecodeString(0, new_code);
		}
		// d_stack holds the phrase reversed, so its top is the first character.
		character = d_stack[phrase_length - 1];
		// Stop at the declared length so the unsigned counter cannot wrap
		// if the stream carries more data than its header says.
		while (0 < phrase_length && 0 < file_length)
		{
			phrase_length--;
			fputc(d_stack[phrase_length], fp);
			file_length--;
		}
		if (MAX_CODE > next_code)
		{	// add the new phrase to dictionary
			AddToDictionary(character, last_code);
		}
		last_code = new_code;
	}
}




/*
 * Command-line driver.
 *
 *   argv[1]: operation, 'E' to encode or 'D' to decode (first character only)
 *   argv[2]: input file
 *   argv[3]: output file
 *
 * Returns 0 on success and -1 on a usage error, an unsupported operation,
 * or a file that cannot be opened.
 */
int main(int argc, char **argv) {
	FILE *fp = NULL;     // plain byte stream (source when encoding, destination when decoding)
	BITFILE *bf = NULL;  // bit stream (destination when encoding, source when decoding)

	if (4 > argc) 
	{
		// The <...> placeholders were missing from this text.
		fprintf(stdout, "usage: \n%s <o> <ifile> <ofile>\n", argv[0]);
		fprintf(stdout, "\t<o>: E or D reffers encode or decode\n");
		fprintf(stdout, "\t<ifile>: input file name\n");
		fprintf(stdout, "\t<ofile>: output file name\n");
		return -1;
	}

	if ('E' == argv[1][0])// do encoding
	{ 
		// Open the input first so a missing source does not create an
		// empty output file.
		if (0 != fopen_s(&fp, argv[2], "rb")) fp = NULL;
		if (fp == NULL)
		{
			printf("pf is NULL");
			return -1;
		}
		bf = OpenBitFileOutput(argv[3]);
		if (bf == NULL)
		{
			printf("bf is NULL");
			fclose(fp);	// do not leak the already opened input
			return -1;
		}

		printf("encoding\n");
		LZWEncode(fp, bf);
		fclose(fp);
		CloseBitFileOutput(bf);
		fprintf(stdout, "encoding done\n");
	}
	else if ('D' == argv[1][0]) 
	{	// do decoding
		bf = OpenBitFileInput(argv[2]);
		if (bf == NULL)
		{
			printf("bf is NULL");
			return -1;
		}
		if (0 != fopen_s(&fp, argv[3], "wb")) fp = NULL;
		if (fp == NULL)
		{
			printf("pf is NULL");
			CloseBitFileInput(bf);	// do not leak the already opened input
			return -1;
		}

		printf("decoding\n");
		LZWDecode(bf, fp);
		fclose(fp);
		CloseBitFileInput(bf);
		fprintf(stdout, "decoding done\n");
	}
	else {	// otherwise
		fprintf(stderr, "not supported operation\n");
		return -1;
	}
	return 0;
}

三:运行结果

对十种不同格式的文件分别进行编解码
原始文件:
LZW编解码算法实现与分析实验报告_第3张图片

编码后生成文件
LZW编解码算法实现与分析实验报告_第4张图片

解码后生成文件
LZW编解码算法实现与分析实验报告_第5张图片

原始文件格式 原始文件大小 编码后文件大小 压缩比
doc 316KB 319KB 0.9906
qcif 891KB 553KB 1.6112
yuv 732KB 96KB 7.625
jpg 463KB 518KB 0.8938
txt 100KB 60KB 1.6667
tga 1201KB 1387KB 0.8659
pdf 2785KB 3288KB 0.8470
xls 140KB 89KB 1.5730
pptx 208KB 267KB 0.7790
png 129KB 174KB 0.7414

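通过对十种不同格式的文件进行LZW编码,发现并不是所有文件编码后都会得到压缩,有些文件反而会更大。原因在于本实现的每个码字固定用16位输出:只有当一个码字平均代表两个以上的字节时文件才会变小。对于重复串较少的数据(例如jpg、png、pptx这类本身已经过压缩的格式),大多数码字只对应一两个字节,输出的码字流就会比原文件更长;而yuv、txt等重复内容较多的文件则能获得明显的压缩。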

你可能感兴趣的:(LZW编解码算法实现与分析实验报告)