LZW 文本压缩及解压

LZW 通过建立一张字符串表,用较短的代码来表示较长的字符串,从而实现压缩。

压缩算法如下

#include <iostream>
#include <fstream>

// Input and output streams shared by every function of the compressor;
// they are opened in openfile() and closed in main().
std::ifstream fin;
std::ofstream fout;
using namespace std;

// D        - number of slots in the dictionary ht (4096 = 2^12; output()
//            writes 12 bits per code).
// alpha    - number of single-byte entries the dictionary is seeded with.
// ByteSize - shift distance of one byte, in bits.
// HalfByte - shift distance of half a byte, in bits.
// AllBit   - mask selecting the low 8 bits.
// HalfBit  - mask selecting the low 4 bits.
const int D = 4096,
		alpha = 256,
		ByteSize = 8,
		HalfByte = 4,
		AllBit = 255,
		HalfBit = 15;
// used  - number of dictionary slots currently filled.
// state - 0 when the output is byte aligned, 1 when output() is holding back
//         the low 4 bits of the previous code in LeftHalf.
int used, state = 0;
// LeftHalf carries the pending 4 bits between two output() calls.
// LastChar is not referenced by the code shown here.
char LastChar, LeftHalf;
// Not referenced by the code shown here.
unsigned long outch;
// Dictionary. For index < alpha, code is the byte value itself; for later
// slots compress() stores (prefix code << ByteSize) | next byte.
struct
{
	long int code;
}ht[D];
bool search(long int cd, int& tk)
{
	for (int i = 0; i < used; i++)
		if (ht[i].code == cd)
		{
			tk = i;
			return true;
		}
	return false;
}
// Open the fixed input file "123.txt" and output file "123.lzw" in binary
// mode on the global streams. Returns true only when both opened cleanly.
bool openfile()
{
	fin.open("123.txt", std::ios_base::binary);
	fout.open("123.lzw", std::ios_base::binary | std::ios_base::out);

	const bool bothOpen = !fin.fail() && !fout.fail();
	return bothOpen;
}
// Append one 12-bit code to fout. Two consecutive codes are packed into
// three bytes; `state` records whether 4 bits of the previous code are still
// waiting in LeftHalf.
void output(long val)
{
	if (!state)
	{
		// Byte aligned: write the top 8 bits now, keep the low 4 for later.
		const unsigned char topBits = val >> HalfByte;
		fout.put(topBits);

		LeftHalf = val & HalfBit;
		state = 1;
		return;
	}

	// Half a byte pending: pair it with the top 4 bits of this code, then
	// write this code's remaining 8 bits.
	const unsigned char joined = (val >> ByteSize & HalfBit) | (LeftHalf << HalfByte);
	const unsigned char lowBits = val & AllBit;

	fout.put(joined);
	fout.put(lowBits);
	state = 0;
}
// LZW-compress everything readable from fin and write the resulting 12-bit
// codes to fout through output().
//
// The dictionary is seeded with the alpha single-byte strings and grows by
// one entry per emitted code until all D slots are used. An empty input
// produces an empty output.
void compress()
{
	// Seed the dictionary with the 256 single-byte strings.
	for (int i = 0; i < alpha; i++)
		ht[i].code = i;
	used = alpha;

	char ch;
	if (!fin.get(ch))
		return;	// nothing to read: do not encode an uninitialised byte

	// Bytes are widened through unsigned char so values >= 0x80 are not
	// sign-extended into the key.
	unsigned long pcode = static_cast<unsigned char>(ch);

	// Looping on the read itself stops at end of file; testing eof() before
	// the read would process the last byte a second time.
	while (fin.get(ch))
	{
		const unsigned char byte = static_cast<unsigned char>(ch);
		const unsigned long cd = (pcode << ByteSize) | byte;
		int tk;
		if (search(cd, tk))
		{
			// prefix + byte is already known: keep extending the match
			pcode = tk;
		}
		else
		{
			output(pcode);
			if (used < D)
			{
				// store exactly the key that search() is called with
				ht[used].code = cd;
				used++;
			}
			pcode = byte;
		}
	}
	output(pcode);

	// An odd number of codes leaves 4 bits pending; pad them to a full byte.
	if (state)
	{
		ch = LeftHalf << HalfByte;
		fout.put(ch);
		state = 0;
	}
}
// Entry point of the compressor: opens the files, compresses, closes.
// Returns 0 on success and 1 when either file could not be opened.
int main()
{
	if (!openfile())
	{
		// Compressing with unopened streams would only produce garbage.
		std::cerr <<"cannot open 123.txt or 123.lzw\n";
		return 1;
	}
	std::cout <<"file ready!\n";

	compress();
	std::cout <<"ok\n";

	// Each stream is closed exactly once. The former dictionary-dump loop
	// had its body commented out, which made fin.close() the loop body.
	fin.close();
	fout.close();
	return 0;
}


解压算法如下

hashlist.h

/********************************************************************
    purpose:    链表散列
    author:     xianyun1230
    QQ:         836663997
    e-mail:     [email protected]
    created:    2014/02/23
*********************************************************************/
// Hash table with separate chaining, mapping keys of type K to values of
// type T.
//
// Every bucket is a singly linked list that begins with a dummy head node,
// so each stored entry has a predecessor and unlinking needs no special
// case. K must support `k % int` and `==`.
// The table owns its nodes through raw pointers, so copying is disabled.
template<typename K, typename T>
class hashlist
{
	public:
		// Create a table with `num` buckets (at least one is always used).
		hashlist(int num);
		~hashlist();
		// Copy the value stored under k into val; false when k is absent.
		bool search(const K k, T& val);
		// Store val under k, replacing the value of an existing entry.
		bool insert(const K k, T val);
		// Remove the entry stored under k; false when k is absent.
		bool del(const K k);
		// Print every stored value to std::cout, followed by a newline.
		void show()const;
	private:
		typedef struct _node
		{
			K key;
			T data;
			struct _node *next;
		}node;
		// Declared but never defined: a copy would share and double-free
		// the node lists.
		hashlist(const hashlist&);
		hashlist& operator=(const hashlist&);
		// Bucket index of k, always within [0, D).
		int bucket(const K& k) const;
		// Predecessor of the node holding k, or the last node of the
		// bucket when k is absent.
		node* hsearch(K k);
		int D;
		node **lst;
};
template<typename K, typename T>
hashlist<K,T>::hashlist(int num)
{
	// A bucket count below 1 would make `k % D` divide by zero.
	D = num > 0 ? num : 1;
	lst = new node *[D];
	for (int i = 0; i < D; i++)
	{
		lst[i] = new node;	// dummy head; its key/data are never read
		lst[i]->next = NULL;
	}
}
template<typename K, typename T>
hashlist<K,T>::~hashlist()
{
	for (int i = 0; i < D; ++i)
	{
		node *cur = lst[i]->next;
		while (cur)
		{
			node *following = cur->next;
			delete cur;
			cur = following;
		}
		delete lst[i];
	}
	// lst came from new[], so it must be released with delete[].
	delete[] lst;
}
template<typename K, typename T>
int hashlist<K,T>::bucket(const K& k) const
{
	// `%` keeps the sign of a negative key; shift the result back into range.
	int idx = static_cast<int>(k % D);
	if (idx < 0)
		idx += D;
	return idx;
}
// `typename` is required here: node is a dependent type name.
template<typename K, typename T>
typename hashlist<K,T>::node* hashlist<K,T>::hsearch(K k)
{
	node *tmp = lst[bucket(k)];
	while(tmp->next)
	{
		if (tmp->next->key == k)
			return tmp;
		tmp = tmp->next;
	}
	return tmp;
}
template<typename K, typename T>
bool hashlist<K,T>::search(const K k, T& val)
{
	node* pt = hsearch(k);
	// hsearch() stops either in front of the match or on the last node.
	if (pt->next == NULL || !(pt->next->key == k))
		return false;
	val = pt->next->data;
	return true;
}
template<typename K, typename T>
bool hashlist<K,T>::insert(const K k,T val)
{
	node* pt = hsearch(k);
	if (pt->next != NULL)
	{
		// Key already stored: overwrite instead of adding a second node
		// that would shadow the first and come back after one del().
		pt->next->data = val;
		return true;
	}
	node* tmp = new node;
	tmp->key = k;
	tmp->data = val;
	tmp->next = NULL;
	pt->next = tmp;
	return true;
}
template<typename K, typename T>
bool hashlist<K,T>::del(const K k)
{
	node* pt = hsearch(k);
	if (pt->next == NULL)
		return false;
	// Unlink only the matching node; the remainder of the chain stays
	// attached, also when the match is the first entry of the bucket.
	node* tmp = pt->next;
	pt->next = tmp->next;
	delete tmp;
	return true;
}
template<typename K, typename T>
void hashlist<K,T>::show() const
{
	for (int i = 0; i < D; ++i)
	{
		for (node* tmp = lst[i]->next; tmp; tmp = tmp->next)
			std::cout <<tmp->data <<"  ";
	}
	std::cout <<std::endl;
}


main.cpp

#include <iostream>
#include <fstream>
#include "hashlist.h"

// Input and output streams shared by every function of the decompressor;
// they are opened in openfile() and closed in main().
std::ifstream fin;
std::ofstream fout;

// D        - number of slots in the dictionary ht and in the buffer s.
// alpha    - number of single-byte entries the dictionary is seeded with;
//            codes below alpha stand for the byte of the same value.
// ByteSize - shift distance of one byte, in bits.
// HalfByte - shift distance of half a byte, in bits.
// AllBit   - low-8-bit mask (not referenced by the code shown here).
// HalfBit  - mask selecting the low 4 bits.
const int D = 4096,
		alpha = 256,
		ByteSize = 8,
		HalfByte = 4,
		AllBit = 255,
		HalfBit = 15;
// used  - number of dictionary slots currently filled.
// state - 0 when the input is byte aligned, 1 when getcode() already holds
//         the top 4 bits of the next code in LeftHalf.
int used, state = 0;
// LeftHalf carries those 4 bits between two getcode() calls.
// LastChar is not referenced by the code shown here.
char LastChar, LeftHalf;
// Not referenced by the code shown here.
unsigned long outch;
// Scratch buffer: output() stores the decoded string here back to front.
char s[D];
// Index of the last byte output() wrote into s; s[slen] is therefore the
// first byte of the most recently decoded string.
int slen;
// Dictionary. Entry i stands for the string of entry `pre` followed by the
// byte `ch`; output() stops following `pre` once the code is below alpha.
struct
{
	int pre;
	char ch;
}ht[D];

// Open the fixed input file "123.lzw" and output file "123-lzw.txt" in
// binary mode on the global streams. Returns true only when both opened
// cleanly.
bool openfile()
{
	fin.open("123.lzw", std::ios_base::binary);
	fout.open("123-lzw.txt", std::ios_base::binary | std::ios_base::out);

	const bool bothOpen = !fin.fail() && !fout.fail();
	return bothOpen;
}
// Read the next 12-bit code from fin.
//
// code - receives the code on success; left untouched on failure.
// Returns false when the input is exhausted or ends in the middle of a code.
//
// Two codes occupy three bytes. `state` tells whether the top 4 bits of the
// next code were already read together with the previous one (LeftHalf).
bool getcode(int &code)
{
	char raw;
	// Test the read itself so an unread byte is never interpreted.
	if (!fin.get(raw))
		return false;
	const unsigned char first = raw;

	if (state)
	{
		// Top 4 bits are pending from the previous call; this byte is the
		// low 8 bits.
		code = (LeftHalf << ByteSize) | first;
		state = 0;
		return true;
	}

	// Byte aligned: the code is this byte plus the top half of the next one.
	if (!fin.get(raw))
		return false;	// truncated input: the code is incomplete
	const unsigned char second = raw;
	code = (first << HalfByte) | (second >> HalfByte);
	LeftHalf = second & HalfBit;
	state = 1;
	return true;
}
// Write the string that `code` stands for to fout.
//
// The dictionary chain yields the string last byte first, so it is collected
// in s and then emitted from the back. On return slen is the index of the
// last byte stored in s, which is the first byte of the string.
void output(int code)
{
	int top = 0;
	for (; code >= alpha; code = ht[code].pre)
		s[top++] = ht[code].ch;
	s[top] = code;	// the chain ends in a plain byte value
	slen = top;

	for (int pos = slen + 1; pos-- > 0; )
		fout.put(s[pos]);
}
// Decode the 12-bit LZW codes read through getcode() and write the original
// bytes to fout.
//
// The dictionary is rebuilt while decoding: each code after the first adds
// the entry "previous string + first byte of the current string", until all
// D slots are used. Decoding stops with a message on std::cerr when a code
// cannot have been produced by the encoder.
void decompress()
{
	// Seed the dictionary with the 256 single-byte strings.
	for (int i = 0; i < alpha; i++)
		ht[i].ch = i;
	used = alpha;

	int pcode, ccode;
	if (!getcode(pcode))
		return;	// empty input
	if (pcode >= alpha)
	{
		// The first code always stands for a single byte.
		std::cerr <<"corrupt input\n";
		return;
	}
	s[0] = pcode;
	fout.put(s[0]);
	slen = 0;

	while (getcode(ccode))
	{
		if (ccode < used)
		{
			output(ccode);
			// Stop growing once the table is full, exactly as the encoder
			// does; the guard has to test `used`, not the code just read.
			if (used < D)
			{
				ht[used].pre = pcode;
				ht[used].ch = s[slen];	// first byte of the current string
				used++;
			}
		}
		else
		{
			// The only code that may be unknown is the one about to be
			// defined: previous string + its own first byte.
			if (ccode != used || used >= D)
			{
				std::cerr <<"corrupt input\n";
				return;
			}
			ht[used].pre = pcode;
			ht[used].ch = s[slen];	// s still holds the previous string
			used++;
			output(ccode);
		}
		pcode = ccode;
	}
}
// Entry point of the decompressor: opens the files, decodes, prints the
// rebuilt dictionary and closes the files.
// Returns 0 on success and 1 when either file could not be opened.
int main()
{
	if (!openfile())
	{
		// Decoding with unopened streams would only produce garbage.
		std::cerr <<"cannot open 123.lzw or 123-lzw.txt\n";
		return 1;
	}
	std::cout <<"file ready!\n";

	decompress();
	std::cout <<"ok\n";

	// Debug listing, one line per entry: index, prefix code, last byte of
	// the prefix entry, last byte of this entry.
	for (int i = 0; i < used; i++)
		std::cout <<i <<"\t"<< ht[i].pre <<" "<<ht[ht[i].pre].ch<<" " <<ht[i].ch <<'\n';
	std::cout.flush();

	fin.close();
	fout.close();
	return 0;
}

另外,这段代码疑似有些错误,仅供大家参考使用,同时也希望各路大神能给予指导~

你可能感兴趣的:(算法,压缩,LZW,文本压缩)