词典编码:LZ77算法(C/C++)

一、基本思想

用指向早期曾经出现过的字符串的指针来表示当前被编码的字符串,如:
词典编码:LZ77算法(C/C++)_第1张图片

二、LZ77算法

  • 算法伪码:词典编码:LZ77算法(C/C++)_第2张图片
  • 示意图:词典编码:LZ77算法(C/C++)_第3张图片
  • 举例:
    词典编码:LZ77算法(C/C++)_第4张图片

三、C/C++实现

#include 
#include 
#include 
using namespace std;

struct code {
	int off;
	int len;
	char nextChar;
}; 

#define maxWindow 10
vector<code> vt;

int main(int argc, char* argv[]) {
	//string str = "abacdbacc";
	string str = "abcdbbccaaabaeaaabaee";
	cout << str << endl;
	int window1, window2;
	
	//编码 
	for(int i = 0; i < str.length(); i++) {
		if(i + 1 <= maxWindow) window1 = i; //确定查询窗口的尺寸 
		else window1 = maxWindow;
		
		if(i + window1 < str.length()) window2 = window1;  
		else window2 = str.length() - i;
		
		//printf("%d %d %d\n", window1, window2, i);
		string str1 = str.substr(i - window1, window1);
		string str2 = str.substr(i, window2);
		//cout << str1 << " : " << str2 << endl;
		
		int off = -1;
		while(true) {
			if(window2 == 0) break; //空串,直接退出 
			string str3 = str2.substr(0, window2);
			off = str1.find(str3); //找不到,off = -1 
			//cout << str3 << " :: " << off << " :: " << i + window2 << endl;
			if(off != -1) break; //找到了,退出 
			window2--;
			if(window2 <= 0) break;
		}

		if(off != -1) {
			code cd;
			cd.len = window2;
			cd.off = window1 - off;
			cd.nextChar = str[i + window2];
			vt.push_back(cd);
			i += window2;
		} else {
			code cd;
			cd.len = 0;
			cd.off = 0;
			cd.nextChar = str[i + window2];
			vt.push_back(cd);
		}
	}
	
	for(int i = 0; i < vt.size(); i++) { //编码结果 
		printf("(%d,%d,%c)", vt[i].off, vt[i].len, vt[i].nextChar);
	}
	printf("\n");
	
	//解码 
	string strOut;
	for(int i = 0; i < vt.size(); i++) {
		if(vt[i].len == 0) {
			strOut += vt[i].nextChar;
		} else {
			int len = strOut.length();
			len -= vt[i].off;
			string temp = strOut.substr(len, vt[i].len);
			strOut += temp + vt[i].nextChar;
		}
	}
	cout << strOut;
	return 0; 
} 
  • 运行结果
    词典编码:LZ77算法(C/C++)_第5张图片

你可能感兴趣的:(Algorithm)