Longest Common Substring

Longest Common Substring (not subsequence; for the subsequence problem, see the book "Introduction to Algorithms") is the problem of finding (one of) the longest strings that occur as a contiguous substring of both of two given strings. It is abbreviated as LCS in this article (here we only need to find one such LCS). Let's look at an example first.

s1 =      OldSite:GeeksforGeeks.org

s2 =      NewSite:GeeksQuiz.com

LCS =          Site:Geeks

Ok, I think now you are clear about the definition of the LCS (again not subsequence). Let's start our related algorithms and let them evolve.


I. Naive Recursive Version, time complexity O(2^(n+m)) (worst case, from the recurrence T(n, m) = T(n, m-1) + T(n-1, m) + O(1)), space complexity O(n+m) = O(h) for the recursion stack, where h is the recursion depth.

// Returns one longest common substring of the prefixes s1[0..end1] and
// s2[0..end2] (both bounds inclusive). A negative bound denotes an empty
// prefix, for which the result is "".
//
// A longest common substring of the two prefixes either
//   - ends at both end1 and end2 (it is then their longest common suffix), or
//   - does not use s2[end2], or
//   - does not use s1[end1].
// The three candidates are compared and the longest one is returned. On ties
// the common suffix wins, then the candidate that drops s2[end2].
//
// The common suffix is measured directly. Checking whether the answer for
// (end1-1, end2-1) happens to be a suffix is not sufficient: on a tie that
// answer may sit elsewhere in the strings and an extendable match is lost
// (e.g. "ab-cdE" / "ab+cdE" must yield "cdE", not "ab").
//
// Plain recursion without memoization: the number of calls grows
// exponentially with the input length, so only use this on short strings.
string longestCommonSubstring(const string &s1, int end1, const string &s2, int end2) {
	if (end1<0 || end2<0) return "";

	// Walk backwards from (end1, end2) while the characters agree.
	int suffixLen = 0;
	while (suffixLen<=end1 && suffixLen<=end2 && s1[end1-suffixLen]==s2[end2-suffixLen]) {
		++suffixLen;
	}
	string best = s1.substr(end1-suffixLen+1, suffixLen);

	string withoutLast2 = longestCommonSubstring(s1, end1, s2, end2-1);
	if (withoutLast2.size() > best.size()) best = withoutLast2;

	string withoutLast1 = longestCommonSubstring(s1, end1-1, s2, end2);
	if (withoutLast1.size() > best.size()) best = withoutLast1;

	return best;
}


string longestCommonSubstring(string s1, string s2) {
	return longestCommonSubstring(s1, s1.size()-1, s2, s2.size()-1);
}


II. Top-down Dynamic Programming Version, time complexity O(nm) (each of the nm subproblems is solved only once; the cost of copying and comparing the strings themselves is not counted), space complexity O(nm).

// Memoized (top-down) search for one longest common substring of the prefixes
// s1[0..end1] and s2[0..end2] (both bounds inclusive). A negative bound
// denotes an empty prefix, for which the result is "".
//
// mem[i][j] caches the answer for the prefixes ending at i and j. It must
// cover at least (end1+1) x (end2+1) cells, and every unsolved cell must hold
// the marker "#". Because the marker is itself a possible answer, a cell whose
// answer is exactly "#" is simply recomputed on each visit; the result stays
// correct, only the caching is lost for that cell.
//
// The answer is the longest of three candidates: the longest common suffix of
// the two prefixes, the answer without s2[end2], and the answer without
// s1[end1]. The suffix is measured directly. Checking whether the answer for
// (end1-1, end2-1) happens to be a suffix is not sufficient, because on a tie
// that answer may sit elsewhere and an extendable match is lost
// (e.g. "ab-cdE" / "ab+cdE" must yield "cdE", not "ab").
string longestCommonSubstring(const string &s1, int end1, const string &s2, int end2, vector<vector<string>> &mem) {
	if (end1<0 || end2<0) return "";
	if (mem[end1][end2] != "#") return mem[end1][end2];

	// Walk backwards from (end1, end2) while the characters agree.
	int suffixLen = 0;
	while (suffixLen<=end1 && suffixLen<=end2 && s1[end1-suffixLen]==s2[end2-suffixLen]) {
		++suffixLen;
	}
	string best = s1.substr(end1-suffixLen+1, suffixLen);

	string withoutLast2 = longestCommonSubstring(s1, end1, s2, end2-1, mem);
	if (withoutLast2.size() > best.size()) best = withoutLast2;

	string withoutLast1 = longestCommonSubstring(s1, end1-1, s2, end2, mem);
	if (withoutLast1.size() > best.size()) best = withoutLast1;

	return mem[end1][end2] = best;
}

string longestCommonSubstring(string s1, string s2) {
	int N = s1.size(), M = s2.size();
	vector<vector<string>> mem(N, vector<string>(M, "#"));
	return longestCommonSubstring(s1, N-1, s2, M-1, mem);
}


III. Bottom-up Dynamic Programming Version, time complexity O(nm), space complexity O(nm).

// Bottom-up dynamic programming: returns one longest common substring of s1
// and s2 ("" when they share no character or either string is empty).
//
// Two tables indexed by prefix lengths are filled row by row:
//   suffix[i][j] - length of the longest common suffix of s1[0..i) and s2[0..j)
//   mem[i][j]    - one longest common substring of s1[0..i) and s2[0..j)
// mem[i+1][j+1] is the longest of: the common suffix ending at (i, j),
// mem[i+1][j] and mem[i][j+1]. On ties an already stored answer is kept.
//
// The suffix length has to be tracked on its own. Extending mem[i][j] only
// when it happens to be a suffix loses matches whenever an equally long
// substring elsewhere was stored instead (e.g. "ab-cdE" / "ab+cdE" must yield
// "cdE", not "ab").
string longestCommonSubstring(string s1, string s2) {
	int N = s1.size(), M = s2.size();
	vector<vector<string>> mem(N+1, vector<string>(M+1, ""));
	vector<vector<int>> suffix(N+1, vector<int>(M+1, 0));
	for (int i=0; i<N; ++i) {
		for (int j=0; j<M; ++j) {
			// On a mismatch the cell keeps its initial value 0.
			if (s1[i] == s2[j]) suffix[i+1][j+1] = suffix[i][j] + 1;
			int run = suffix[i+1][j+1];

			const string &left = mem[i+1][j];
			const string &up = mem[i][j+1];
			const string &longer = left.size()>=up.size()? left: up;

			if (run > static_cast<int>(longer.size())) {
				// The match ending at (i, j) beats everything seen so far.
				mem[i+1][j+1] = s1.substr(i+1-run, run);
			} else {
				mem[i+1][j+1] = longer;
			}
		}
	}
	return mem[N][M];
}


IV. Space Optimized Bottom-up Dynamic Programming Version, time complexity O(nm), space complexity O(max(n,m)).

// Space-optimized bottom-up dynamic programming: returns one longest common
// substring of s1 and s2 ("" when they share no character or either string is
// empty).
//
// Only two rows of each table are kept and reused alternately (row index
// modulo 2):
//   suffix[.][j] - length of the longest common suffix of the current prefix
//                  of s1 and s2[0..j)
//   mem[.][j]    - one longest common substring of the current prefix of s1
//                  and s2[0..j)
// Each cell takes the longest of: the common suffix ending at (i, j), the cell
// to its left and the cell above it. On ties an already stored answer is kept.
//
// The suffix length has to be tracked on its own. Extending the diagonal
// answer only when it happens to be a suffix loses matches whenever an equally
// long substring elsewhere was stored instead (e.g. "ab-cdE" / "ab+cdE" must
// yield "cdE", not "ab").
string longestCommonSubstring(string s1, string s2) {
	int N = s1.size(), M = s2.size();
	vector<vector<string>> mem(2, vector<string>(M+1, ""));
	vector<vector<int>> suffix(2, vector<int>(M+1, 0));
	for (int i=0; i<N; ++i) {
		int prev = i%2, cur = (i+1)%2;
		for (int j=0; j<M; ++j) {
			int run = s1[i]==s2[j]? suffix[prev][j]+1: 0;
			// Rows are reused, so the cell is overwritten even on a mismatch.
			suffix[cur][j+1] = run;

			const string &left = mem[cur][j];
			const string &up = mem[prev][j+1];
			const string &longer = left.size()>=up.size()? left: up;

			if (run > static_cast<int>(longer.size())) {
				// The match ending at (i, j) beats everything seen so far.
				mem[cur][j+1] = s1.substr(i+1-run, run);
			} else {
				mem[cur][j+1] = longer;
			}
		}
	}
	return mem[N%2][M];
}


V. Tracing Bottom-up Dynamic Programming Version, time complexity O(nm), space complexity O(nm).

/***************** This version is used when string is very large ***************/  

// Tracing bottom-up version: returns one longest common substring of s1 and
// s2 ("" if there is none).
//
// run[a][b] holds the length of the common run of characters that ends at
// s1[a] and s2[b] (0 when the two characters differ). Only the length of the
// best run and the index in s1 where it ends are remembered, and the substring
// is cut out of s1 once at the very end. Among equally long runs the first one
// met in row-major order is reported.
string longestCommonSubstring(string s1, string s2) {
	const int rows = s1.size();
	const int cols = s2.size();
	vector<vector<int>> run(rows, vector<int>(cols, 0));
	int bestLen = 0;
	int bestEnd = -1;
	for (int a=0; a<rows; ++a) {
		for (int b=0; b<cols; ++b) {
			if (s1[a] != s2[b]) {
				run[a][b] = 0;
				continue;
			}
			// A run in the first row or column has nothing to extend.
			if (a>0 && b>0) run[a][b] = run[a-1][b-1]+1;
			else run[a][b] = 1;
			if (run[a][b] > bestLen) {
				bestLen = run[a][b];
				bestEnd = a;
			}
		}
	}
	// With no match at all this is substr(0, 0), i.e. "".
	return s1.substr(bestEnd-bestLen+1, bestLen);
}


VI. Space Optimized Tracing Bottom-up Dynamic Programming Version, time complexity O(nm), space complexity O(max(n,m)).

/***************** This version is used when string is very large ***************/  

// Space-optimized tracing bottom-up version: returns one longest common
// substring of s1 and s2 ("" if there is none).
//
// Same recurrence as the full-table tracing version, but only two rows of run
// lengths are kept and used alternately: run[cur][b] is the length of the
// common run ending at s1[a] and s2[b], and run[prev] is the row computed for
// s1[a-1]. Among equally long runs the first one met in row-major order is
// reported.
string longestCommonSubstring(string s1, string s2) {
	const int rows = s1.size();
	const int cols = s2.size();
	vector<vector<int>> run(2, vector<int>(cols, 0));
	int bestLen = 0;
	int bestEnd = -1;
	for (int a=0; a<rows; ++a) {
		const int cur = a%2;
		const int prev = 1-cur;
		for (int b=0; b<cols; ++b) {
			if (s1[a] != s2[b]) {
				// Rows are reused, so stale values must be cleared.
				run[cur][b] = 0;
				continue;
			}
			// A run in the first row or column has nothing to extend.
			if (a>0 && b>0) run[cur][b] = run[prev][b-1]+1;
			else run[cur][b] = 1;
			if (run[cur][b] > bestLen) {
				bestLen = run[cur][b];
				bestEnd = a;
			}
		}
	}
	// With no match at all this is substr(0, 0), i.e. "".
	return s1.substr(bestEnd-bestLen+1, bestLen);
}


VII. Further Space Optimized Tracing Bottom-up Dynamic Programming Version, time complexity O(nm), space complexity O(1).

/***************** This version is used when string is very large ***************/  

// Constant-extra-space tracing version: returns one longest common substring
// of s1 and s2 ("" if there is none).
//
// A common run always lies on one diagonal of the (s1 index, s2 index) grid,
// so every diagonal is walked once with a single running counter instead of a
// table. Diagonals starting in the first column are scanned first, then those
// starting in the first row (the main diagonal only once). Among equally long
// runs the first one met in that order is reported.
string longestCommonSubstring(string s1, string s2) {
	const int rows = s1.size();
	const int cols = s2.size();
	int bestLen = 0;
	int bestEnd = -1;

	// Walks the diagonal that starts at s1[a], s2[b] and records the best run.
	auto scanDiagonal = [&](int a, int b) {
		int run = 0;
		while (a<rows && b<cols) {
			if (s1[a] == s2[b]) ++run;
			else run = 0;
			if (run > bestLen) {
				bestLen = run;
				bestEnd = a;
			}
			++a;
			++b;
		}
	};

	for (int start=0; start<rows; ++start) scanDiagonal(start, 0);
	for (int start=1; start<cols; ++start) scanDiagonal(0, start);

	// With no match at all this is substr(0, 0), i.e. "".
	return s1.substr(bestEnd-bestLen+1, bestLen);
}

VIII. Tracing Top-down Dynamic Programming Version, time complexity O(nm), space complexity O(nm).

/***************** This version is used when string is very large ***************/  

// Memoized (top-down) computation of common run lengths.
//
// Returns the length of the common run of characters that ends at s1[end1]
// and s2[end2] (0 when the characters differ or an index is negative) and
// caches it in mem[end1][end2].
//
// mem - cache covering at least (end1+1) x (end2+1) cells; -1 marks a cell
//       that has not been computed yet.
// len - in/out: length of the longest run found so far.
// r   - in/out: index in s1 of the last character of that run.
//
// Both neighbouring cells (end1, end2-1) and (end1-1, end2) are visited on
// every call, whether or not the current characters match, so that every cell
// up to (end1, end2) is evaluated. Following only the diagonal on a match
// would skip cells in the same row and column and miss runs ending there
// (e.g. "xab" / "abb" must report "ab", which ends at s1[2] and s2[1]).
int longestCommonSubstring(const string &s1, int end1, const string &s2, int end2, vector<vector<int>> &mem, int &len, int &r) {
	if (end1<0 || end2<0) return 0;
	if (mem[end1][end2] != -1) return mem[end1][end2];

	longestCommonSubstring(s1, end1, s2, end2-1, mem, len, r);
	longestCommonSubstring(s1, end1-1, s2, end2, mem, len, r);

	if (s1[end1] != s2[end2]) return mem[end1][end2] = 0;

	// Extend the run that ends just before both characters.
	mem[end1][end2] = longestCommonSubstring(s1, end1-1, s2, end2-1, mem, len, r) + 1;
	if (mem[end1][end2] > len) len = mem[end1][end2], r = end1;
	return mem[end1][end2];
}

string longestCommonSubstring(string s1, string s2) {
	int N = s1.size(), M = s2.size(), len = 0, r = -1;
	vector<vector<int>> mem(N, vector<int>(M, -1));
	longestCommonSubstring(s1, N-1, s2, M-1, mem, len, r);
	return s1.substr(r-len+1, len);
}











你可能感兴趣的:(Algorithm)