动态规划法解最长公共子序列问题

问题描述

给定两个字符串,求解这两个字符串的最长公共子序列(Longest Common Sequence)。
输入序列“ ABCDGH”和“ AEDFHR”的LCS为长度3的“ ADH”。
输入序列“ AGGTAB”和“ GXTXAYB”的LCS为长度4的“ GTAB”。

最佳子结构:

假设输入序列分别为长度为m和n的X [0…m-1]和Y [0…n-1]。并令L(X [0…m-1],Y [0…n-1])为两个序列X和Y的LCS的长度。以下是L(X [0 … m-1],Y [0…n-1])。

如果两个序列的最后一个字符匹配(或X [m-1] == Y [n-1]),则
L(X [0…m-1],Y [0…n-1])= 1 + L(X [0…m-2],Y [0…n-2])

如果两个序列的最后一个字符不匹配(或X [m-1]!= Y [n-1]),则
L(X [0…m-1],Y [0…n-1])= MAX(L(X [0…m-2],Y [0…n-1]),L(X [0…m-1],Y [0…n-2]))

示例:
1)考虑输入字符串“ AGGTAB”和“ GXTXAYB”。最后一个字符与字符串匹配。因此,LCS的长度可以写为:
L(“ AGGTAB”,“ GXTXAYB”)= 1 + L(“ AGGTA”,“ GXTXAY”)
动态规划法解最长公共子序列问题_第1张图片
2)考虑输入字符串“ ABCDGH”和“ AEDFHR”。字符串的最后一个字符不匹配。因此,LCS的长度可以写成:
L(“ ABCDGH”,“ AEDFHR”)= MAX(L(“ ABCDG”,“ AEDFH R ”),L(“ ABCDG H ”,“ AEDFH”))

因此,LCS问题具有最佳的子结构属性,因为可以使用子问题的解决方案来解决主要问题。

实现

以下是LCS问题的简单递归实现。该实现仅遵循上述递归结构。
c++

/* A Naive recursive implementation of LCS problem */
#include  
using namespace std; 

int max(int a, int b); 

/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char *X, char *Y, int m, int n ) 
{ 
	if (m == 0 || n == 0) 
		return 0; 
	if (X[m-1] == Y[n-1]) 
		return 1 + lcs(X, Y, m-1, n-1); 
	else
		return max(lcs(X, Y, m, n-1), lcs(X, Y, m-1, n)); 
} 

/* Utility function to get max of 2 integers */
int max(int a, int b) 
{ 
	return (a > b)? a : b; 
} 

/* Driver code */
int main() 
{ 
	char X[] = "AGGTAB"; 
	char Y[] = "GXTXAYB"; 
	
	int m = strlen(X); 
	int n = strlen(Y); 
	
	cout<<"Length of LCS is "<< lcs( X, Y, m, n ) ; 
	
	return 0; 
} 


java版



/* A Naive recursive implementation of LCS problem in java*/
public class LongestCommonSubsequence 
{ 
  
  /* Returns length of LCS for X[0..m-1], Y[0..n-1] */
  int lcs( char[] X, char[] Y, int m, int n ) 
  { 
    if (m == 0 || n == 0) 
      return 0; 
    if (X[m-1] == Y[n-1]) 
      return 1 + lcs(X, Y, m-1, n-1); 
    else
      return max(lcs(X, Y, m, n-1), lcs(X, Y, m-1, n)); 
  } 
  
  /* Utility function to get max of 2 integers */
  int max(int a, int b) 
  { 
    return (a > b)? a : b; 
  } 
  
  public static void main(String[] args) 
  { 
    LongestCommonSubsequence lcs = new LongestCommonSubsequence(); 
    String s1 = "AGGTAB"; 
    String s2 = "GXTXAYB"; 
  
    char[] X=s1.toCharArray(); 
    char[] Y=s2.toCharArray(); 
    int m = X.length; 
    int n = Y.length; 
  
    System.out.println("Length of LCS is" + " " + 
                                  lcs.lcs( X, Y, m, n ) ); 
  } 
  
} 

python版


# A Naive recursive Python implementation of LCS problem 
  
def lcs(X, Y, m, n): 
  
    if m == 0 or n == 0: 
       return 0; 
    elif X[m-1] == Y[n-1]: 
       return 1 + lcs(X, Y, m-1, n-1); 
    else: 
       return max(lcs(X, Y, m, n-1), lcs(X, Y, m-1, n)); 
  
  
# Driver program to test the above function 
X = "AGGTAB"
Y = "GXTXAYB"
print "Length of LCS is ", lcs(X , Y, len(X), len(Y)) 

输出:

LCS的长度是4

上述递归方法的时间复杂度在最坏的情况下为O(2 ^ n),最坏的情况发生在X和Y的所有字符不匹配(即LCS的长度为0)时。
考虑到上述实现,以下是针对输入字符串“ AXYT”和“ AYZX”

                         lcs(“ AXYT”,“ AYZX”)
                       /                 
         lcs(“ AXY”,“ AYZX”)lcs(“ AXYT”,“ AYZ”)
         ///               
lcs(“ AX”,“ AYZX”)lcs(“ AXY”,“ AYZ”)lcs(“ AXY”,“ AYZ”)lcs(“ AXYT”,“ AY”)

在上面的部分递归树中,lcs(“ AXY”,“ AYZ”)被求解两次。如果我们绘制完整的递归树,则可以看到有很多子问题可以一次又一次地解决。因此,此问题具有“重叠子结构”属性,可以通过使用“记忆化”或“制表”来避免重新计算相同子问题。以下是LCS问题的列表实现。

动态规划法解最长公共子序列问题_第2张图片

/* Dynamic Programming C++ implementation of LCS problem */
#include 
using namespace std; 

int max(int a, int b); 

/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char *X, char *Y, int m, int n ) 
{ 
	int L[m + 1][n + 1]; 
	int i, j; 
	
	/* Following steps build L[m+1][n+1] in 
	bottom up fashion. Note that L[i][j] 
	contains length of LCS of X[0..i-1] 
	and Y[0..j-1] */
	for (i = 0; i <= m; i++) 
	{ 
		for (j = 0; j <= n; j++) 
		{ 
		if (i == 0 || j == 0) 
			L[i][j] = 0; 
	
		else if (X[i - 1] == Y[j - 1]) 
			L[i][j] = L[i - 1][j - 1] + 1; 
	
		else
			L[i][j] = max(L[i - 1][j], L[i][j - 1]); 
		} 
	} 
		
	/* L[m][n] contains length of LCS 
	for X[0..n-1] and Y[0..m-1] */
	return L[m][n]; 
} 

/* Utility function to get max of 2 integers */
int max(int a, int b) 
{ 
	return (a > b)? a : b; 
} 

// Driver Code 
int main() 
{ 
	char X[] = "AGGTAB"; 
	char Y[] = "GXTXAYB"; 
	
	int m = strlen(X); 
	int n = strlen(Y); 
	
	cout << "Length of LCS is "
		<< lcs( X, Y, m, n ); 
	
	return 0; 
} 


/* Dynamic Programming Java implementation of LCS problem */
public class LongestCommonSubsequence 
{ 

/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char[] X, char[] Y, int m, int n ) 
{ 
	int L[][] = new int[m+1][n+1]; 

	/* Following steps build L[m+1][n+1] in bottom up fashion. Note 
		that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] */
	for (int i=0; i<=m; i++) 
	{ 
	for (int j=0; j<=n; j++) 
	{ 
		if (i == 0 || j == 0) 
			L[i][j] = 0; 
		else if (X[i-1] == Y[j-1]) 
			L[i][j] = L[i-1][j-1] + 1; 
		else
			L[i][j] = max(L[i-1][j], L[i][j-1]); 
	} 
	} 
return L[m][n]; 
} 

/* Utility function to get max of 2 integers */
int max(int a, int b) 
{ 
	return (a > b)? a : b; 
} 

public static void main(String[] args) 
{ 
	LongestCommonSubsequence lcs = new LongestCommonSubsequence(); 
	String s1 = "AGGTAB"; 
	String s2 = "GXTXAYB"; 

	char[] X=s1.toCharArray(); 
	char[] Y=s2.toCharArray(); 
	int m = X.length; 
	int n = Y.length; 

	System.out.println("Length of LCS is" + " " + 
								lcs.lcs( X, Y, m, n ) ); 
} 

} 



# Dynamic Programming implementation of LCS problem 

def lcs(X , Y): 
	# find the length of the strings 
	m = len(X) 
	n = len(Y) 

	# declaring the array for storing the dp values 
	L = [[None]*(n+1) for i in xrange(m+1)] 

	"""Following steps build L[m+1][n+1] in bottom up fashion 
	Note: L[i][j] contains length of LCS of X[0..i-1] 
	and Y[0..j-1]"""
	for i in range(m+1): 
		for j in range(n+1): 
			if i == 0 or j == 0 : 
				L[i][j] = 0
			elif X[i-1] == Y[j-1]: 
				L[i][j] = L[i-1][j-1]+1
			else: 
				L[i][j] = max(L[i-1][j] , L[i][j-1]) 

	# L[m][n] contains the length of LCS of X[0..n-1] & Y[0..m-1] 
	return L[m][n] 
#end of function lcs 


# Driver program to test the above function 
X = "AGGTAB"
Y = "GXTXAYB"
print "Length of LCS is ", lcs(X, Y) 

打印最长的公共子序列

以下是打印LCS的详细算法。它使用相同的2D表L [] []。

1)构造L [m + 1] [n + 1] 。

2)值L [m] [n]包含LCS的长度。创建一个字符数组lcs [],其长度等于lcs的长度加1(用于存储\ 0的字符数组)。

3)从L [m] [n]开始遍历2D数组。对每个单元格L [i] [j]
a执行以下操作:**a)**如果对应于L [i] [j]的字符(在X和Y中)相同(或X [i-1] == Y [j- 1]),然后将此字符作为LCS的一部分。
…… **b)**否则比较L [i-1] [j]和L [i] [j-1]的值并朝更大的方向前进。
动态规划法解最长公共子序列问题_第3张图片

/* Dynamic Programming implementation of LCS problem */
#include 
#include 
#include 
using namespace std; 

/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
void lcs( char *X, char *Y, int m, int n ) 
{ 
int L[m+1][n+1]; 

/* Following steps build L[m+1][n+1] in bottom up fashion. Note 
	that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] */
for (int i=0; i<=m; i++) 
{ 
	for (int j=0; j<=n; j++) 
	{ 
	if (i == 0 || j == 0) 
		L[i][j] = 0; 
	else if (X[i-1] == Y[j-1]) 
		L[i][j] = L[i-1][j-1] + 1; 
	else
		L[i][j] = max(L[i-1][j], L[i][j-1]); 
	} 
} 

// Following code is used to print LCS 
int index = L[m][n]; 

// Create a character array to store the lcs string 
char lcs[index+1]; 
lcs[index] = '\0'; // Set the terminating character 

// Start from the right-most-bottom-most corner and 
// one by one store characters in lcs[] 
int i = m, j = n; 
while (i > 0 && j > 0) 
{ 
	// If current character in X[] and Y are same, then 
	// current character is part of LCS 
	if (X[i-1] == Y[j-1]) 
	{ 
		lcs[index-1] = X[i-1]; // Put current character in result 
		i--; j--; index--;	 // reduce values of i, j and index 
	} 

	// If not same, then find the larger of two and 
	// go in the direction of larger value 
	else if (L[i-1][j] > L[i][j-1]) 
		i--; 
	else
		j--; 
} 

// Print the lcs 
cout << "LCS of " << X << " and " << Y << " is " << lcs; 
} 

/* Driver program to test above function */
int main() 
{ 
char X[] = "AGGTAB"; 
char Y[] = "GXTXAYB"; 
int m = strlen(X); 
int n = strlen(Y); 
lcs(X, Y, m, n); 
return 0; 
} 

/* Dynamic Programming Java implementation of LCS problem */
public class LongestCommonSubsequence 
{ 

/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char[] X, char[] Y, int m, int n ) 
{ 
	int L[][] = new int[m+1][n+1]; 

	/* Following steps build L[m+1][n+1] in bottom up fashion. Note 
		that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] */
	for (int i=0; i<=m; i++) 
	{ 
	for (int j=0; j<=n; j++) 
	{ 
		if (i == 0 || j == 0) 
			L[i][j] = 0; 
		else if (X[i-1] == Y[j-1]) 
			L[i][j] = L[i-1][j-1] + 1; 
		else
			L[i][j] = max(L[i-1][j], L[i][j-1]); 
	} 
	} 
return L[m][n]; 
} 

/* Utility function to get max of 2 integers */
int max(int a, int b) 
{ 
	return (a > b)? a : b; 
} 

public static void main(String[] args) 
{ 
	LongestCommonSubsequence lcs = new LongestCommonSubsequence(); 
	String s1 = "AGGTAB"; 
	String s2 = "GXTXAYB"; 

	char[] X=s1.toCharArray(); 
	char[] Y=s2.toCharArray(); 
	int m = X.length; 
	int n = Y.length; 

	System.out.println("Length of LCS is" + " " + 
								lcs.lcs( X, Y, m, n ) ); 
} 

} 



# Dynamic programming implementation of LCS problem 

# Returns length of LCS for X[0..m-1], Y[0..n-1] 
def lcs(X, Y, m, n): 
	L = [[0 for x in xrange(n+1)] for x in xrange(m+1)] 

	# Following steps build L[m+1][n+1] in bottom up fashion. Note 
	# that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] 
	for i in xrange(m+1): 
		for j in xrange(n+1): 
			if i == 0 or j == 0: 
				L[i][j] = 0
			elif X[i-1] == Y[j-1]: 
				L[i][j] = L[i-1][j-1] + 1
			else: 
				L[i][j] = max(L[i-1][j], L[i][j-1]) 

	# Following code is used to print LCS 
	index = L[m][n] 

	# Create a character array to store the lcs string 
	lcs = [""] * (index+1) 
	lcs[index] = "" 

	# Start from the right-most-bottom-most corner and 
	# one by one store characters in lcs[] 
	i = m 
	j = n 
	while i > 0 and j > 0: 

		# If current character in X[] and Y are same, then 
		# current character is part of LCS 
		if X[i-1] == Y[j-1]: 
			lcs[index-1] = X[i-1] 
			i-=1
			j-=1
			index-=1

		# If not same, then find the larger of two and 
		# go in the direction of larger value 
		elif L[i-1][j] > L[i][j-1]: 
			i-=1
		else: 
			j-=1

	print "LCS of " + X + " and " + Y + " is " + "".join(lcs) 

# Driver program 
X = "AGGTAB"
Y = "GXTXAYB"
m = len(X) 
n = len(Y) 
lcs(X, Y, m, n) 



你可能感兴趣的:(数据结构及算法,动态规划,算法)