今天实现的算法是求解最长公共子序列,在字母表Σ上,分别给出两个长度为n和m的字符串A和B,确定在A和B中最长公共子序列的长度并输出该最长公共子序列。这里,A=a1a2...an的子序列是一个形式为ai1ai2...aik的字符串,其中每个ij都在1和n之间,并且1<=i1<i2<...<ik>=n,子序列不是子串,并不要求连续。例如zxyxyz和xyyzx的最长公共子序列为xyyz。
此问题可以用动态规划技术来解决,为了使用动态规划,首先得有一个递推公式。令A = a1a2...an好B = b1b2...bn,令L[i,j]表示a1a2...ai和b1b2...bi的最长公共子序列的长度。i、j可能为0,即表示空串。则i=0或j=0时,L[i,j]=0。递推关系如下:
如果i和j都大于0,那么
for i ← 0 to n: L[i,0] ← 0 end for for j ← 0 to n: L[0,j] ← 0 end for for i ← 1 to n: for j ← 1 to m: if ai == bi then L[i,j] ← L[i-1,j-1] + 1 else L[i,j] ← max{L[i,j-1],L[i-1,j]} end if end for end for while i <= n and j <= m: if ai == bi then 输出ai i=i+1 j=j+1 else if L[i+1,j] > L[i][j+1] then i = i+1 else j = j+1 end while return L[n,m]
//main.cpp #include <iostream> #include "LCSString.h" using std::cout; using std::endl; int main(void) { LCSString s1("xyxxzxyzxy"); std::string s2("zxzyyzxxyxxz"); std::string s = s1.getLCS(s2); cout << s1 << "和" << s2 << "的一个最长公共子序列为:"; cout << s << endl; getchar(); return 0; }
//LCSString.h #pragma once #include <string> class LCSString:public std::string { public: LCSString(void); LCSString(const char * _Ptr); ~LCSString(void); std::string getLCS(std::string s); };
//LCSString.cpp #include "LCSString.h" #include <iostream> using std::cout; using std::endl; LCSString::LCSString(void):std::string() { } LCSString::LCSString(const char * _Ptr):std::string(_Ptr) { } LCSString::~LCSString(void) { } //求本字符串与另一个字符串s的最长公共子序列 std::string LCSString::getLCS(std::string s) { std::string result; int n_len = this->length()+1; int m_len = s.length()+1; int ** L = new int * [n_len]; for (int i = 0; i < n_len; i++) { L[i] = new int [m_len]; } for (int i = 0; i < n_len; i++) { L[i][0] = 0; } for (int i = 0; i < m_len; i++) { L[0][i] = 0; } for (int i = 1; i < n_len; i++) { for (int j = 1;j < m_len; j ++) { if (this->operator[](i-1) == s[j-1]) { L[i][j] = L[i-1][j-1] + 1; } else { //L[i][j]取L[i][j-1]和L[i-1][j]的最大值 L[i][j] = (L[i][j-1] > L[i-1][j]?L[i][j-1]:L[i-1][j]); } } } for (int i = 0; i < n_len; i++) { for (int j = 0; j < m_len; j++) { cout << L[i][j] << " "; } cout << endl; } int i = n_len - 1; int j = m_len - 1; while (i > 0 && j > 0) { if (this->operator[](i-1) == s[j-1]) { result = s[j-1] + result; i --; j --; } else if (L[i-1][j] == L[i][j-1]) { i --; } else { j --; } } for (int i = 0; i < n_len; i++) { delete [] L[i]; } delete [] L; return result; }
下面是Java版本实现:
package lcs; public class LCSString { private String s1; public LCSString(String s) { this.s1 = new String(s); } public String getLCSString(String s2) { String result = new String(); int n = s1.length()+1; int m = s2.length()+1; int [][] L = new int [n][m]; for (int i = 0; i < n; i++) { L[i][0] = 0; } for (int i = 0; i < m; i++) { L[0][i]= 0; } for (int i = 1; i < n; i++) { for (int j = 1; j < m; j++) { if (s1.charAt(i-1) == s2.charAt(j-1)) { L[i][j]= L[i-1][j-1] + 1; } else { //L[i][j]取L[i][j-1]和L[i-1][j]的最大值 L[i][j] = (L[i][j-1] > L[i-1][j]?L[i][j-1]:L[i-1][j]); } } } int i = n - 1; int j = m - 1; while (i > 0 && j > 0) { if (s1.charAt(i-1) == s2.charAt(j-1)) { result = s1.charAt(i-1) + result; i --; j --; } else if (L[i-1][j] == L[i][j-1]) { i --; } else { j --; } } return result; } /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub String s1 = "xyxxzxyzxy"; String s2 = "zxzyyzxxyxxz"; LCSString lcs = new LCSString(s1); System.out.println(s1 + "和" + s2 + "的一个最长公共子序列为:" + lcs.getLCSString(s2)); } }
下面是Python版本实现:
#! /usr/bin/env python # -*- coding:utf-8 -*- class LCSString: def __init__(self, s): self.s1 = str(s) def getLCSString(self, s2): result = '' n = len(self.s1)+1 m = len(s2)+1 L = [0]*n for i in range(0, n): L[i] = [0] * m for i in range(1, n): for j in range(1, m): if self.s1[i-1] == s2[j-1]: L[i][j] = L[i-1][j-1] + 1 else: L[i][j] = max(L[i-1][j], L[i][j-1]) print for i in L: for j in i: print j, print i = n-1 j = m-1 while i > 0 and j > 0: if self.s1[i-1] == s2[j-1]: result = s2[j-1] + result i -= 1 j -= 1 elif L[i-1][j] == L[i][j-1]: i -= 1 else: j -= 1 return result if __name__ == '__main__': s1 = "xyxxzxyzxy" s2 = "zxzyyzxxyxxz" lcs = LCSString(s1) print s1 + "和" + s2 + "的一个最长公共子序列为:" + lcs.getLCSString(s2)