会慢慢写最长公共子序列、最短编辑距离等与字符串相关的算法,其实也就是数组相关的算法...
一、最长公共子序列
Solve1里递推公式为:
dp[i][j] = 0 if i = 0 or j = 0
dp[i][j] = dp[i-1][j-1]+1 if s1[i-1] = s2[j-1]
dp[i][j] = max{dp[i-1][j],dp[i][j-1]} if s1[i-1] != s2[j-1]
这里i、j并不是s1、s2中字符的下标,而是已扫描的字符个数:dp[i][j]表示s1前i个字符与s2前j个字符的最长公共子序列的长度
Solve1只打印了最长公共子序列的其中一个,没有考虑多个相同的最长公共子序列
Solve2稍微变换了递推公式:
dp[i][j]表示s1前i+1个字符与s2前j+1个字符的最长公共子序列的长度(此时i、j就是字符下标),没什么太大变化
Solve3只用了一维数组,减少了空间复杂度
Solve4是递归写法
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#define _DEBUG
/* Return the larger of the two arguments; b is returned when they are equal. */
int max(int a, int b){
    if(a > b){
        return a;
    }
    return b;
}
/*
 * Solve1: bottom-up longest common subsequence (LCS) of s1 and s2.
 *
 * dp[i][j] holds the LCS length of the first i characters of s1 and the
 * first j characters of s2, so row 0 and column 0 are all zero.
 *
 * Prints "Solve1:<length>" on one line, then one LCS on the next line with
 * every character followed by a single space. Only one LCS is reported even
 * when several of the same length exist.
 */
void Solve1(char s1[], char s2[]){
    int i,j;/* i, j are counts of scanned characters, not indices */
    int len1 = strlen(s1);
    int len2 = strlen(s2);
    int dp[len1+1][len2+1];
    for(i = 0; i <= len1; ++i) dp[i][0] = 0;
    for(j = 0; j <= len2; ++j) dp[0][j] = 0;
    for(i = 1; i <= len1; ++i){
        for(j = 1; j <= len2; ++j){
            if(s1[i-1] == s2[j-1]){/* prefix lengths i, j end at indices i-1, j-1 */
                dp[i][j] = dp[i-1][j-1] + 1;
            }else{
                dp[i][j] = dp[i-1][j] > dp[i][j-1] ? dp[i-1][j] : dp[i][j-1];
            }
        }
    }
#ifdef _DEBUG
    for(i = 0; i <= len1; ++i){
        for(j = 0; j<= len2; ++j){
            printf("%d ",dp[i][j]);
        }
        printf("\n");
    }
#endif// _DEBUG
    printf("Solve1:%d\n",dp[len1][len2]);

    /*
     * Walk back from dp[len1][len2] to recover one LCS. The walk stops as
     * soon as either prefix is empty, so s1[i-1], s2[j-1], dp[i-1][j] and
     * dp[i][j-1] are always in bounds. Characters are found last-to-first,
     * so they are stored from the end of the buffer and printed forwards.
     */
    int lcs_len = dp[len1][len2];
    char lcs[lcs_len + 1];/* +1 keeps the array size positive when lcs_len is 0 */
    int k = lcs_len;
    i = len1;
    j = len2;
    while(i > 0 && j > 0){
        if(s1[i-1] == s2[j-1]){
            lcs[--k] = s1[i-1];
            --i;
            --j;
        }else if(dp[i][j-1] > dp[i-1][j]){
            --j;
        }else{
            --i;
        }
    }
    for(k = 0; k < lcs_len; ++k){
        printf("%c ",lcs[k]);
    }
    printf("\n");
}
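/* Incorrect first attempt, kept only for comparison.
// Bug: dp[i][0] and dp[0][j] are filled from a single character comparison, but once any
// earlier character has matched they must stay 1 for every later index.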
void Solve2(char s1[], char s2[]){
int i,j;
int len1 = strlen(s1);
int len2 = strlen(s2);
int dp[len1][len2];
for(i = 0; i < len1; ++i) dp[i][0] = s1[i]==s2[0]?1:0;
for(j = 0; j < len2; ++j) dp[0][j] = s1[0]==s2[j]?1:0;
for(i = 1; i < len1; ++i){
for(j = 1; j < len2; ++j){
if(s1[i] == s2[j]){
dp[i][j] = dp[i-1][j-1] + 1;
}else{
dp[i][j] = max(dp[i-1][j],dp[i][j-1]);
}
}
}
for(i = 0; i < len1; ++i){
for(j = 0; j< len2; ++j){
printf("%d ",dp[i][j]);
}
printf("\n");
}
printf("%d\n",dp[len1-1][len2-1]);
}
*/
/*
 * Solve2: LCS length using a len1 x len2 table indexed by character position.
 *
 * dp[i][j] holds the LCS length of s1[0..i] and s2[0..j] (both inclusive).
 * Prints "Solve2:<length>" followed by a newline.
 */
void Solve2(char s1[], char s2[]){
    int i,j;/* i, j are indices into s1 and s2 */
    int len1 = strlen(s1);
    int len2 = strlen(s2);

    /* With an empty string there is no dp[len1-1][len2-1] cell to read. */
    if(len1 == 0 || len2 == 0){
        printf("Solve2:%d\n",0);
        return;
    }

    int dp[len1][len2];
    dp[0][0] = s1[0]==s2[0]?1:0;
    /*
     * First column and first row: the value becomes 1 at the first match and
     * stays 1 afterwards, so each cell carries over its predecessor. This
     * includes a match already recorded in dp[0][0].
     */
    for(i = 1; i < len1; ++i){
        dp[i][0] = (dp[i-1][0] == 1 || s1[i] == s2[0]) ? 1 : 0;
    }
    for(j = 1; j < len2; ++j){
        dp[0][j] = (dp[0][j-1] == 1 || s1[0] == s2[j]) ? 1 : 0;
    }
    for(i = 1; i < len1; ++i){
        for(j = 1; j < len2; ++j){
            if(s1[i] == s2[j]){
                dp[i][j] = dp[i-1][j-1] + 1;
            }else{
                dp[i][j] = dp[i-1][j] > dp[i][j-1] ? dp[i-1][j] : dp[i][j-1];
            }
        }
    }
#ifdef _DEBUG
    for(i = 0; i < len1; ++i){
        for(j = 0; j< len2; ++j){
            printf("%d ",dp[i][j]);
        }
        printf("\n");
    }
#endif // _DEBUG
    printf("Solve2:%d\n",dp[len1-1][len2-1]);
}
/*
 * Solve3: LCS length using a single row of len2+1 cells.
 *
 * After processing row i, dp[j] holds the LCS length of the first i
 * characters of s1 and the first j characters of s2.
 * Prints "Solve3:<length>" followed by a newline.
 */
void Solve3(char s1[], char s2[]){
    int i,j;
    int len1 = strlen(s1);
    int len2 = strlen(s2);
    int dp[len2+1];
    for(j = 0; j <= len2; ++j){
        dp[j] = 0;
    }
    for(i = 1; i <= len1; ++i){
        /*
         * diag is the previous row's value at column j-1. It has to be saved
         * separately because dp[j-1] has already been overwritten with the
         * current row's value by the time column j is processed.
         */
        int diag = dp[0];
        for(j = 1; j <= len2; ++j){
            int up = dp[j];/* previous row's value at column j */
            if(s1[i-1] == s2[j-1]){
                dp[j] = diag + 1;
            }else if(dp[j-1] > up){
                dp[j] = dp[j-1];
            }
            diag = up;
        }
    }
#ifdef _DEBUG
    for(j = 0; j<= len2; ++j) printf("%d ",dp[j]);
    printf("\n");
#endif // _DEBUG
    printf("Solve3:%d\n",dp[len2]);
}
/*
 * Solve4: top-down (memoised) LCS length of the first i characters of s1 and
 * the first j characters of s2.
 *
 * dp is the memo table with 20 columns per row; a cell equal to -1 is treated
 * as "not computed yet". Cells with i == 0 or j == 0 are always written as 0.
 * Returns the value stored in dp[i][j].
 */
int Solve4(char s1[], char s2[], int i, int j,int dp[][20]){
    int *cell = &dp[i][j];

    if(i == 0 || j == 0){
        /* An empty prefix has no common subsequence with anything. */
        *cell = 0;
    }else if(*cell == -1){
        if(s1[i-1] == s2[j-1]){
            /* Last characters match: extend the LCS of both shorter prefixes. */
            *cell = Solve4(s1,s2,i-1,j-1,dp) + 1;
        }else{
            /* Otherwise drop the last character of one side and keep the better result. */
            *cell = max(Solve4(s1,s2,i-1,j,dp),Solve4(s1,s2,i,j-1,dp));
        }
    }
    return *cell;
}
/* Demo driver: runs the four LCS implementations on one fixed pair of strings. */
int main(){
    char first[20] = "abcdef";
    char second[20] = "dgajchdef";
    int memo[20][20];
    int lcs;

    /* Solve4 treats -1 as "not computed yet", so every cell starts at -1. */
    memset(memo, -1, sizeof(memo));

    Solve1(first,second);
    Solve2(first,second);
    Solve3(first,second);

    lcs = Solve4(first,second,strlen(first),strlen(second),memo);
    printf("Solve4:%d",lcs);
    return 0;
}
二、编辑距离
LeetCode原题:
72. Edit Distance
Given two words word1 and word2, find the minimum number of steps required to convert word1 to word2. (each operation is counted as 1 step.)
You have the following 3 operations permitted on a word:
a) Insert a character
b) Delete a character
c) Replace a character
递推公式:
dp[i][j]表示长度为i与长度为j的字符串的编辑距离,对应原题即word1(2)中前i(j)个字符组成的字符串:
dp[i][j] = min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1])+1 if word1[i-1] != word2[j-1]
dp[i][j] = dp[i-1][j-1] if word1[i-1] == word2[j-1]
也可以写成dp[i][j] = min(dp[i-1][j]+1, dp[i][j-1]+1, dp[i-1][j-1] + (word1[i-1]==word2[j-1]?0:1)),但这种写法感觉上似乎并不是很好,因为在word1[i-1] == word2[j-1]时这种写法还在dp[i-1][j]+1, dp[i][j-1]+1, dp[i-1][j-1]三者之中取最小,并没有意识到当word1[i-1] == word2[j-1]时dp[i][j] = dp[i-1][j-1]一定成立!!!
下面是碎碎念,可忽略:
开始想着 dp[i-1][j-1] + (word1[i-1]==word2[j-1]?0:1) 是否一定会小于或等于dp[i-1][j]+1和dp[i][j-1]+1,如果成立那么动态规划就可以变成贪心算法,但是并不成立,可找到反例。从下面证明中也可以得到答案。
首先证明dp[i-1][j-1]<=dp[i-1][j]+1:
w1'经过dp[i-1][j]步可变为w2,再删除w2末位字符得到w2',所以经过dp[i-1][j]+1步w1'可转换为w2',而dp[i-1][j-1]为w1'、w2'的编辑距离,因而dp[i-1][j]+1>=dp[i-1][j-1]一定成立(同理dp[i][j-1]+1>=dp[i-1][j-1]),所以在末位字符相同时没必要像第二种递推公式那样比较dp[i-1][j]+1, dp[i][j-1]+1, dp[i-1][j-1]。
dp[i-1][j-1]<=dp[i-1][j]一定成立吗?
dp[i-1][j-1]<dp[i-1][j]一定成立吗?
两个答案都是不!简单的反例:word1=“A”,word2=“A”,dp[1][0]=1,dp[1][1]=0,dp[i-1][j-1]>dp[i-1][j]
总结:dp[i-1][j-1]<=dp[i-1][j]+1并且等号可成立
以上只是比较了dp[i-1][j-1]与dp[i-1][j]之间的大小,并未讨论dp[i][j]什么情况下与dp[i-1][j-1]相等。
以下是正式的证明递推公式:
设字符串w1、w2长度分别为i、j,且w1、w2末位字符相同即w1[i-1]=w2[j-1],w1前i-1个字符组成的字符串为w1',w2前j-1个字符组成的字符串为w2',w1、w2编辑距离为dp[i][j],w1'与w2编辑距离为dp[i-1][j],w1'与w2'编辑距离为dp[i-1][j-1]。
int min(int a, int b){
return a