编辑距离(Edit Distance),又称Levenshtein距离,是指两个字符串之间,由一个转成另一个所需的最少编辑操作次数。许可的编辑操作包括将一个字符替换成另一个字符,插入一个字符,删除一个字符。
例如将kitten一词转成sitting,最少需要下面3次编辑操作,因此两者的编辑距离为3:
sitten(k→s)
sittin(e→i)
sitting(→g,即在末尾插入g)
#include <algorithm>
#include <cstring>
#include <iostream>

/**
 * Computes the Levenshtein (edit) distance between two substrings by plain
 * recursion.
 *
 * Both ranges are inclusive: lhs[lBegin..lEnd] and rhs[rBegin..rEnd]. A range
 * whose begin index is greater than its end index is empty.
 *
 * No intermediate result is cached, so overlapping subproblems are solved
 * again and again; the running time grows exponentially with the input length.
 *
 * @param lhs    first string
 * @param lBegin index of the first character of lhs to consider
 * @param lEnd   index of the last character of lhs to consider (inclusive)
 * @param rhs    second string
 * @param rBegin index of the first character of rhs to consider
 * @param rEnd   index of the last character of rhs to consider (inclusive)
 * @return minimum number of single-character substitutions, insertions and
 *         deletions that turn the lhs range into the rhs range
 */
int cacDistance(const char *lhs, int lBegin, int lEnd, const char *rhs, int rBegin, int rEnd)
{
    // lhs is used up: every remaining rhs character has to be inserted
    // (nothing is left to do when rhs is used up as well).
    if (lBegin > lEnd)
        return (rBegin > rEnd) ? 0 : rEnd - rBegin + 1;

    // rhs is used up but lhs is not: every remaining lhs character is deleted.
    if (rBegin > rEnd)
        return lEnd - lBegin + 1;

    // Equal leading characters cost nothing; continue with the two tails.
    if (lhs[lBegin] == rhs[rBegin])
        return cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin + 1, rEnd);

    // Leading characters differ: try each edit operation and keep the cheapest.
    const int substituteCost = cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin + 1, rEnd);
    const int deleteCost = cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin, rEnd);
    const int insertCost = cacDistance(lhs, lBegin, lEnd, rhs, rBegin + 1, rEnd);
    return std::min(substituteCost, std::min(deleteCost, insertCost)) + 1;
}

int main()
{
    const char *str1 = "kitten";
    const char *str2 = "sitting";

    // strlen() returns an unsigned size_t, so convert to int before
    // subtracting: an empty string must yield -1 (an empty inclusive range),
    // not a wrapped-around unsigned value.
    const int last1 = static_cast<int>(std::strlen(str1)) - 1;
    const int last2 = static_cast<int>(std::strlen(str2)) - 1;

    const int result = cacDistance(str1, 0, last1, str2, 0, last2);
    std::cout << "result: " << result << std::endl;
    return 0;
}
用动态规划来解此题,代码如下:
#include <algorithm>
#include <cstring>
#include <iostream>
#include <vector>

// Dynamic-programming table. After cacDistance() has run, info[i][j] is the
// edit distance between the first i characters of str1 and the first j
// characters of str2. The table is rebuilt from scratch by every call.
std::vector<std::vector<int> > info;

/**
 * Prints rows 1..size1 and columns 1..size2 of the table, one row per line.
 *
 * The table must already contain at least size1 + 1 rows and size2 + 1
 * columns, i.e. cacDistance() must have been called for strings of at least
 * these lengths.
 */
void print(int size1, int size2)
{
    for (int row = 1; row <= size1; ++row)
    {
        for (int col = 1; col <= size2; ++col)
            std::cout << info[row][col] << " ";
        std::cout << std::endl;
    }
}

/**
 * Computes the Levenshtein (edit) distance between two C strings with a
 * bottom-up dynamic-programming table, then prints the table via print().
 *
 * @param str1 first NUL-terminated string
 * @param str2 second NUL-terminated string
 * @return the edit distance, or -1 if either pointer is null
 */
int cacDistance(const char *str1, const char *str2)
{
    if (str1 == NULL || str2 == NULL)
        return -1;

    const int size1 = static_cast<int>(std::strlen(str1));
    const int size2 = static_cast<int>(std::strlen(str2));

    // Size the table for these inputs instead of relying on a fixed capacity.
    info.assign(size1 + 1, std::vector<int>(size2 + 1, 0));

    // Borders: turning a prefix of length i into the empty string takes i
    // deletions, and building a prefix of length j from nothing takes j
    // insertions.
    for (int i = 0; i <= size1; ++i)
        info[i][0] = i;
    for (int j = 0; j <= size2; ++j)
        info[0][j] = j;

    for (int i = 1; i <= size1; ++i)
    {
        for (int j = 1; j <= size2; ++j)
        {
            if (str1[i - 1] == str2[j - 1])
            {
                // Matching characters add no cost.
                info[i][j] = info[i - 1][j - 1];
            }
            else
            {
                // Cheapest of substitute (diagonal), delete (up), insert (left).
                info[i][j] = std::min(info[i - 1][j - 1],
                                      std::min(info[i - 1][j], info[i][j - 1])) + 1;
            }
        }
    }

    print(size1, size2);
    return info[size1][size2];
}

int main()
{
    const char *str1 = "kitten";
    const char *str2 = "sitting";
    const int result = cacDistance(str1, str2);
    std::cout << "result: " << result << std::endl;
    return 0;
}
带备忘录(建表)的递归解法,也就是书上所说的避免重复计算的解法:
#include <algorithm>
#include <cstring>
#include <iostream>
#include <vector>

// Marks a memo entry whose distance has not been computed yet.
const int INF = -1;

// Edit distance between the suffixes lhs[i..lhsLen) and rhs[j..rhsLen);
// every computed result is stored in memo[i][j] so it is computed only once.
static int distanceFrom(const char *lhs, int lhsLen, const char *rhs, int rhsLen,
                        int i, int j, std::vector<std::vector<int> > &memo)
{
    // One side is used up: the rest of the other side is inserted / deleted.
    if (i >= lhsLen)
        return rhsLen - j;
    if (j >= rhsLen)
        return lhsLen - i;

    // Read and write the same cell, so a hit always belongs to (i, j).
    if (memo[i][j] != INF)
        return memo[i][j];

    int best;
    if (lhs[i] == rhs[j])
    {
        // Matching characters add no cost.
        best = distanceFrom(lhs, lhsLen, rhs, rhsLen, i + 1, j + 1, memo);
    }
    else
    {
        const int substituteCost = distanceFrom(lhs, lhsLen, rhs, rhsLen, i + 1, j + 1, memo);
        const int deleteCost = distanceFrom(lhs, lhsLen, rhs, rhsLen, i + 1, j, memo);
        const int insertCost = distanceFrom(lhs, lhsLen, rhs, rhsLen, i, j + 1, memo);
        best = std::min(substituteCost, std::min(deleteCost, insertCost)) + 1;
    }

    memo[i][j] = best;
    return best;
}

/**
 * Computes the Levenshtein (edit) distance between two substrings using
 * recursion with a memo table, so each subproblem is solved at most once.
 *
 * Both ranges are half-open: lhs[lBegin..lEnd) and rhs[rBegin..rEnd); the end
 * indices are one past the last character. A range whose end is not greater
 * than its begin is treated as empty.
 *
 * The memo table is created per call, so no global state has to be prepared
 * by the caller and successive calls do not influence each other.
 *
 * @param lhs    first string (must not be null)
 * @param lBegin index of the first character of lhs to consider
 * @param lEnd   one past the index of the last character of lhs to consider
 * @param rhs    second string (must not be null)
 * @param rBegin index of the first character of rhs to consider
 * @param rEnd   one past the index of the last character of rhs to consider
 * @return minimum number of single-character substitutions, insertions and
 *         deletions that turn the lhs range into the rhs range
 */
int cacDistance(const char *lhs, int lBegin, int lEnd, const char *rhs, int rBegin, int rEnd)
{
    const int lhsLen = std::max(lEnd - lBegin, 0);
    const int rhsLen = std::max(rEnd - rBegin, 0);

    // One cell per pair of start offsets; base cases never touch the table.
    std::vector<std::vector<int> > memo(lhsLen, std::vector<int>(rhsLen, INF));
    return distanceFrom(lhs + lBegin, lhsLen, rhs + rBegin, rhsLen, 0, 0, memo);
}

int main()
{
    const char *str1 = "kitten";
    const char *str2 = "sitting";
    const int size1 = static_cast<int>(std::strlen(str1));
    const int size2 = static_cast<int>(std::strlen(str2));

    const int result = cacDistance(str1, 0, size1, str2, 0, size2);
    std::cout << "result: " << result << std::endl;
    return 0;
}