Levenshtein 距离,又称编辑距离,指的是两个字符串之间,由一个转换成另一个所需的最少编辑操作次数。
许可的编辑操作包括将一个字符替换成另一个字符,插入一个字符,删除一个字符。
编辑距离的算法是首先由俄国科学家Levenshtein提出的,故又叫Levenshtein Distance。
1、Java
public static void levenshtein(String str1, String str2) { // 计算两个字符串的长度。 int len1 = str1.length(); int len2 = str2.length(); // 建立上面说的数组,比字符长度大一个空间 int[][] dif = new int[len1 + 1][len2 + 1]; // 赋初值,步骤B。 for (int a = 0; a <= len1; a++) { dif[a][0] = a; } for (int a = 0; a <= len2; a++) { dif[0][a] = a; } // 计算两个字符是否一样,计算左上的值 int temp; for (int i = 1; i <= len1; i++) { for (int j = 1; j <= len2; j++) { System.out.println("i = " + i + " j = " + j + " str1 = " + str1.charAt(i - 1) + " str2 = " + str2.charAt(j - 1)); if (str1.charAt(i - 1) == str2.charAt(j - 1)) { temp = 0; } else { temp = 1; } // 取三个值中最小的 dif[i][j] = min(dif[i - 1][j - 1] + temp, dif[i][j - 1] + 1, dif[i - 1][j] + 1); System.out.println("i = " + i + ", j = " + j + ", dif[i][j] = " + dif[i][j]); } } System.out.println("字符串\"" + str1 + "\"与\"" + str2 + "\"的比较"); // 取数组右下角的值,同样不同位置代表不同字符串的比较 System.out.println("差异步骤:" + dif[len1][len2]); // 计算相似度 float similarity = 1 - (float) dif[len1][len2] / Math.max(str1.length(), str2.length()); System.out.println("相似度:" + similarity); } </span></span>
2、LotusScript
%REM
	Function toCompute
	Description: Returns a similarity score for two strings derived from
	their Levenshtein (edit) distance: 1 - distance / length of the longer
	string.

	- Returns 0 when either argument is the empty string.
	- Only the first 120 characters of each string are compared, because the
	  work matrix is a fixed 121 x 121 array; longer inputs are truncated
	  before both the distance and the divisor are computed.
	- Relies on a three-argument min() helper that is defined outside this
	  function (NOTE(review): confirm it returns the smallest argument).
%END REM
Function toCompute(str1 As String, str2 As String) As Double
	Dim len1 As Integer
	Dim len2 As Integer
	Dim maxlen As Integer
	Dim i As Long
	Dim j As Long
	Dim temp As Long
	Dim similarity As Double
	Dim ch1 As String
	' dif(i, j) = edit distance between the first i characters of str1 and
	' the first j characters of str2.
	Dim dif(0 To 120, 0 To 120) As Integer

	If str1 = "" Or str2 = "" Then
		toCompute = 0
		Exit Function
	End If

	len1 = Len(str1)
	len2 = Len(str2)

	' Clamp to the capacity of the fixed-size matrix.
	If len1 > 120 Then
		len1 = 120
	End If
	If len2 > 120 Then
		len2 = 120
	End If

	If len1 > len2 Then
		maxlen = len1
	Else
		maxlen = len2
	End If

	' Turning a prefix into the empty string (or back) costs its length.
	For i = 0 To len1
		dif(i, 0) = i
	Next
	For i = 0 To len2
		dif(0, i) = i
	Next

	For i = 1 To len1
		' Mid$ reads the i-th character directly; it does not change within
		' the inner loop, so fetch it once per row.
		ch1 = Mid$(str1, i, 1)
		For j = 1 To len2
			' Substitution is free when the characters already match.
			If ch1 = Mid$(str2, j, 1) Then
				temp = 0
			Else
				temp = 1
			End If
			' Cheapest of: substitute (diagonal), insert (left), delete (above).
			dif(i, j) = min(dif(i - 1, j - 1) + temp, dif(i, j - 1) + 1, dif(i - 1, j) + 1)
		Next
	Next

	' The bottom-right cell is the distance between the (clamped) strings.
	similarity = 1 - dif(len1, len2) / maxlen
	toCompute = similarity
End Function
优化(先去掉两个字符串的公共前缀和公共后缀,再用一维数组对剩余部分计算编辑距离)
1、Visual Basic
''' <summary>
''' Demo module: computes the Levenshtein (edit) distance between two sample
''' strings and writes it to the console.
''' </summary>
Module Module1

    ''' <summary>
    ''' Entry point. Prints the edit distance between the two sample strings.
    ''' </summary>
    Sub Main()
        Dim str1 As String
        Dim str2 As String
        str1 = "今天是星期五"
        str2 = "明天星期四"

        Dim dis As New clsDistance(str1, str2)
        Dim result As Integer
        result = dis.CacuDistance()
        Console.WriteLine(result)
    End Sub

    ''' <summary>
    ''' Levenshtein distance calculator for one pair of strings. The common
    ''' prefix and common suffix are skipped, and the remaining middle sections
    ''' are compared with a single-row dynamic programme.
    ''' </summary>
    Public Class clsDistance
        Private mCharA() As Char
        Private mCharB() As Char
        Private mCharALen As Integer
        Private mCharBLen As Integer

        ''' <summary>
        ''' Captures the two strings to compare as character arrays.
        ''' </summary>
        ''' <param name="StrA">First string; must not be Nothing.</param>
        ''' <param name="StrB">Second string; must not be Nothing.</param>
        Public Sub New(ByVal StrA As String, ByVal StrB As String)
            mCharA = StrA.ToCharArray
            mCharB = StrB.ToCharArray
            mCharALen = mCharA.Length
            mCharBLen = mCharB.Length
        End Sub

        ''' <summary>
        ''' Computes the edit distance between the two strings given to the
        ''' constructor. Performs no console output.
        ''' </summary>
        ''' <returns>
        ''' The minimum number of single-character insertions, deletions and
        ''' substitutions that turn one string into the other.
        ''' </returns>
        Public Function CacuDistance() As Integer
            ' An empty string is as far away as the other string is long.
            If mCharALen = 0 Then Return mCharBLen
            If mCharBLen = 0 Then Return mCharALen

            Dim i As Integer
            Dim j As Integer
            Dim lastShared As Integer = Math.Min(mCharALen, mCharBLen) - 1

            ' tP1: index of the first mismatch from the front (length of the
            ' common prefix); -1 while no mismatch has been found.
            Dim tP1 As Integer = -1
            ' tP2: number of matching characters at the back (length of the
            ' common suffix); -1 while no mismatch has been found.
            Dim tP2 As Integer = -1

            For i = 0 To lastShared
                If mCharA(i) <> mCharB(i) Then
                    tP1 = i
                    Exit For
                End If
            Next
            ' The shorter string is a prefix of the longer one.
            If tP1 = -1 Then Return Math.Abs(mCharALen - mCharBLen)

            ' Scan backwards, but never into the common prefix.
            For i = 0 To lastShared - tP1
                If mCharA(mCharALen - i - 1) <> mCharB(mCharBLen - i - 1) Then
                    tP2 = i
                    Exit For
                End If
            Next
            ' Everything after the prefix of the shorter string is a suffix of
            ' the longer one, so only the length difference remains.
            If tP2 = -1 Then Return Math.Abs(mCharALen - mCharBLen)

            ' One DP row over the middle section of A (upper bound = its length).
            Dim tA(mCharALen - tP1 - tP2) As Integer
            For i = 0 To tA.GetUpperBound(0)
                tA(i) = i
            Next

            ' tN1 = diagonal cell, tN2 = cell to the left in the row being
            ' built, tN3 = value of the cell being computed.
            Dim tN1 As Integer, tN2 As Integer, tN3 As Integer

            ' Walk the middle section of B from its last character to its first.
            For i = 0 To mCharBLen - tP1 - tP2 - 1
                tN1 = tA(0)
                tN2 = tN1 + 1
                For j = 1 To tA.GetUpperBound(0)
                    If mCharA(mCharALen - tP2 - j) = mCharB(mCharBLen - tP2 - i - 1) Then
                        tN3 = tN1
                    Else
                        tN3 = Min(tA(j), tN1, tN2) + 1
                    End If
                    ' tA(j - 1) is no longer needed for this row, so the new
                    ' value can be written back one step behind.
                    tA(j - 1) = tN2
                    tN2 = tN3
                    tN1 = tA(j)
                Next
                tA(tA.GetUpperBound(0)) = tN2
            Next

            Return tA(tA.GetUpperBound(0))
        End Function

        ''' <summary>
        ''' Returns the smallest of the supplied integers.
        ''' </summary>
        ''' <param name="Num">Values to compare.</param>
        ''' <returns>The minimum value, or 0 when no values are supplied.</returns>
        Public Function Min(ByVal ParamArray Num() As Integer) As Integer
            If Num.Length = 0 Then Return 0

            Dim tN As Integer = Num(0)
            For i As Integer = 1 To Num.GetUpperBound(0)
                If Num(i) < tN Then tN = Num(i)
            Next
            Return tN
        End Function
    End Class
End Module
2、Java
public static int clsDistance(String str1, String str2) { int j; int i; int mCharALen, mCharBLen; mCharALen = str1.length(); mCharBLen = str2.length(); int tp1 = -1; int tp2 = -1; j = Math.min(mCharALen , mCharBLen) - 1; for (i = 0; i <= j; i++) { if (str1.charAt(i) != str2.charAt(i)) { tp1 = i; break; } } if (tp1 == -1) { return Math.abs(mCharBLen - mCharALen); } for (i = 0; i <= j - tp1; i++) { if (str1.charAt(mCharALen - i - 1) != str2.charAt(mCharBLen - i - 1)) { tp2 = i; break; } } if (tp2 == -1) { return Math.abs(mCharALen - mCharBLen); } int taBound = mCharALen - tp1 - tp2; int tA[] = new int[taBound + 1]; for (i = 0; i < tA.length; i++) { tA[i] = i ; } System.out.println(Arrays.toString(tA)); int tN1, tN2, tN3; for (i = 0; i < mCharBLen - tp1 - tp2 ; i++) { tN1 = tA[0]; tN2 = tN1 + 1; System.out.println("\n" + i + " " + str2.charAt(mCharBLen - tp2 - i - 1)); for (j = 1; j < tA.length ; j++) { System.out.print(str1.charAt(mCharALen - tp2 - j ) +" "); if (str1.charAt(mCharALen - tp2 - j ) == str2.charAt(mCharBLen - tp2 - i - 1)) { tN3 = tN1; } else { tN3 = Math.min(tA[j], Math.min(tN1, tN2)) + 1; } tA[j - 1] = tN2; tN2 = tN3; tN1 = tA[j]; System.out.println("\ntN1 = " + tN1); System.out.println("tN2 = " + tN2); System.out.println("tN3 = " + tN3); } tA[tA.length - 1] = tN2; System.out.println("\n"+tA[tA.length - 1] ); } System.out.println("\n" +Arrays.toString(tA)); return tA[tA.length - 1]; }</span>
3、LotusScript
%REM
	Function clsDistance
	Description: Despite its name, returns a similarity score rather than a
	distance: 1 - (Levenshtein distance / length of the longer string).

	- Returns 0 when either argument is the empty string.
	- The common prefix and common suffix are skipped; only the remaining
	  middle sections go through the single-row dynamic programme.
	- Relies on a three-argument Min() helper that is defined outside this
	  function (NOTE(review): confirm it returns the smallest argument).
%END REM
Function clsDistance(str1 As String, str2 As String) As Double
	Dim mCharALen As Integer
	Dim mCharBLen As Integer
	Dim maxlen As Integer
	Dim lastShared As Integer
	Dim i As Integer
	Dim j As Integer
	' Declared one per line: in "Dim a, b As Integer" only b is an Integer.
	Dim tP1 As Integer
	Dim tP2 As Integer
	Dim tABound As Integer
	Dim tN1 As Integer
	Dim tN2 As Integer
	Dim tN3 As Integer
	Dim chB As String
	Dim similarity As Double
	Dim tA() As Integer

	mCharALen = Len(str1)
	mCharBLen = Len(str2)

	If str1 = "" Or str2 = "" Then
		clsDistance = 0
		Exit Function
	End If

	' maxlen is the similarity divisor; lastShared is the last 0-based index
	' that exists in both strings.
	If mCharALen > mCharBLen Then
		maxlen = mCharALen
		lastShared = mCharBLen - 1
	Else
		maxlen = mCharBLen
		lastShared = mCharALen - 1
	End If

	' tP1: 0-based index of the first mismatch from the front (length of the
	' common prefix); -1 while no mismatch has been found.
	tP1 = -1
	For i = 0 To lastShared
		If Mid$(str1, i + 1, 1) <> Mid$(str2, i + 1, 1) Then
			tP1 = i
			Exit For
		End If
	Next
	If tP1 = -1 Then
		' The shorter string is a prefix of the longer one.
		clsDistance = 1 - Abs(mCharALen - mCharBLen) / maxlen
		Exit Function
	End If

	' tP2: number of matching characters at the back (length of the common
	' suffix); the scan never reaches back into the common prefix.
	tP2 = -1
	For i = 0 To lastShared - tP1
		If Mid$(str1, mCharALen - i, 1) <> Mid$(str2, mCharBLen - i, 1) Then
			tP2 = i
			Exit For
		End If
	Next
	If tP2 = -1 Then
		' What is left of the shorter string is a suffix of the longer one.
		clsDistance = 1 - Abs(mCharALen - mCharBLen) / maxlen
		Exit Function
	End If

	' One DP row over the middle section of str1, sized to fit.
	tABound = mCharALen - tP1 - tP2
	ReDim tA(0 To tABound) As Integer
	For i = 0 To tABound
		tA(i) = i
	Next

	' Walk the middle section of str2 from its last character to its first.
	For i = 0 To mCharBLen - tP1 - tP2 - 1
		chB = Mid$(str2, mCharBLen - tP2 - i, 1)
		' tN1 = diagonal cell, tN2 = cell to the left in the row being built.
		tN1 = tA(0)
		tN2 = tN1 + 1
		For j = 1 To tABound
			If Mid$(str1, mCharALen - tP2 - j + 1, 1) = chB Then
				tN3 = tN1
			Else
				tN3 = Min(tA(j), tN1, tN2) + 1
			End If
			' tA(j - 1) is no longer needed for this row, so the new value
			' can be written back one step behind.
			tA(j - 1) = tN2
			tN2 = tN3
			tN1 = tA(j)
		Next
		tA(tABound) = tN2
	Next

	similarity = 1 - tA(tABound) / maxlen
	clsDistance = similarity
End Function