#!/usr/bin/env python
# -*- coding: utf-8 -*-


class arithmetic(object):
    """String-comparison helpers: edit distance and longest common substring.

    The methods keep no state on the instance; the class only groups them.
    The code runs unchanged on Python 2.6+ and Python 3.
    """

    def levenshtein(self, first, second):
        """Return the Levenshtein (edit) distance between two sequences.

        The distance is the minimum number of single-item insertions,
        deletions and substitutions needed to turn one sequence into the
        other; it is a common string-similarity measure.

        Args:
            first: A sized, iterable sequence (typically a string).
            second: A sized, iterable sequence (typically a string).

        Returns:
            The edit distance as a non-negative int. If one input is empty
            the result is the length of the other.
        """
        # The distance is symmetric, so keep the shorter input as the
        # column axis: each DP row then has len(shorter) + 1 cells.
        if len(first) > len(second):
            first, second = second, first
        if not first:
            return len(second)

        # Row 0: turning a prefix of `first` into the empty sequence costs
        # one edit per item in that prefix.
        previous = list(range(len(first) + 1))
        for row, second_item in enumerate(second, 1):
            # Column 0: turning the empty sequence into a prefix of
            # `second` costs one edit per item, so the cell is the row index.
            current = [row]
            for col, first_item in enumerate(first, 1):
                cost = 0 if first_item == second_item else 1
                current.append(min(
                    previous[col] + 1,         # skip one item of `second`
                    current[col - 1] + 1,      # skip one item of `first`
                    previous[col - 1] + cost,  # match or substitute
                ))
            previous = current
        return previous[-1]

    def lcs(self, first, second):
        """Return the longest common substring of two sequences.

        "Substring" means a contiguous run. When several common runs share
        the maximum length, the one found first while scanning `first` from
        left to right (and `second` from left to right within that) wins.

        Args:
            first: An iterable sequence (typically a string).
            second: A sized, sliceable sequence (typically a string).

        Returns:
            A slice of `second` holding the longest common run, or an empty
            slice when the inputs share no item.
        """
        best_length = 0
        best_end = 0  # index in `second` one past the end of the best run

        # previous[j] is the length of the common run ending at the prior
        # item of `first` and at second[j - 1]; slot 0 is a zero sentinel so
        # the first column needs no special case.
        previous = [0] * (len(second) + 1)
        for first_item in first:
            current = [0] * (len(second) + 1)
            for j, second_item in enumerate(second, 1):
                if first_item != second_item:
                    continue
                current[j] = previous[j - 1] + 1
                # Strict comparison keeps the earliest run on ties.
                if current[j] > best_length:
                    best_length = current[j]
                    best_end = j
            previous = current
        return second[best_end - best_length:best_end]


if __name__ == "__main__":
    arith = arithmetic()
    print(arith.levenshtein('GUMBOsdafsadfdsafsafsadfasfadsfasdfasdfs', 'GAMBOL00000000000dfasfasfdafsafasfasdfdsa'))
    print(arith.lcs('GUMBOsdafsadfdsafsafsadfasfadsfasdfasdfs', 'GAMBOL00000000000dfasfasfdafsafasfasdfdsa'))
def lcs(self, first, second):
    """Return the longest common substring of two sequences.

    Variant of the longest-common-substring dynamic programme that keeps
    only the previous row of the table instead of the whole matrix.

    "Substring" means a contiguous run. When several common runs share the
    maximum length, the one found first while scanning `first` from left to
    right (and `second` from left to right within that) wins.

    Args:
        self: Unused; kept so the function can be attached to a class as a
            method without changing its signature.
        first: An iterable sequence (typically a string).
        second: A sized, sliceable sequence (typically a string).

    Returns:
        A slice of `second` holding the longest common run, or an empty
        slice when the inputs share no item.
    """
    second_length = len(second)
    size = 0  # length of the longest run found so far
    end = 0   # index in `second` of the last item of that run

    # previous[j] is the length of the common run ending at the prior item
    # of `first` and at second[j].
    previous = [0] * second_length
    for first_item in first:
        # Rows are built with list repetition rather than a comprehension:
        # a comprehension variable leaks into this scope on Python 2 and
        # could overwrite the stored end index.
        current = [0] * second_length
        for j, second_item in enumerate(second):
            if first_item != second_item:
                continue
            # A run can only be extended when there is a cell up and to the
            # left; in the first column it always starts at length 1.
            current[j] = previous[j - 1] + 1 if j > 0 else 1
            # Strict comparison keeps the earliest run on ties.
            if current[j] > size:
                size = current[j]
                end = j
        previous = current
    return second[end - size + 1:end + 1]
# References: http://henryouly.blogspot.com/2006/10/blog-post_895.html
# http://space.itpub.net/16857/viewspace-79033
# http://hellobmw.com/archives/dynamic-programming-longest-common-substring.html
# http://en.wikipedia.org/wiki/Longest_common_substring_problem
# http://www.allisons.org/ll/AlgDS/