网上对kmp原理的介绍:找到匹配失败时的最合适的回退位置,而不是简单的回退到子串的第一个字符(常规的枚举查找方式,是简单的回退到子串的第一个字符),即可提高查找的效率.
因此为了找到这个合适的位置,先对子串预处理,从而得到一个回退位置的数组。
理解原理后,在写代码。python实现
代码如下:
# -*- coding: cp936 -*- #--------------------------------------------- # - #author chile - #version 1.0 - #since - #date 2014-02-17 - #desc kmp查找字符串 - # - # - # - #--------------------------------------------- #对子串加以预处理,找到匹配失败时子串回退的位置 def preprocess(patter): length = len(patter) p = handlerList(length) j = -1 p[0] = j for i in range(1,length): j = p[i - 1] while j >= 0 and patter[i] != patter[j+1]: j = p[j] if patter[i] == patter[j+1]: #含有重复字符时,回退的位置+1 p[i] = j + 1 else: p[i] = -1 return p #初始化一个元组 def handlerList(len,default = 0): if len <= 0: return p = [] for i in range(len): p.append(default) return p def kmp(value,pattern): srcSize = len(value) subSize = len(pattern) p = preprocess(pattern) tIndex , pIndex ,total = 0 , 0 , 0 while tIndex < srcSize and pIndex < subSize: #找到合适的回退位置 if (value[tIndex] == pattern[pIndex]): tIndex += 1 pIndex += 1 elif pIndex == 0: tIndex += 1 else: pIndex = p[pIndex - 1] + 1 if pIndex == subSize: #输出匹配结果,并且让比较继续下去 pIndex = 0 total += 1 print 'find',pattern,'at',(tIndex - subSize) print 'find times ' , total var = 'abadfafdewefwfafd' pattern ='afd' kmp(var,pattern)