KMP字符串匹配算法--Python版

参考大神文章:http://www.ruanyifeng.com/blog/2013/05/Knuth–Morris–Pratt_algorithm.html

# -*- coding: UTF-8 -*-
__author__ = 'jiang'

# Sample text that findString() searches through.
text1 = 'BBC ABCDAB SDFSDAETASFSAFAafgsdfhXZVVBAABAGAABCDABCDABDE'
# Pattern that findString() looks for inside text1.
findStr = 'ABCDABD'
# Partial-match table filled in by partTable(): maps each prefix of findStr
# to the integer value partTable() computes for it.
data_index = {}

def findString(text=None, pattern=None):
    """Find the first occurrence of *pattern* in *text* with the KMP algorithm.

    Both arguments are optional.  When omitted, the module-level ``text1``
    and ``findStr`` are used, so the original zero-argument call keeps
    working unchanged.

    On success a message is printed in the original format (the pattern,
    ``len(pattern) + 1``, the start index of the match, and
    ``len(pattern) + 1`` again) and the start index is returned.  ``-1`` is
    returned when the pattern does not occur in the text.
    """
    # Build the table first, exactly as before; with the default pattern this
    # also refreshes the module-level ``data_index``.
    table = partTable(pattern)
    if text is None:
        text = text1
    if pattern is None:
        pattern = findStr

    text_len = len(text)
    pat_len = len(pattern)
    start = 0    # index in ``text`` where the current alignment begins
    matched = 0  # number of leading pattern characters known to match there

    while start + pat_len <= text_len:
        # Extend the current match as far as it goes.
        while matched < pat_len and text[start + matched] == pattern[matched]:
            matched += 1

        if matched == pat_len:
            # ``ptr_index`` / ``findIndex`` were both len(pattern) + 1 on
            # success in the original code; keep the message identical.
            ptr_index = pat_len + 1
            print('success find ' + pattern + ", ptr_index = " + str(ptr_index) + ", " + str(start) + ", " + str(ptr_index))
            return start

        if matched == 0:
            # Mismatch on the very first character: slide by one.
            start += 1
        else:
            # Shift = matched characters - partial-match value of the part
            # that DID match.  The characters covered by that value are known
            # to match already, so they are not compared again.
            keep = table[pattern[:matched]]
            start += matched - keep
            matched = keep
    return -1


def partTable(pattern=None):
    """Build the KMP partial-match table for *pattern*.

    For every non-empty prefix of the pattern the table stores the length of
    the longest proper prefix of that prefix which is also its suffix
    (0 when there is none; a single character always maps to 0).

    When *pattern* is omitted the module-level ``findStr`` is used and the
    result is also written into the module-level ``data_index``, as the
    original zero-argument function did.  When a pattern is passed
    explicitly the table is only returned.

    The intermediate steps are printed as a trace, as before.
    """
    publish = pattern is None
    if publish:
        pattern = findStr

    table = {}
    for end in range(1, len(pattern) + 1):
        td = pattern[:end]
        print('td=' + td)
        # A one-character string has only empty proper prefixes/suffixes, so
        # the inner loop does not run and the value stays 0.
        longest = 0
        for i in range(1, end):
            pre_td = td[:i]
            sufx_td = td[end - i:]
            print(pre_td + '->' + sufx_td)
            if pre_td == sufx_td:
                # Lengths are visited in increasing order, so the last hit is
                # the longest one.  (Summing all hits, as the previous code
                # did, over-counts for patterns such as 'AAAA'.)
                longest = i
        table[td] = longest

    if publish:
        data_index.update(table)
        print(str(data_index))
    else:
        print(str(table))
    return table



# Run the demo search over the module-level text1 / findStr; this executes
# whenever the file is run or imported.
findString()
        

你可能感兴趣的:(算法,python)