KMP 算法的使用:仅演示用法,暂不做深入讲解

"""
问题:我想匹配一个句子中的某一个 token,需要知道:
       1. 是否出现过
       2. 出现的次数
       3. 出现的下标位置在哪里
       案例:NER 标注的时候要给出 token

求解的方案是 KMP。
这里面怎么理解那个 next 指针?next[i] 表示模式串前 i+1 个字符中,既是真前缀又是后缀的最长子串的长度;失配时模式串指针回退到 next[j-1],主串指针不回退。
"""
#实现我们的算法结构
def KMP_algorithm(string, substring):
    """
    Find the first occurrence of ``substring`` in ``string`` using KMP.

    string: the text that is searched
    substring: the pattern to look for
    Returns the index in ``string`` where the first match starts, or -1
    when the pattern does not occur. An empty pattern yields 0.
    """
    fallback = gen_pnext(substring)
    text_len, pattern_len = len(string), len(substring)
    text_pos = matched = 0
    while text_pos < text_len and matched < pattern_len:
        if string[text_pos] != substring[matched]:
            if matched:
                # Reuse the longest border of the part already matched,
                # e.g. text 'abcxabcdabcdabcy' vs pattern 'abcdabcy':
                # the pattern prefix is slid onto the matching suffix while
                # the text position stays where it is.
                matched = fallback[matched - 1]
            else:
                # Nothing matched yet: just move on to the next text char.
                text_pos += 1
            continue
        text_pos += 1
        matched += 1
    # The loop stops either on a full match or when the text is exhausted.
    return text_pos - matched if matched == pattern_len else -1
def gen_pnext(substring):
    """
    Build the KMP partial-match table for ``substring``.

    Entry ``k`` holds the length of the longest proper prefix of
    ``substring[:k + 1]`` that is also a suffix of it, e.g. for
    'abcdabcy' the table is [0, 0, 0, 0, 1, 2, 3, 0].
    """
    size = len(substring)
    table = [0] * size
    border = 0  # length of the border carried over from the previous position
    for pos in range(1, size):
        # Shrink the candidate border until it can be extended or is empty.
        while border and substring[pos] != substring[border]:
            border = table[border - 1]
        if substring[pos] == substring[border]:
            border += 1
        table[pos] = border
    return table
def count(string: str, substring: str):
    """
    Locate every non-overlapping occurrence of ``substring`` in ``string``.

    The text is scanned left to right; after each hit the search resumes
    right behind the matched span, so occurrences never overlap.

    :param string: the text that is searched
    :param substring: the pattern to look for
    :return: a tuple ``(indexs, count)`` where ``indexs`` is a list of
             ``[start, end]`` pairs (``end`` exclusive, both relative to the
             original ``string``) and ``count`` is the number of matches.
             An empty ``substring`` yields ``([], 0)``.
    """
    width = len(substring)
    if width == 0:
        # An empty pattern matches at position 0 without consuming any text,
        # so the scan below would never advance.
        return [], 0

    indexs = []
    offset = 0  # absolute position in `string` where the unsearched tail starts
    while offset + width <= len(string):
        # KMP_algorithm reports a position relative to the slice it is given.
        relative = KMP_algorithm(string[offset:], substring)
        if relative == -1:
            break
        start = offset + relative
        end = start + width
        indexs.append([start, end])
        # Keep the absolute offset separate from the relative hit so later
        # matches are neither skipped nor reported at the wrong index.
        offset = end
    return indexs, len(indexs)
# Demo: print the [start, end] spans and the number of "asd" matches.
print(count(string="qwesdadasdasdadasrt", substring="asd"))

 

你可能感兴趣的:(python)