Rabin-Karp算法学习

Rabin-Karp算法

# coding: utf-8


class RabinKarp(object):
    """Rabin-Karp substring search over strings of decimal digits.

    The pattern's hash is computed once in the constructor; ``search`` then
    slides a window of the same length over the text, updating the window's
    hash in O(1) per step and verifying every hash hit with a real string
    comparison (so a hash collision can never produce a false match).

    Both ``pat`` and ``txt`` must consist of decimal digit characters only,
    because each character is converted with ``int()`` when hashing;
    any other character raises ``ValueError``.

    Intermediate hash values are printed as a trace of the algorithm.
    """

    def __init__(self, pat, txt):
        """Store the inputs and precompute the pattern hash.

        :param pat: pattern to look for (string of decimal digits).
        :param txt: text to search in (string of decimal digits).
        """
        self.M = len(pat)   # pattern length == window length
        self.Q = 997        # prime modulus used for every hash
        self.R = 10         # radix: one decimal digit per character
        self.txt = txt
        self.pat = pat

        # RM = R^(M-1) % Q: the positional weight of the leading digit of a
        # window, needed to remove that digit when the window slides.
        self.RM = 1
        for _ in range(self.M - 1):
            self.RM = (self.R * self.RM) % self.Q

        print('RM: %s' % self.RM)

        self.pat_hash = self.hash(pat, self.M)
        print('pat hash: %s' % self.pat_hash)

    def hash(self, key, m):
        """Return the hash of the first ``m`` characters of ``key``.

        The digits are folded with Horner's rule in base ``R`` modulo ``Q``.
        If ``key`` is shorter than ``m`` only the available characters are
        hashed.
        """
        h = 0
        for x in key[:m]:
            h = (self.R * h + int(x)) % self.Q

        return h

    def check(self, i):
        """Return True if the pattern really occurs in the text at offset ``i``."""
        return self.txt[i:i + self.M] == self.pat

    def search(self):
        """Return the offset of the first occurrence of the pattern, or -1."""
        N = len(self.txt)

        # A text shorter than the pattern cannot contain it; bail out before
        # hashing a window that does not exist.
        if N < self.M:
            return -1

        txt_hash = self.hash(self.txt, self.M)
        print('txt hash: %s' % txt_hash)

        if txt_hash == self.pat_hash and self.check(0):
            return 0

        for i in range(self.M, N):
            # Drop the leading digit of the previous window.  Python's %
            # yields a non-negative result for a positive modulus, so this is
            # safe even when RM * digit is larger than txt_hash + Q.
            txt_hash = (txt_hash + self.Q
                        - self.RM * int(self.txt[i - self.M])) % self.Q
            # Shift the remaining digits one place left and append txt[i].
            txt_hash = (txt_hash * self.R + int(self.txt[i])) % self.Q

            print(txt_hash)

            start = i - self.M + 1  # offset of the window ending at index i
            if txt_hash == self.pat_hash and self.check(start):
                return start

        return -1


if __name__ == '__main__':
    # Demo: search a digit string for a pattern and print the offset of the
    # first match (search() returns -1 when the pattern does not occur).
    rk = RabinKarp('26535', '3141592653589793')
    print(rk.search())

运行结果:

RM: 30
pat hash: 613
txt hash: 508
201
715
971
442
929
613
6

小结

预处理模式串(计算其哈希值)的时间为 O(m);平均时间复杂度为线性级别 O(n)(操作次数约为 7n,大 O 记号中省略常数因子)。最坏情况下(大量哈希冲突,每次都需要逐字符校验)时间复杂度为 O(mn)。

你可能感兴趣的:(数据结构)