AC自动机

#created by sheng.chen at 2015.05.06

class WordTree:
    """A single node of the character trie.

    A freshly constructed node is empty and unlinked; the tree-building
    and failure-link code in this file fills the attributes in.
    """

    def __init__(self):
        # Links to other nodes: the node one level up, and the node to
        # fall back to when matching cannot continue from here.
        self.parent = None
        self.fail = None
        # Child nodes, in insertion order (a fresh list per instance).
        self.childs = []
        # The character this node stands for; '' until one is assigned.
        self.word = ''
        # Number of inserted words that end exactly at this node.
        self.count = 0
        

def createTree(words):
    """Build a character trie from ``words`` and return its root node.

    Each word is inserted one character at a time.  An existing child is
    reused when it already carries the character; otherwise a new node is
    created and linked to its parent.  The node reached by the last
    character of a word has its ``count`` incremented, so a word listed
    twice ends with ``count == 2``.  An empty word adds nothing.
    """
    root = WordTree()
    for word in words:
        node = root
        for ch in word:
            for candidate in node.childs:
                if candidate.word == ch:
                    # Character already present below this node: descend.
                    node = candidate
                    break
            else:
                # No child carries this character yet: grow the trie.
                fresh = WordTree()
                fresh.parent = node
                fresh.word = ch
                node.childs.append(fresh)
                node = fresh
        if word:
            # ``node`` is now the node of the word's final character.
            node.count += 1
    return root

def generateFail(wordtree, root):
    """Set the ``fail`` link of every node in the subtree at ``wordtree``.

    The failure link of a node points to the node spelling the longest
    proper suffix of the node's own path that is also a path in the trie,
    or to ``root`` when there is no such suffix.  ``root`` itself and its
    direct children always fail to ``root``.

    Nodes are processed breadth-first so that every shallower node already
    has its final ``fail`` link when a deeper node walks the failure
    chain.  A node's candidates are the failure chain of its parent: the
    first node on that chain owning a child with the same character
    provides the link.

    Parameters:
        wordtree: node whose subtree is processed (normally the root).
        root: root of the trie, used as the fallback failure target.

    Returns None; the nodes are modified in place.
    """
    from collections import deque

    pending = deque([wordtree])
    while pending:
        node = pending.popleft()
        # Default target; replaced below if a longer suffix exists.
        node.fail = root
        parent = node.parent
        candidate = parent.fail if parent is not None else None
        while candidate is not None:
            target = None
            for child in candidate.childs:
                # ``child is not node`` keeps a first-level node from
                # choosing itself when the candidate is the root.
                if child.word == node.word and child is not node:
                    target = child
                    break
            if target is not None:
                node.fail = target
                break
            if candidate is root or candidate.fail is candidate:
                # Reached the top of the chain without a match.
                break
            candidate = candidate.fail
        pending.extend(node.childs)
   
def find(source, targets):
    """Print every occurrence of each word of ``targets`` inside ``source``.

    A trie with failure links is built from ``targets`` and ``source`` is
    scanned once, left to right.  For every character the automaton
    follows failure links until some node (possibly the root) has a child
    for that character; if even the root has none, the character is
    skipped and matching restarts from the root.

    After each successful step, the reached node and every node on its
    failure chain are checked, so words that end at the same position
    (for example ``she`` and ``he``) are all reported, longest first.
    A word is printed once per occurrence even if it is listed several
    times in ``targets``.

    Parameters:
        source: text to scan.
        targets: iterable of words to look for.

    Returns None; matches are written to standard output, one per line.
    """
    root = createTree(targets)
    generateFail(root, root)
    state = root
    for ch in source:
        step = None
        while True:
            for child in state.childs:
                if child.word == ch:
                    step = child
                    break
            # Stop on a transition, or at the root where no further
            # fallback exists.  The root's children are always tried.
            if step is not None or state is root:
                break
            state = state.fail
        if step is None:
            # No word continues or starts with this character.
            continue
        state = step
        # Report the word ending here and every shorter word that is a
        # suffix of the current path.
        hit = state
        while hit is not root:
            if hit.count > 0:
                # Single-argument call form: same output on Python 2 and 3.
                print(getword(hit))
            hit = hit.fail

def getword(node):
    """Return the string spelled by the path from the top of the trie to ``node``.

    The ``word`` of ``node`` and of each ancestor (following ``parent``
    until it is None) are concatenated in top-down order.
    """
    pieces = []
    current = node
    while current is not None:
        pieces.append(current.word)
        current = current.parent
    # Characters were collected bottom-up; flip them into reading order.
    pieces.reverse()
    return ''.join(pieces)
    

def traverseTree(wordtree, indent):
    """Print the subtree rooted at ``wordtree``, one node per line.

    Each line has the form ``<indent><word>:<count>``.  Children are
    printed after their parent with one extra ``-`` appended to the
    indent, so the number of dashes equals the depth below the starting
    node.

    Parameters:
        wordtree: node to start printing from.
        indent: prefix string for the starting node's line.
    """
    # Single-argument call form: same output on Python 2 and Python 3.
    print('%s%s:%s' % (indent, wordtree.word, wordtree.count))
    for child in wordtree.childs:
        traverseTree(child, indent + '-')

# Demo input: the text to scan and the words to look for in it.
source = 'yasherhsayes'
words = ['say', 'she', 'he', 'her', 'shr', 'yes']

# To inspect the trie while debugging, build it with createTree(words),
# run generateFail(root, root) on it and pass it to traverseTree(root, '').

# Run the demo only when executed as a script, so that importing this
# module does not print anything.
if __name__ == '__main__':
    find(source, words)
            
          
            


 

你可能感兴趣的:(python,AC自动机)