#created by sheng.chen at 2015.05.06
"""Multi-pattern substring search using a trie with failure links.

The patterns are stored character by character in a tree of ``WordTree``
nodes.  Each node gets a ``fail`` link pointing at the node that spells the
longest proper suffix of its own path that is also a path in the tree, so
the source text can be scanned once without backing up.
"""
from collections import deque


class WordTree(object):
    """One node of the pattern trie."""

    def __init__(self):
        # Number of patterns that end exactly at this node.
        self.count = 0
        # Character on the edge leading to this node ('' for the root).
        self.word = ''
        # Failure link, filled in by generateFail().
        self.fail = None
        # Parent node; None marks the root.
        self.parent = None
        # Child nodes, in insertion order.
        self.childs = []


def _find_child(node, ch):
    """Return the child of *node* reached by character *ch*, or None."""
    for child in node.childs:
        if child.word == ch:
            return child
    return None


def createTree(words):
    """Build the trie for *words* and return its root node.

    A word that occurs several times increments the ``count`` of its final
    node once per occurrence.  Empty words add nothing to the tree.
    """
    root = WordTree()
    for word in words:
        node = root
        for ch in word:
            child = _find_child(node, ch)
            if child is None:
                child = WordTree()
                child.parent = node
                child.word = ch
                node.childs.append(child)
            node = child
        if word:
            # Mark the end of a (non-empty) pattern.
            node.count += 1
    return root


def _fail_target(node, root):
    """Compute the failure link for *node*.

    Relies on the failure links of all shallower nodes being set already.
    """
    parent = node.parent
    if parent is None or parent.parent is None:
        # The root and its direct children always fall back to the root.
        return root
    state = parent.fail
    while state is not None:
        child = _find_child(state, node.word)
        if child is not None and child is not node:
            return child
        if state.parent is None:
            # Reached the root without finding a matching edge.
            break
        state = state.fail
    return root


def generateFail(wordtree, root):
    """Set the ``fail`` link of *wordtree* and of every node below it.

    Nodes are visited breadth-first so that, when called on the root, every
    link on a parent's failure chain is final before its children use it.

    wordtree -- node to start from (normally the root)
    root     -- root of the trie; the fallback target of every link
    """
    queue = deque([wordtree])
    while queue:
        node = queue.popleft()
        node.fail = _fail_target(node, root)
        queue.extend(node.childs)


def find(source, targets):
    """Print and return every occurrence of the *targets* inside *source*.

    Matches are reported in order of their end position in *source*; when
    several patterns end at the same position the longest comes first.  A
    pattern listed more than once in *targets* is reported once per
    occurrence in *source*.

    Returns the list of matched words (each one is also printed).
    """
    root = createTree(targets)
    generateFail(root, root)
    matches = []
    state = root
    for ch in source:
        # Follow failure links until some state accepts ch; the root is
        # tried as well before the character is given up on.
        nxt = _find_child(state, ch)
        while nxt is None and state.parent is not None:
            state = state.fail
            nxt = _find_child(state, ch)
        if nxt is not None:
            state = nxt
        # Every node on the failure chain spells a suffix of the text read
        # so far, so each one that ends a pattern is a match ending here.
        node = state
        while node.parent is not None:
            if node.count > 0:
                word = getword(node)
                print(word)
                matches.append(word)
            node = node.fail
    return matches


def getword(node):
    """Return the string spelled by the path from the root to *node*."""
    chars = []
    while node is not None:
        chars.append(node.word)
        node = node.parent
    chars.reverse()
    return ''.join(chars)


def traverseTree(wordtree, indent):
    """Print the subtree under *wordtree*, one node per line.

    Each line is ``<indent><char>:<count>``; children are printed with one
    more '-' appended to *indent*.
    """
    print('%s%s:%s' % (indent, wordtree.word, wordtree.count))
    for child in wordtree.childs:
        traverseTree(child, indent + '-')


# Demo run, executed at module level.
source = 'yasherhsayes'
words = ['say', 'she', 'he', 'her', 'shr', 'yes']
find(source, words)