Python 字典树（Trie）的应用：在语言模型中统计词频

class TrieNode:
    """One node of a trie: an item, its frequency count, and tree links."""

    def __init__(self, value=None, count=0, parent=None):
        """Build a node that starts out with no children.

        :param value: the item held by this node (defaults to None)
        :param count: frequency count recorded on this node (defaults to 0)
        :param parent: the parent node, or None when there is none
        """
        # value: stored item; count: frequency; parent: link to the parent node
        self.value, self.count, self.parent = value, count, parent
        # Child nodes; every node gets its own fresh, empty mapping
        self.children = dict()
class Trie:
    """Prefix tree that records a frequency count for each inserted sequence."""

    def __init__(self):
        # The root stands for the empty sequence; its value stays None.
        self.root = TrieNode()

    def insert(self, sequence, value):
        """Record ``value`` as the frequency count of ``sequence``.

        Nodes missing along the path are created with a count of 0. If the
        sequence was inserted before, its previous count is overwritten.

        :param sequence: iterable of hashable items (e.g. a string) to insert
        :param value: frequency count to store for the sequence
        :return: None
        """
        cur_node = self.root
        for item in sequence:
            child = cur_node.children.get(item)
            if child is None:
                child = TrieNode(value=item, count=0, parent=cur_node)
                cur_node.children[item] = child
            cur_node = child
        cur_node.count = value

    def search(self, sequence):
        """Return the frequency count stored for ``sequence``.

        A sequence that is also a prefix of a longer inserted sequence keeps
        its own count. Nodes that exist only as part of a longer path were
        created with a count of 0, so they report 0.

        :param sequence: iterable of hashable items (e.g. a string) to look up
        :return: the count stored on the final node, or 0 if the path
            does not exist in the trie
        """
        cur_node = self.root
        for item in sequence:
            cur_node = cur_node.children.get(item)
            if cur_node is None:
                # The path breaks here: the sequence was never inserted.
                return 0
        # Do not reject nodes that have children: an inserted sequence may
        # also be the prefix of another inserted sequence.
        return cur_node.count

 

你可能感兴趣的:(python)