sss

  # !/usr/bin/env python
 2  #  -*-coding: UTF-8-*-
 3 
 4  # 构建节点
 5  class Node:
 6    def  __init__(self):
 7     self.map = {}
 8    def contain(self,key):
 9      return self.map. __contains__(key)
10    def  __getitem__(self,key):
11      return self.map[key]
12    def  __setitem__(self,key,value):
13     self.map[key]=value
14     
15  class Item:
16    def  __init__(self,key):
17     self.key=key
18     self.subNum=0
19     self.subNode=Node()
20    def add(self,key,item):
21     self.subNum += 1
22     self.subNode[key] = item
23 
24  # 建树
25  def build_tree(input):
26    """ build a tree """
27   sock = open(input, " r ")
28   buf = sock.read()
29   buf = chinese(buf)
30   tree = Item( "   ")
31    for word  in buf:
32     current = tree
33      for x  in word:
34        if current.subNode.contain(x):
35         current = current.subNode[x]
36        else:
37         item = Item(x)
38         current.add(x,item)
39         current = item
40    print tree
41    return tree
42 
43  def search(buf,thefile):
44    " search. "
45   buf = chinese(buf)
46   tree = build_tree(thefile)
47   havefind =  ""
48    for word  in buf:
49     current = tree
50      for x  in word:
51        if current.subNode.contain(x):
52         current = current.subNode[x]
53          if  current.subNum == 0:
54           havefind +=  "".join(word)
55        else:
56          break
57    return havefind
58 
59  # 判断读入的是否为汉字
60  def chinese(input):
61    " remove the not chinese "
62   input = unicode(input, " utf8 ")
63   buf = []
64    for word  in input:
65      if word >= u ' \u4e00 '  and word <= u ' \u9fa5 ':
66       buf = buf+[word.encode( ' utf-8 ')]
67    return buf
68 
69 
70  def run():
71   fileName =  " test "
72   textChar =  " "
73   buf = search(textChar,fileName)
74    print buf
75 
76  if  __name__ ==  " __main__ ":
77   run()

有关字典树的理论和概念可以参见http://blog.csdn.net/v_july_v/article/details/6897097
有关python的建树过程http://bytes.com/topic/python/answers/827476-dictionary-tree-format-hopefully-simple

http://www.cnblogs.com/wangfupeng1988/archive/2011/04/12/2013860.html

转载于:https://www.cnblogs.com/dengyigod/p/3179059.html

你可能感兴趣的:(sss)