用 Python 写一个中文分词器

在不使用第三方库的情况下,用 Python 写一个中文分词器。
分词之前需要先准备一个本地词典;下面的实现采用正向最大匹配法,单个词最长按 7 个字符匹配。

import re
import os
# Work from the directory that holds the dictionary file. The raw string keeps
# the backslash literal; the non-raw form only worked because "\w" happens to
# be an unrecognised escape, which newer Python versions warn about.
os.chdir(r'd:\workpath')

# One-off conversion that derived dic.txt from jieba's dict.txt (kept for
# reference). Each output line is the first field of the jieba entry together
# with the space that followed it, then a newline.
# f=open('dict.txt','r+',encoding="utf-8")
# g=open('dic.txt','w+',encoding='UTF-8')
# f=f.readlines()
# for i in range(0, len(f)):
#     m=re.search(r'(.*?) ',f[i])
#     a=m.group()
#     g.writelines('%s\n'%str(a))  # write the extracted word
# g.close()

# Load the local dictionary: one entry per line of dic.txt.
# The file is only read, so plain 'r' is enough (no write permission needed),
# and the context manager closes it even if reading fails.
with open('dic.txt', 'r', encoding='UTF-8') as h:
    # rstrip() drops the trailing space/newline written by the conversion
    # above, and unlike a fixed two-character slice it does not eat real
    # characters when a line lacks that exact suffix (e.g. the last line).
    mydict = [line.rstrip() for line in h]
def CutWords(sentence, dictionary=None, max_len=7):
    """Segment *sentence* by forward maximum matching, print and return it.

    Starting at the left, the longest prefix of at most ``max_len`` characters
    that is found in the dictionary is taken as the next word. If no prefix
    matches, a single character is emitted so the scan always advances.

    Args:
        sentence: The text to segment.
        dictionary: Iterable of known words. Defaults to the module-level
            ``mydict`` when omitted.
        max_len: Longest candidate word, in characters (default 7).

    Returns:
        The list of segments in order; joining them reproduces ``sentence``.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")

    vocab = mydict if dictionary is None else dictionary
    # Membership is tested up to max_len times per position; a hashed
    # container makes each test O(1) instead of a full list scan.
    if not isinstance(vocab, (set, frozenset, dict)):
        vocab = set(vocab)

    result = []
    pos = 0
    total = len(sentence)
    while pos < total:
        # Never look past the end of the sentence.
        size = min(max_len, total - pos)
        # Shrink the window until it matches; a 1-character window is
        # accepted unconditionally as the fallback.
        while size > 1 and sentence[pos:pos + size] not in vocab:
            size -= 1
        result.append(sentence[pos:pos + size])
        pos += size

    # Printing is kept for existing callers that relied on the output.
    print(result)
    return result

你可能感兴趣的:(python)