1、首先是安装
1.1、安装Python 3.4
注意要用32位版本
http://www.python.org/downloads/
1.2、安装NumPy
注意两点:一是并非所有版本都提供Windows安装包,二是要选择支持Python 3.4的安装包
http://sourceforge.net/projects/numpy/files/NumPy/
2、下载NLTK Data
方法1:
在python中运行:
import nltk
# Called with no arguments, nltk.download() opens NLTK's interactive
# downloader, from which the data packages can be selected and installed.
nltk.download()
3、进行分词
3.1、设置环境变量
rem Directory of the Python installation to use.
set PYTHON_HOME=C:\NeoLanguages\Python34_x86
rem Put that directory first on PATH so the "python" command below resolves to it.
set PATH=%PYTHON_HOME%;%PATH%
rem Directory holding the downloaded NLTK data; NLTK consults NLTK_DATA when locating its data.
set NLTK_DATA=D:\NLP\NLTK\nltk_data
rem Start the Python interpreter; the leading @ keeps this command line from being echoed.
@python
3.2、py文件
#!/usr/bin/env python3
# Demo: tokenize, part-of-speech tag and named-entity chunk one sample text
# with NLTK. Requires the NLTK data packages to be downloaded beforehand.
import nltk

# Sample text. The backslash at the end of each line continues the string
# literal, so the text is a single line with no embedded newlines.
# NOTE: the text uses typographic apostrophes (’), so word_tokenize keeps
# 'Don’t' and 'can’t' as single tokens (see the session output in section
# 3.3); use an ASCII apostrophe (') if the contractions should be split.
sentence = "Don’t ever let somebody tell you you can’t do something, not even me. \
You got a dream, you gotta protect it. People can’t do something themselves, \
they wanna tell you you can’t do it. If you want something, go get it. Period."

# Tokenization: split the text into word and punctuation tokens.
tokens = nltk.word_tokenize(sentence)

# Part-of-speech tagging: produces a list of (token, tag) pairs.
tagged = nltk.pos_tag(tokens)

# Named-entity chunking (not syntactic parsing): returns a Tree in which
# recognised entities are wrapped in labelled subtrees such as PERSON.
entities = nltk.chunk.ne_chunk(tagged)
3.3、逐句运行
D:\MyProjects\NLP\NLTK>python
Python 3.4.4 (v3.4.4:737efcadf5a6, Dec 20 2015, 19:28:18) [MSC v.1600 32 bit (In
tel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import nltk
>>> sentence = "Don’t ever let somebody tell you you can’t do something, not e
ven me. \
... You got a dream, you gotta protect it. People can’t do something themselves
, \
... they wanna tell you you can’t do it. If you want something, go get it. Peri
od."
>>> tokens = nltk.word_tokenize(sentence)
>>> tagged = nltk.pos_tag(tokens)
>>> entities = nltk.chunk.ne_chunk(tagged)
>>> tokens
['Don’t', 'ever', 'let', 'somebody', 'tell', 'you', 'you', 'can’t', 'do', 'som
ething', ',', 'not', 'even', 'me', '.', 'You', 'got', 'a', 'dream', ',', 'you',
'got', 'ta', 'protect', 'it', '.', 'People', 'can’t', 'do', 'something', 'thems
elves', ',', 'they', 'wan', 'na', 'tell', 'you', 'you', 'can’t', 'do', 'it', '.
', 'If', 'you', 'want', 'something', ',', 'go', 'get', 'it', '.', 'Period', '.']
>>> tagged
[('Don’t', 'NNP'), ('ever', 'RB'), ('let', 'VB'), ('somebody', 'NN'), ('tell',
'VB'), ('you', 'PRP'), ('you', 'PRP'), ('can’t', 'VBP'), ('do', 'VB'), ('someth
ing', 'NN'), (',', ','), ('not', 'RB'), ('even', 'RB'), ('me', 'PRP'), ('.', '.'
), ('You', 'PRP'), ('got', 'VBD'), ('a', 'DT'), ('dream', 'NN'), (',', ','), ('y
ou', 'PRP'), ('got', 'VBD'), ('ta', 'JJ'), ('protect', 'NN'), ('it', 'PRP'), ('.
', '.'), ('People', 'NNS'), ('can’t', 'VBP'), ('do', 'VBP'), ('something', 'NN'
), ('themselves', 'PRP'), (',', ','), ('they', 'PRP'), ('wan', 'VBP'), ('na', 'T
O'), ('tell', 'VB'), ('you', 'PRP'), ('you', 'PRP'), ('can’t', 'VBP'), ('do', '
VB'), ('it', 'PRP'), ('.', '.'), ('If', 'IN'), ('you', 'PRP'), ('want', 'VBP'),
('something', 'NN'), (',', ','), ('go', 'VBP'), ('get', 'VB'), ('it', 'PRP'), ('
.', '.'), ('Period', 'NNP'), ('.', '.')]
>>> entities
Tree('S', [('Don’t', 'NNP'), ('ever', 'RB'), ('let', 'VB'), ('somebody', 'NN'),
('tell', 'VB'), ('you', 'PRP'), ('you', 'PRP'), ('can’t', 'VBP'), ('do', 'VB')
, ('something', 'NN'), (',', ','), ('not', 'RB'), ('even', 'RB'), ('me', 'PRP'),
('.', '.'), ('You', 'PRP'), ('got', 'VBD'), ('a', 'DT'), ('dream', 'NN'), (',',
','), ('you', 'PRP'), ('got', 'VBD'), ('ta', 'JJ'), ('protect', 'NN'), ('it', '
PRP'), ('.', '.'), ('People', 'NNS'), ('can’t', 'VBP'), ('do', 'VBP'), ('someth
ing', 'NN'), ('themselves', 'PRP'), (',', ','), ('they', 'PRP'), ('wan', 'VBP'),
('na', 'TO'), ('tell', 'VB'), ('you', 'PRP'), ('you', 'PRP'), ('can’t', 'VBP')
, ('do', 'VB'), ('it', 'PRP'), ('.', '.'), ('If', 'IN'), ('you', 'PRP'), ('want'
, 'VBP'), ('something', 'NN'), (',', ','), ('go', 'VBP'), ('get', 'VB'), ('it',
'PRP'), ('.', '.'), Tree('PERSON', [('Period', 'NNP')]), ('.', '.')])
>>>