实例:词频统计
import pprint
text = """
The Python Software Foundation (PSF) is a 501(c)(3) non-profit corporation that holds the intellectual
property rights behind the Python programming language. We manage the open source licensing for Python
version 2.1 and later and own and protect the trademarks associated with Python. We also run the North
American PyCon conference annually, support other Python conferences around the world, and fund Python
related development with our grants program and by funding special projects.
"""
punctuation = "n~@#$%^&*()_-+=<>?/,.:;{}[]|\'\""
for i in punctuation:
if i in text:
text.replace(i,' ')
text_list = text.split(' ')
text_list = [x.strip() for x in text_list]
number = set(text_list)
word_number = {}
for word in number:
word_number[word] = text_list.count(word)
sort_list = sorted(word_number.items(),key=lambda x:x[1],reverse=True)
pprint.pprint(sort_list)
//输出
[('the', 6),
('Python', 5),
('and', 5),
('with', 2),
('We', 2),
('', 1),
('trademarks', 1),
('other', 1),
('program', 1),
('run', 1),
('is', 1),
('behind', 1),
('for', 1),
('protect', 1),
('Foundation', 1),
('also', 1),
('intellectual', 1),
('fund', 1),
('Python.', 1),
('our', 1),
('by', 1),
('special', 1),
('North', 1),
('conference', 1),
('that', 1),
('annually,', 1),
('manage', 1),
('programming', 1),
('version', 1),
('American', 1),
('own', 1),
('related', 1),
('Software', 1),
('source', 1),
('conferences', 1),
('property', 1),
('holds', 1),
('licensing', 1),
('501(c)(3)', 1),
('development', 1),
('later', 1),
('(PSF)', 1),
('associated', 1),
('The', 1),
('world,', 1),
('funding', 1),
('2.1', 1),
('projects.', 1),
('a', 1),
('rights', 1),
('language.', 1),
('corporation', 1),
('grants', 1),
('non-profit', 1),
('open', 1),
('PyCon', 1),
('support', 1),
('around', 1)]
进程已结束,退出代码0