SentiWordNet计算情感倾向

使用NLTK提供的SentiWordNet工具计算一个句子的情感倾向性,计算方法为每个词所处词性下的每个词义情感倾向性之和。

import string

from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.corpus import sentiwordnet
from nltk.corpus import wordnet

# Tokens to exclude from sentiment analysis.
# English stop words from NLTK's corpus
stpw = stopwords.words('english')
# Every ASCII punctuation mark, one list entry each
punc = [mark for mark in string.punctuation]
# Combined exclusion list used when filtering tokens below
stop = punc + stpw

# Sentence to analyze
sentence = "His performance is so great that he got the price. "

# 1. Tokenize and drop stop words / punctuation.
# BUG FIX: the original removed items from `words` while iterating over the
# same list (`for word in words: words.remove(word)`), which skips the element
# immediately following each removal, so some stop words survived. Building a
# new filtered list avoids mutating the sequence being iterated.
words = [word for word in word_tokenize(sentence) if word.lower() not in stop]
print(words)

# 2. POS tagging: map Penn Treebank tags to the single-letter POS codes
# ('n'/'v'/'a'/'r') that SentiWordNet expects; unmapped tags become ''.
word_tag = pos_tag(words)
tag_map = {'NN': 'n', 'NNP': 'n', 'NNPS': 'n', 'NNS': 'n', 'UH': 'n',
           'VB': 'v', 'VBD': 'v', 'VBG': 'v', 'VBN': 'v', 'VBP': 'v', 'VBZ': 'v',
           'JJ': 'a', 'JJR': 'a', 'JJS': 'a',
           'RB': 'r', 'RBR': 'r', 'RBS': 'r', 'RP': 'r', 'WRB': 'r'}
word_tag = [(word, tag_map.get(tag, '')) for word, tag in word_tag]
print(word_tag)

# 3. Look up the SentiWordNet synsets for each (word, pos) pair.
sentiment_synsets = [list(sentiwordnet.senti_synsets(word, pos)) for word, pos in word_tag]
print(sentiment_synsets)

# 4. Score = sum over words of the mean (positive - negative) score across
# that word's synsets; words with no synsets contribute nothing.
score = sum(
    sum(syn.pos_score() - syn.neg_score() for syn in synsets) / len(synsets)
    for synsets in sentiment_synsets
    if synsets
)
print(score)

你可能感兴趣的:(自然语言处理,python)