# -*- coding:utf-8 -*-
import numpy as np
__author__ = 'yangxin'
"""
贝叶斯公式
p(xy)=p(x|y)p(y)=p(y|x)p(x)
p(x|y)=p(y|x)p(x)/p(y)
"""
"""
屏蔽社区留言板的侮辱性言论
"""
class SpeechJudgment(object):
def load_data_set(self):
    """Return the toy message-board corpus together with its labels.

    The data is hard-coded; nothing is read from ``self`` or from disk.

    Returns:
        tuple: ``(posting_list, class_vec)`` where ``posting_list`` is a
        list of six tokenized posts (each post is a list of word strings)
        and ``class_vec`` is the parallel list of class labels, one per
        post: 1 for an abusive post, 0 for a normal one.
    """
    # Tokenized posts, one inner list per post. Every token must be a
    # single whitespace-free word so it can match tokens of a query post.
    posting_list = [
        ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
        ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
        ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
        ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
        ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
    ]
    # Class label for each post: 1 -> abusive text, 0 -> not abusive.
    class_vec = [0, 1, 0, 1, 0, 1]
    return posting_list, class_vec
def create_vocab_list(self, data_set):
vocab_set = set()
for item in data_set:
vocab_set = vocab_set | set