Python实现朴素贝叶斯算法 --- 屏蔽社区留言板的侮辱性言论

# -*- coding:utf-8 -*-
import numpy as np
__author__ = 'yangxin'
"""
贝叶斯公式
p(xy)=p(x|y)p(y)=p(y|x)p(x)
p(x|y)=p(y|x)p(x)/p(y)
"""

"""
屏蔽社区留言板的侮辱性言论
"""


class SpeechJudgment(object):

    def load_data_set(self):
        """Return the built-in toy corpus of tokenized posts and their labels.

        Returns:
            tuple: ``(posting_list, class_vec)``. ``posting_list`` is a list
            of six posts, each a list of word strings. ``class_vec`` is the
            index-aligned list of labels, where 1 marks an insulting post
            and 0 marks a normal one.
        """
        # Tokenized posts; one inner list per message.
        posting_list = [
            ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
            ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
            # 'dalmation' (sic) is left untouched: tokens are data, not prose.
            ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
            # Last token was garbled as 'gar e'; a word token cannot contain
            # a space, so it is restored to 'garbage'.
            ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
            ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
            ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
        ]
        # Labels aligned by index with posting_list: 1 -> insulting, 0 -> not.
        class_vec = [0, 1, 0, 1, 0, 1]
        return posting_list, class_vec

    def create_vocab_list(self, data_set):
        vocab_set = set()
        for item in data_set:
            vocab_set = vocab_set | set

你可能感兴趣的:(Python,Machine,Learning,Python,朴素贝叶斯,分类算法,机器学习)