《机器学习自学实战之旅(三)》朴素贝叶斯算法的代码实现

先贴出代码,后续再补充注释。

import numpy as np

def loadDataSet():
    """
    Build the toy training corpus used by the naive Bayes demo.

    :return: a tuple ``(wordList, classVec)``; ``wordList`` is a list of six
             tokenized posts (each a list of words) and ``classVec`` holds
             the matching labels (1 is abusive, 0 is not).
    """
    # Each row pairs a label with the raw text of one post; splitting the
    # text on whitespace yields that post's token list.
    labelledPosts = [
        (0, 'my dog has flea problems help please'),
        (1, 'maybe not take him to dog park stupid'),
        (0, 'my dalmation is so cute I love him'),
        (1, 'stop posting stupid worthless garbage'),
        (0, 'mr licks ate my steak how to stop him'),
        (1, 'quit buying worthless dog food stupid'),
    ]
    wordList = [text.split() for _, text in labelledPosts]
    classVec = [label for label, _ in labelledPosts]
    return wordList, classVec

def getVocabList(dataSet):
    """
    Collect the distinct words that occur in a collection of documents.

    Words are returned in the order in which they are first seen, so the
    vocabulary (and every feature vector indexed by it) is identical from
    one run to the next. Building the list from a ``set`` would instead
    produce an order that changes with string hash randomization.

    :param dataSet: iterable of documents, each an iterable of hashable words
    :return: list of unique words in first-seen order
    """
    # dict keys are unique and keep insertion order, which gives an ordered
    # de-duplication without needing the words to be sortable.
    return list(dict.fromkeys(word for document in dataSet for word in document))

def word2vec(vocabList,inputDataSet):
    """
    Encode a document as a set-of-words (0/1) vector over a vocabulary.

    :param vocabList: list of vocabulary words; position i of the result
                      corresponds to ``vocabList[i]``
    :param inputDataSet: iterable of (hashable) words making up one document
    :return: list of ``len(vocabList)`` ints, 1 where the vocabulary word
             occurs in the document and 0 elsewhere; words that are not in
             the vocabulary are ignored
    """
    # Build the word -> index map once so each lookup is O(1) instead of
    # scanning the vocabulary list twice (``in`` + ``index``) per word.
    # setdefault keeps the first position if the vocabulary has duplicates,
    # matching what list.index would return.
    position = {}
    for idx, vocabWord in enumerate(vocabList):
        position.setdefault(vocabWord, idx)

    wordVec = [0] * len(vocabList)
    for word in inputDataSet:
        idx = position.get(word)
        if idx is not None:
            wordVec[idx] = 1

    return wordVec

def computeProb(wordVecs,classVec):
    """
    Estimate the log-probabilities used by the naive Bayes classifier.

    Per-word counts for each class start at 1 and the per-class word totals
    start at 2, so a word never seen in a class still gets a non-zero
    probability. A document is counted under class 0 when its label equals
    0 and under class 1 for any other label.

    :param wordVecs: sequence of equal-length numeric word vectors, one per
                     training document
    :param classVec: sequence of labels aligned with ``wordVecs``; the
                     class-1 prior is ``sum(classVec) / len(classVec)``
    :return: tuple ``(probWord0, probWord1, probClass0, probClass1)`` with
             the per-word log conditional probabilities for each class
             (numpy arrays) followed by the log priors of class 0 and 1
    """
    totalNum = len(classVec)
    numOfClass1 = sum(classVec)
    vocabSize = len(wordVecs[0])

    # Slot 0 accumulates class 0, slot 1 accumulates every other label.
    wordCounts = [np.ones(vocabSize), np.ones(vocabSize)]
    wordTotals = [2, 2]

    for row, vec in enumerate(wordVecs):
        slot = 0 if classVec[row] == 0 else 1
        wordCounts[slot] += vec
        wordTotals[slot] += sum(vec)

    probWord0 = np.log(wordCounts[0] / wordTotals[0])
    probWord1 = np.log(wordCounts[1] / wordTotals[1])

    prior1 = numOfClass1 / totalNum
    probClass1 = np.log(prior1)
    probClass0 = np.log(1 - prior1)
    return probWord0,probWord1,probClass0,probClass1

def classifyNBayes(inputDataSet,wordList,classVec):
    """
    Train on the given corpus and classify one document.

    The vocabulary and the log-probabilities are rebuilt from ``wordList``
    and ``classVec`` on every call, and the learned values are printed
    before the decision is made.

    :param inputDataSet: iterable of words forming the document to classify
    :param wordList: list of training documents (each a list of words)
    :param classVec: labels aligned with ``wordList``
    :return: 0 when the class-0 log score is strictly greater than the
             class-1 log score, otherwise 1 (ties go to class 1)
    """
    vocabList = getVocabList(wordList)
    wordVecs = [word2vec(vocabList, document) for document in wordList]

    probWord0, probWord1, probClass0, probClass1 = computeProb(wordVecs,classVec)
    print(probWord0, probWord1, probClass0, probClass1)

    # Multiplying the 0/1 vector by the log-probability array keeps only the
    # terms of words present in the document; their sum plus the log prior
    # is the class score.
    queryVec = word2vec(vocabList, inputDataSet)
    scoreClass0 = sum(queryVec * probWord0) + probClass0
    scoreClass1 = sum(queryVec * probWord1) + probClass1
    return 0 if scoreClass0 > scoreClass1 else 1


def testNBayes():
    """
    Run the classifier on two sample documents and print each prediction.
    """
    wordList, classVec = loadDataSet()
    sampleDocs = (
        ['love', 'my', 'dalmation'],
        ['stupid', 'garbage'],
    )
    for doc in sampleDocs:
        print(classifyNBayes(doc, wordList, classVec))

# Run the demo at module top level (this also executes when the file is imported).
testNBayes()


你可能感兴趣的:(自学机器学习,算法,python,机器学习,深度学习)