# 先贴个代码,后期再进行注释。
import numpy as np
def loadDataSet():
    """
    Build the small hard-coded training set used by the demo.

    Each entry pairs one tokenised document with its class label
    (1 = abusive, 0 = not abusive). Fresh lists are created on every call.

    :return: tuple ``(wordList, classVec)`` - the list of token lists and the
             list of labels, in matching order
    """
    labelledPosts = [
        (['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'], 0),
        (['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'], 1),
        (['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'], 0),
        (['stop', 'posting', 'stupid', 'worthless', 'garbage'], 1),
        (['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'], 0),
        (['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'], 1),
    ]
    # Split the pairs back into the two parallel lists callers expect.
    wordList = [tokens for tokens, _ in labelledPosts]
    classVec = [label for _, label in labelledPosts]
    return wordList, classVec
def getVocabList(dataSet):
    """
    Collect the distinct words that occur anywhere in the data set.

    Words are returned in first-seen order, so the same input always yields
    the same vocabulary layout. (Converting a ``set`` to a list gives an order
    that can change between interpreter runs, which would shuffle the columns
    of every word vector built from the vocabulary.)

    :param dataSet: iterable of documents, each an iterable of hashable words
    :return: list of unique words, ordered by first occurrence
    """
    # dict keys are unique and keep insertion order, giving an ordered de-dup.
    return list(dict.fromkeys(word for dataItem in dataSet for word in dataItem))
def word2vec(vocabList, inputDataSet):
    """
    Encode a document as a set-of-words vector over the vocabulary.

    Position ``i`` of the result is 1 when ``vocabList[i]`` occurs at least
    once in the document and 0 otherwise. Words that are not in the vocabulary
    are ignored. If a word appears more than once in ``vocabList``, only its
    first position is marked.

    :param vocabList: list of vocabulary words (must be hashable)
    :param inputDataSet: iterable of words making up one document
    :return: list of 0/1 ints with the same length as ``vocabList``
    """
    # Build the word -> index table once so each lookup is O(1) instead of
    # scanning the vocabulary list twice per word; setdefault keeps the first
    # index for duplicated vocabulary entries.
    firstIndex = {}
    for position, vocabWord in enumerate(vocabList):
        firstIndex.setdefault(vocabWord, position)

    wordVec = [0] * len(vocabList)
    for word in inputDataSet:
        position = firstIndex.get(word)
        if position is not None:
            wordVec[position] = 1
    return wordVec
def computeProb(wordVecs, classVec):
    """
    Estimate the log-probabilities used by the naive Bayes classifier.

    Per-word counts start at 1 and the per-class word totals start at 2, so
    every word keeps a non-zero probability and its logarithm stays finite.
    A label equal to 0 is counted as class 0; any other label as class 1.

    :param wordVecs: sequence of word vectors, one per training document
    :param classVec: sequence of class labels, one per training document
    :return: tuple ``(probWord0, probWord1, probClass0, probClass1)`` - the
             per-word log-probability arrays for class 0 and class 1, then
             the log prior of class 0 and of class 1
    """
    docTotal = len(classVec)
    class1Docs = sum(classVec)
    vocabSize = len(wordVecs[0])

    # Slot 0 accumulates class 0, slot 1 accumulates everything else.
    wordCounts = [np.ones(vocabSize), np.ones(vocabSize)]
    wordTotals = [2, 2]
    for docIndex, docVec in enumerate(wordVecs):
        slot = 0 if classVec[docIndex] == 0 else 1
        wordCounts[slot] += docVec
        wordTotals[slot] += sum(docVec)

    # Work in log space so the classifier can add terms instead of
    # multiplying many small probabilities.
    probWord0 = np.log(wordCounts[0] / wordTotals[0])
    probWord1 = np.log(wordCounts[1] / wordTotals[1])
    probClass1 = np.log(class1Docs / docTotal)
    probClass0 = np.log(1 - class1Docs / docTotal)
    return probWord0, probWord1, probClass0, probClass1
def classifyNBayes(inputDataSet, wordList, classVec):
    """
    Train on the given documents and classify one new document.

    The vocabulary and the probability tables are rebuilt from ``wordList``
    and ``classVec`` on every call, and the trained tables are printed before
    the document is scored.

    :param inputDataSet: list of words of the document to classify
    :param wordList: list of training documents (each a list of words)
    :param classVec: list of class labels matching ``wordList``
    :return: 0 when class 0 has the strictly higher log score, otherwise 1
    """
    vocabList = getVocabList(wordList)
    trainingVecs = [word2vec(vocabList, document) for document in wordList]
    probWord0, probWord1, probClass0, probClass1 = computeProb(trainingVecs, classVec)
    print(probWord0, probWord1, probClass0, probClass1)

    # Score = sum of the log-probabilities of the words present + log prior.
    queryVec = word2vec(vocabList, inputDataSet)
    scoreClass0 = sum(queryVec * probWord0) + probClass0
    scoreClass1 = sum(queryVec * probWord1) + probClass1
    return 0 if scoreClass0 > scoreClass1 else 1
def testNBayes():
    """
    Demo: classify two sample documents against the built-in data set.

    Prints the predicted class (0 or 1) for each sample, one per line.
    """
    trainingDocs, trainingLabels = loadDataSet()
    sampleDocs = (
        ['love', 'my', 'dalmation'],
        ['stupid', 'garbage'],
    )
    for sample in sampleDocs:
        print(classifyNBayes(sample, trainingDocs, trainingLabels))
# Run the demo at module level: this executes whenever the file is run or
# imported, since there is no __main__ guard.
testNBayes()