Python 实现 TF-IDF

import math
class TFIDf:
    """Toy TF-IDF calculator over a small, hard-coded tokenized corpus.

    Notes on the definitions used here:

    * "tf" is the raw number of occurrences of a word across the *whole*
      corpus (not a per-document frequency).
    * "idf" is ``ln(N / df) + 1`` where ``N`` is the number of documents
      and ``df`` is the number of documents containing the word.
    * The score is the product ``tf * idf`` for each vocabulary word.

    Every public method recomputes its state from ``self.doc``, so the
    methods may be called repeatedly and in any order.
    """

    def __init__(self):
        # Corpus: each document is a list of already-tokenized words.
        self.doc = [
            ['I', 'love', 'you', 'my', 'dear'],
            ['my', 'god', 'I', 'like', 'you'],
            ['good', 'morning'],
            ['good', 'afternoon'],
        ]
        self.vocab = []    # distinct words, in first-seen order
        self.dict = {}     # word -> occurrence count over the whole corpus
        self._tf = []      # counts aligned with self.vocab
        self._idf = []     # idf values aligned with self.vocab
        self._tfidf = []   # tf * idf values aligned with self.vocab

    def _getvocab(self):
        """Rebuild ``self.dict`` (word counts) and ``self.vocab`` from the corpus."""
        # Count into a fresh dict so repeated calls do not keep inflating
        # the totals from earlier calls.
        counts = {}
        for item in self.doc:
            for word in item:
                counts[word] = counts.get(word, 0) + 1
        self.dict = counts
        self.vocab = list(counts)

    def tf(self):
        """Fill ``self._tf`` with the corpus-wide count of each vocabulary word."""
        self._getvocab()
        self._tf = [self.dict[word] for word in self.vocab]

    def idf(self):
        """Fill ``self._idf`` with ``ln(N / df) + 1`` for each vocabulary word."""
        self.tf()
        n_docs = len(self.doc)
        # Build the per-document word sets once instead of scanning each
        # document list for every vocabulary word.
        doc_words = [set(item) for item in self.doc]
        # df is never zero: every vocabulary word came from some document.
        # The list is rebuilt (not appended to) so it stays aligned with
        # self.vocab across repeated calls.
        self._idf = [
            math.log(n_docs / sum(1 for words in doc_words if word in words)) + 1
            for word in self.vocab
        ]

    def tfidf(self):
        """Compute tf * idf per vocabulary word, print tf/idf, and return the scores.

        The scores are stored in ``self._tfidf`` rather than ``self.tfidf``:
        assigning to ``self.tfidf`` would replace this method on the
        instance and make any second call fail with ``TypeError``.

        Returns:
            list: tf * idf values aligned with ``self.vocab``.
        """
        self.idf()
        self._tfidf = [tf * idf for tf, idf in zip(self._tf, self._idf)]
        for word, tf, idf in zip(self.vocab, self._tf, self._idf):
            print('word', word, 'tf', tf, 'idf', idf)
        return self._tfidf
# Demo run at import/execution time: build the calculator over its
# built-in sample corpus and print one 'word ... tf ... idf ...' line per
# vocabulary word.
x = TFIDf()
x.tfidf()
        
           
        
    

 

你可能感兴趣的:(机器学习)