# coding=gbk
"""Compute the Shannon entropy of a list of labels using NLTK's FreqDist."""
import math

import nltk


def entropy(labels):
    """Return the Shannon entropy (base-2 logarithm) of ``labels``.

    The labels are counted with ``nltk.FreqDist``; each distinct label's
    relative frequency (its count divided by the total number of labels)
    is used as its probability ``p``, and the result is the sum of
    ``-p * log2(p)`` over the distinct labels.

    Args:
        labels: An iterable of hashable labels.

    Returns:
        The entropy as a float.  When every label is the same the result
        is ``0.0``.  For an empty input there are no terms to add, so the
        result is the integer ``0``.
    """
    freqdist = nltk.FreqDist(labels)  # frequency distribution of the labels
    # freqdist.freq(l) turns a count into a probability: the count of the
    # label divided by the total number of labels.
    probs = [freqdist.freq(label) for label in freqdist]
    # Negate each term instead of negating the final sum: the magnitude is
    # identical, but a zero entropy comes out as 0.0 rather than -0.0.
    return sum(-p * math.log(p, 2) for p in probs)


# The single-argument print(...) form behaves the same on Python 2 and 3.
print(entropy(["u", "m", "m", "u"]))  # two equally likely labels -> 1.0
print(entropy(["m", "m", "m", "m"]))  # a single distinct label -> 0.0