def chooseBestFeatureToSplit(dataSet):
numFeatures = len(dataSet[0]) - 1 #the last column is used for the labels
baseEntropy = calcShannonEnt(dataSet)
bestInfoGain = 0.0; bestFeature = -1
for i in range(numFeatures): #iterate over all the features
featList = [example[i] for example in dataSet]#create a list of all the examples of this feature
uniqueVals = set(featList) #get a set of unique values
newEntropy = 0.0
for value in uniqueVals:
subDataSet = splitDataSet(dataSet, i, value)
prob = len(subDataSet)/float(len(dataSet))
newEntropy += prob * calcShannonEnt(subDataSet)
infoGain = baseEntropy - newEntropy #calculate the info gain; ie reduction in entropy
if (infoGain > bestInfoGain): #compare this to the best gain so far
bestInfoGain = infoGain #if better than current best, set to best
bestFeature = i
return bestFeature #returns an integer
from __future__ import division
def GetAverage(mat):
n=len(mat)
m= width(mat)
num = [0]*m
for j in range(0,m):
for i in mat:
num[j]=num[j]+i[j]
num[j]=num[j]/n
return num
def width(lst):
i=0
for j in lst[0]:
i=i+1
return i
def GetVar(average,mat):
ListMat=[]
for i in mat:
ListMat.append(list(map(lambda x: x[0]-x[1], zip(average, i))))
n=len(ListMat)
m= width(ListMat)
num = [0]*m
for j in range(0,m):
for i in ListMat:
num[j]=num[j]+(i[j]*i[j])
num[j]=num[j]/n
return num
def DenoisMat(mat):
average=GetAverage(mat)
variance=GetVar(average,mat)
section=list(map(lambda x: x[0]+x[1], zip(average, variance)))
n=len(mat)
m= width(mat)
num = [0]*m
denoisMat=[]
for i in mat:
for j in range(0,m):
if i[j]>section[j]:
i[j]=section[j]
denoisMat.append(i)
return denoisMat
'''
Sampling archive
@author: Garvin Li
'''
import random
def RandomSampling(dataMat,number):
try:
slice = random.sample(dataMat, number)
return slice
except:
print 'sample larger than population'
def SystematicSampling(dataMat,number):
length=len(dataMat)
k=length/number
sample=[]
i=0
if k>0 :
while len(sample)!=number:
sample.append(dataMat[0+i*k])
i+=1
return sample
else :
return RandomSampling(dataMat,number)
/********************************
* 本文来自博客 “李博Garvin“
* 转载请标明出处:http://blog.csdn.net/buptgshengod
******************************************/