决策树算法实例(基于ID3)

本文给出一个基于ID3思想(以信息熵作为划分标准)的决策树实例。文中使用sklearn库实现(sklearn的决策树本身是优化后的CART算法,这里通过 criterion='entropy' 采用信息熵来选择划分属性),并借助graphviz将生成的决策树转换为pdf查看。
 
  

案例中用到的模拟数据如下:

############################################################################

RID  age          income  student  credit_rating  class_buys_computer
1    youth        high    no       fair           no
2    youth        high    no       excellent      no
3    middle_aged  high    no       fair           yes
4    senior       medium  no       fair           yes
5    senior       low     yes      fair           yes
6    senior       low     yes      excellent      no
7    middle_aged  low     yes      excellent      yes
8    youth        medium  no       fair           no
9    youth        low     yes      fair           yes
10   senior       medium  yes      fair           yes
11   youth        medium  yes      excellent      yes
12   middle_aged  medium  no       excellent      yes
13   middle_aged  high    yes      fair           yes
14   senior       medium  no       excellent      no

 
  

############################################################################

from sklearn.feature_extraction import DictVectorizer
import  csv
from sklearn import  tree
from  sklearn import  preprocessing
from sklearn.externals.six import StringIO
# Load the training table. In the sample data the first column is a record
# id (RID) and the last column is the class label; the columns in between
# are the categorical features.
# newline='' is the mode the csv module expects for text files on Python 3,
# and the with-block guarantees the file is closed once it has been read.
with open(r'E:\myAI\AllElectronics.csv', newline='') as allelectionicsData:
    reader = csv.reader(allelectionicsData)
    headers = next(reader)  # first row holds the column names

    featureList = []  # one {column name: value} dict per data row
    lableList = []    # class label of each row, in the same order

    print(headers)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # there is no label to read from such a row.
            continue
        lableList.append(row[-1])
        # Drop the id (index 0) and the label (last index); pair every
        # remaining cell with its column name.
        featureList.append(dict(zip(headers[1:], row[1:-1])))

print(featureList)

# One-hot encode the categorical features: every (column, value) pair seen in
# featureList becomes its own 0/1 column of dummyX.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyX: " + str(dummyX))

# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(); use whichever this installation provides and keep
# the result as a plain list of str for printing and for export_graphviz.
if hasattr(vec, "get_feature_names_out"):
    featureNames = vec.get_feature_names_out().tolist()
else:
    featureNames = vec.get_feature_names()
print(featureNames)

print("Lablelist: "+str(lableList))

# Encode the class labels numerically; for a two-class problem
# LabelBinarizer produces a single 0/1 column.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(lableList)
print("dummyY: "+ str(dummyY))

# criterion='entropy' selects splits by information gain, as ID3 does.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX,dummyY)
print("clf: " + str(clf))

# Write the fitted tree in Graphviz DOT format. export_graphviz writes into
# the open file itself, so its return value is not assigned back to f.
# Render afterwards with e.g.:
#   dot -Tpdf allelectionicsData.dot -o allelectionicsData.pdf
with open("allelectionicsData.dot",'w') as f:
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)

# Take the first training sample as the starting point for a new query.
oneRowX = dummyX[0, :]
print("oneRowX: " + str(oneRowX))

# Copy before editing: dummyX[0, :] is a view into dummyX, so writing through
# it would silently modify the training matrix (and oneRowX) as well.
newoneRow = oneRowX.copy()
# NOTE(review): with the sample data and DictVectorizer's default sorted
# columns, index 0 should be age=middle_aged and index 2 age=youth, i.e. this
# turns the "youth" sample into a "middle_aged" one -- confirm against the
# printed feature names.
newoneRow[0] = 1
newoneRow[2] = 0
print("newoneRow : "+ str(newoneRow))

# predict() expects a 2-D (n_samples, n_features) array; passing a bare 1-D
# sample raises ValueError in current scikit-learn, so present it as one row.
predictedY = clf.predict(newoneRow.reshape(1, -1))

print("predictedY: " + str(predictedY))

你可能感兴趣的:(机器学习)