# 机器学习——决策树的应用 (Machine Learning: An Application of Decision Trees)

from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
from sklearn.externals.six import StringIO




# Read the CSV file: gather each row's feature columns into a dict
# (header name -> cell value) and its class label (the last column) into a
# parallel list.
featureList = []
labelList = []

# The context manager guarantees the file is closed, even if parsing fails.
with open(r'C:\Users\Administrator\Desktop\buy_computer.csv', 'r') as allElectronicsData:
    reader = csv.reader(allElectronicsData)
    # next(reader) works on Python 2.6+ and Python 3; reader.next() exists
    # on Python 2 only.
    headers = next(reader)
    print(headers)

    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; there is no
            # label or feature to extract from it.
            continue
        labelList.append(row[-1])
        # Column 0 (presumably a row id -- confirm against the data file) and
        # the last column (the label) are excluded from the features.
        featureList.append(
            {headers[i]: row[i] for i in range(1, len(row) - 1)}
        )

print(featureList)


 
# Vectorize features: turn the list of feature dicts into a dense numeric
# matrix (string-valued features become one indicator column per value).
vec = DictVectorizer()
encodedFeatures = vec.fit_transform(featureList)
dummyX = encodedFeatures.toarray()

print("dummyX:{}".format(dummyX))
print(vec.get_feature_names())
print("labellist: {}".format(labelList))


# Vectorize class labels: binarize the label strings into a numeric target.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit(labelList).transform(labelList)
print("labelList: {}".format(labelList))
print("dummyY: {}".format(dummyY))
 
 
# Train a decision tree classifier on the vectorized data, using entropy
# as the split criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy').fit(dummyX, dummyY)
print("clf: {}".format(clf))
 
 
 
# Visualize the model: write the fitted tree to a Graphviz DOT file, labelling
# the split nodes with the vectorizer's feature names.
with open("allElectronicInformationGainOri.dot", 'w') as f:
    # The return value is intentionally not assigned: rebinding `f` inside the
    # `with` body would shadow the open file handle for no benefit.
    tree.export_graphviz(clf, feature_names=vec.get_feature_names(), out_file=f)
      
# Predict the class of a new sample derived from the first training row.
oneRowX = dummyX[0, :]
print("oneRowX: " + str(oneRowX))

# Copy before editing: dummyX[0, :] is a view into dummyX, so modifying it in
# place would silently alter the training matrix as well.
newRowX = oneRowX.copy()

newRowX[0] = 1
newRowX[2] = 0
print("newRowX: " + str(newRowX))

# predict() expects a 2-D array of shape (n_samples, n_features); a single
# sample therefore has to be reshaped to one row.
predictedY = clf.predict(newRowX.reshape(1, -1))
print("predictedY: " + str(predictedY))
  

# 你可能感兴趣的:(机器学习,决策树,python)