决策树方法-对买电脑进行分类预测

用决策树方法对买电脑进行分类预测

决策树方法-对买电脑进行分类预测_第1张图片

决策树方法-对买电脑进行分类预测_第2张图片

from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
# from sklearn.externals.six import StringIO

# Load the training table from the CSV file.
# The first row is the header. For every following row the loop below skips
# column 0 (presumably a record id -- confirm against the CSV), treats the
# last column as the class label, and collects the columns in between as a
# {header name: cell value} dict.
featureList = []
labelList = []

# `with` guarantees the handle is closed once every row has been consumed;
# newline='' is what the csv module documents for files passed to csv.reader.
with open(r'E:\Python\practice\Decision_Tree\Class_buys_computer.csv', 'rt', newline='') as allElectronicsDate:
    reader = csv.reader(allElectronicsDate)  # yields one list of strings per line
    headers = next(reader)

    print(headers)

    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; there is no label to read.
            continue

        print(row)
        labelList.append(row[-1])
        # Feature columns only: skip the first column and the trailing label.
        rowDict = {headers[i]: row[i] for i in range(1, len(row) - 1)}
        featureList.append(rowDict)

print(featureList)
print(labelList)

# One-hot encode the feature dicts with scikit-learn's DictVectorizer:
# fit_transform turns a list such as [{'a': 'b'}, {'c': 'd'}] into a 0/1
# matrix with one column per distinct "key=value" pair.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()

print("dummyX:"+str(dummyX))
# feature_names_ is the fitted list of column names, in the same order as
# the columns of dummyX. It replaces get_feature_names(), which was removed
# in scikit-learn 1.2, and is available in older releases as well.
print(vec.feature_names_)

print("labellist:"+str(labelList))


# Encode the class labels in 0/1 form with scikit-learn's LabelBinarizer.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("dummyY:"+str(dummyY))


# Create a decision-tree classifier that uses entropy as its split criterion
# and fit it to the encoded training data (dummyX features, dummyY labels).
# fit() returns the estimator itself, so construction and training chain.
clf = tree.DecisionTreeClassifier(criterion='entropy').fit(dummyX, dummyY)
print(f"clf:{clf}")



# Export the fitted tree in Graphviz DOT format. The file can then be
# rendered from the command line, e.g.
#   dot -Tpdf allElectronicInformationGainOri.dot -o allElectronicInformationGainOri.pdf
with open("allElectronicInformationGainOri.dot",'w') as f:
    # export_graphviz writes directly into the open handle, so its return
    # value is not assigned back to `f` (that only shadowed the handle).
    # feature_names_ replaces get_feature_names(), removed in scikit-learn 1.2.
    tree.export_graphviz(clf, feature_names=vec.feature_names_, out_file=f)

# Build one new sample by editing the first training row, then classify it.
oneRowX = dummyX[0, :]  # first row of the encoded feature matrix
print("oneRowX:" + str(oneRowX))

# Copy before editing: dummyX[0, :] is a view into dummyX, so writing through
# a plain alias would also overwrite row 0 of the training matrix (and
# oneRowX itself).
newRowX = oneRowX.copy()

# Change two of the one-hot columns. Which feature values these indices stand
# for depends on the column order reported by the vectorizer.
newRowX[0] = 1
newRowX[2] = 0
print("newRowX:" + str(newRowX))
# e.g. newRowX:[ 1.  0.  0.  0.  1.  1.  0.  0.  1.  0.]

# Reshape the 1-D vector into a single-row 2-D array for predict().
newRowX = newRowX.reshape(1, -1)
print("newRowX:" + str(newRowX))
# e.g. newRowX:[[ 1.  0.  0.  0.  1.  1.  0.  0.  1.  0.]]

# Classify the new sample with the classifier fitted earlier.
predictedY = clf.predict(newRowX)
print("predictedY:" + str(predictedY))




你可能感兴趣的:(决策树方法-对买电脑进行分类预测)