决策树ID3算法案例

import pandas as pd
# ``sklearn.externals.six`` was removed in scikit-learn 0.23; the standard
# library provides the same name, kept here so it stays in scope.
from io import StringIO

# Input CSV (GBK-encoded) and the Graphviz .dot file written at the end.
file_root = "G:/python/源码/源码/lesson.csv"
dot_path = "G:/python 安装/graphviz/bin/dtc.dot"

# Category values encoded as +1; every other cell value is encoded as -1.
POSITIVE_FEATURE_VALUES = ["是", "高", "多"]
POSITIVE_LABEL = "高"
FEATURE_NAMES = ["combat", "num", "promption", "datam"]


def encode_features(frame):
    """Return the feature columns of *frame* as a 2-D integer array of +1/-1.

    The features are the columns at positions 1 to 4 (position 0 is skipped).
    A cell equal to one of ``POSITIVE_FEATURE_VALUES`` becomes 1, anything
    else (including missing values) becomes -1.
    """
    mask = frame.iloc[:, 1:5].isin(POSITIVE_FEATURE_VALUES)
    # Boolean -> integer: True maps to 1, False maps to -1.
    return mask.to_numpy().astype(int) * 2 - 1


def encode_labels(frame):
    """Return the label column (position 5) as a 1-D integer array of +1/-1.

    A label equal to ``POSITIVE_LABEL`` becomes 1, anything else becomes -1.
    The result is one-dimensional, which is the shape scikit-learn expects
    for ``y``; a column vector triggers a DataConversionWarning in ``fit``.
    """
    mask = frame.iloc[:, 5] == POSITIVE_LABEL
    return mask.to_numpy().astype(int) * 2 - 1


if __name__ == "__main__":
    # scikit-learn is imported here so the encoding helpers above can be
    # used without scikit-learn being installed.
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import export_graphviz

    dataframe = pd.read_csv(file_root, encoding="gbk")

    # Common pitfall: training directly on the raw object-dtype arrays.
    # The helpers return arrays with an explicit integer dtype instead.
    xf = encode_features(dataframe)
    yf = encode_labels(dataframe)

    # criterion="entropy" splits on information gain, the measure used by
    # ID3 (scikit-learn's tree implementation itself is CART-based).
    model = DecisionTreeClassifier(criterion="entropy")
    model.fit(xf, yf)

    # Visualisation: write the fitted tree in Graphviz .dot format.
    with open(dot_path, "w") as file:
        export_graphviz(model, feature_names=FEATURE_NAMES, out_file=file)
    # Convert the .dot file to png/pdf with Graphviz, e.g.:
    #   dot -Tpng dtc.dot -o lesson.png

你可能感兴趣的:(python)