最后自己的实现代码如下:
import graphviz
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split
# Train a decision-tree classifier on the records in ``crx.data`` and render
# the fitted tree with Graphviz.
#
# NOTE(review): the parsing options below assume ``crx.data`` is the UCI
# Credit Approval file: 16 comma-separated columns, no header row, and '?'
# marking missing values. Confirm against the actual data file.
filename = 'crx.data'

# The file carries no header line, so supply column names explicitly;
# otherwise pandas would consume the first record as the header and that
# sample would be lost. String names also keep every feature name a str after
# one-hot encoding (newer scikit-learn rejects mixed int/str column names).
column_names = ['A{}'.format(i) for i in range(1, 17)]

# na_values='?' turns the file's missing-value marker into NaN so that
# dropna() below really removes incomplete rows, and so that numeric columns
# are parsed as numbers instead of strings.
re = pd.read_csv(filename, header=None, names=column_names, na_values='?')

# Keep only fully observed records.
data = re.dropna(axis=0, how='any')

# The first 15 columns are the features; the 16th column is the class label.
data_symbol = data.iloc[:, 0:15]
data_target = data.iloc[:, 15]

# One-hot encode the categorical (object-typed) feature columns; numeric
# columns pass through unchanged. A dense frame is used because the table is
# small and mixes numeric with indicator columns.
symbol = pd.get_dummies(data_symbol)

# Hold out 30% of the rows for evaluation. The split is random on every run
# because no random_state is given.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    symbol, data_target, test_size=0.3
)

clf = tree.DecisionTreeClassifier()
clf = clf.fit(Xtrain, Ytrain)

# Mean accuracy on the held-out rows; report it so the run yields a
# visible result.
score = clf.score(Xtest, Ytest)
print('Test accuracy: {:.3f}'.format(score))

# class_names must follow the sorted order of the labels seen during fit;
# '+' sorts before '-', so this order matches. feature_names labels each
# split with the encoded column name instead of a positional x[i].
dot_data = tree.export_graphviz(
    clf,
    feature_names=list(symbol.columns),
    class_names=["+", "-"],
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)
# Renders the graph to a file and opens it in the default viewer.
graph.view()