Python第三方库sklearn
提供了决策树生成算法,此次作业便是用sklearn
完成
sklearn
在datasets类之中提供了wine数据集,其中wine.data
和wine.target
的类型均是numpy.ndarray
,也就是numpy的多维矩阵.在导入数据时也同样使用ndarray
.
import pandas
from sklearn import tree
import matplotlib
matplotlib.use('TkAgg')
import numpy as np
import graphviz
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files/Graphviz/bin'
"""
用1.2.3来表示价格的高低
使用1代表中餐,2代表法式,3代表快餐,4代表意大利式
0代表没人,1代表有人,2代表客满
等待时间分为四个级别,0代表0-10,1代表10-30,2代表30-60,3代表>60
其他均使用0或者1代表
"""
data = np.array([[1,1,3,2,1,1]
,[1,1,1,1,2,2]
,[0,0,1,3,1,1]
,[1,1,1,1,3,1]
,[1,0,3,2,2,3]
,[0,1,2,4,1,1] #6
,[0,0,1,3,0,0]
,[0,1,2,1,1,0]
,[0,0,1,3,2,3]
,[1,1,3,3,2,1]
,[1,0,1,1,0,0]
,[0,1,1,3,2,2]])
#### Load the label column
# Raw string: "\d" and "\D" are not valid escape sequences, so the non-raw
# literal triggers an invalid-escape warning on current Python versions.
# The resulting path value is unchanged.
DecisionData = pandas.read_csv(r"E:\documents\DecisionTree.csv")
# Double brackets select the label column as a one-column DataFrame.
targetData = DecisionData[['ShouldWeight']]
targetDataArray = targetData.to_numpy()
print(targetDataArray)
#### Fit
# One name per column of `data`, in column order; used to label the exported tree.
featureName = ['HasOtherChoices?','isHungry','Price','RestaurantType','isCrowded','TimeToWait']
# Split on information gain (entropy) rather than the default criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(data, targetData)
# Scored on the same samples the tree was fitted on; no held-out split is made here.
score = clf.score(data, targetData)
#### Render the fitted decision tree with Graphviz
# out_file=None makes export_graphviz hand back the DOT source as a string,
# which is then wrapped in a graphviz.Source object for rendering.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=featureName,
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)
# Render to disk using 'DecisionTree' as the output file name.
graph.render('DecisionTree')