注:数据集(AllElectronics.csv 与 cart.csv)附在文章末尾。
from sklearn.feature_extraction import DictVectorizer
from sklearn import tree
from sklearn import preprocessing
import csv
# Load the AllElectronics data set into per-sample feature dicts and labels.
featureList = []  # one {column name: value} dict per sample
labelList = []    # class label (last column) of each sample, in file order
# The context manager closes the file as soon as all rows are consumed;
# newline='' is the mode the csv module expects for files it reads.
with open(r'AllElectronics.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    # The first row holds the column names.
    headers = next(reader)
    print(headers)
    # Column 0 (RID) is only a row id and the last column is the label,
    # so neither is used as a feature.
    feature_columns = headers[1:-1]
    for row in reader:
        # csv.reader yields [] for blank lines (e.g. a trailing newline).
        if not row:
            continue
        labelList.append(row[-1])
        featureList.append(dict(zip(feature_columns, row[1:-1])))
print(featureList)
# One-hot encode the categorical feature dicts into a dense 0/1 matrix.
vec = DictVectorizer()
x_data = vec.fit_transform(featureList).toarray()
print("x_data: " + str(x_data))
# Print the names of the encoded columns. get_feature_names() was removed in
# scikit-learn 1.2, so prefer get_feature_names_out() and fall back to the old
# method only on releases that predate it. tolist() keeps the printed form a
# plain Python list, as before.
if hasattr(vec, "get_feature_names_out"):
    print(vec.get_feature_names_out().tolist())
else:
    print(vec.get_feature_names())
# Print the raw string labels.
print("labelList: " + str(labelList))
# Encode the string labels as 0/1.
lb = preprocessing.LabelBinarizer()
y_data = lb.fit_transform(labelList)
print("y_data: " + str(y_data))
# Build a decision tree that splits on entropy and train it on the encoded
# data; fit() returns the estimator itself.
model = tree.DecisionTreeClassifier(criterion='entropy').fit(x_data, y_data)
# Smoke test: predict the class of the first training sample.
x_test = x_data[0]
print(f"x_test: {x_test}")
# predict() expects a 2-D (n_samples, n_features) input, so add a leading axis.
predict = model.predict(x_test[None, :])
print(f"predict: {predict}")
# Export the fitted tree as a Graphviz drawing.
# Requires the Python bindings (pip install graphviz) and the Graphviz
# binaries from http://www.graphviz.org/
import graphviz

# get_feature_names() was removed in scikit-learn 1.2; use
# get_feature_names_out() when it exists and fall back on older releases.
if hasattr(vec, "get_feature_names_out"):
    tree_feature_names = vec.get_feature_names_out().tolist()
else:
    tree_feature_names = vec.get_feature_names()

dot_data = tree.export_graphviz(
    model,
    out_file=None,  # return the DOT source as a string instead of writing it
    feature_names=tree_feature_names,
    # NOTE(review): passed as a plain list because some scikit-learn releases
    # appear to reject NumPy arrays for this argument - confirm against the
    # installed version.
    class_names=lb.classes_.tolist(),
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
# Render the drawing to a file named after 'computer'.
graph.render('computer')
from sklearn import tree
import numpy as np
# Read cart.csv as a numeric matrix. The textual header line cannot be parsed
# as numbers, so the first row is discarded below.
data = np.genfromtxt("cart.csv", delimiter=",")
samples = data[1:]
# Column 0 is the row id and the last column is the label; everything in
# between is a feature.
x_data = samples[:, 1:-1]
y_data = samples[:, -1]
# Fit a decision tree with the default settings; fit() returns the estimator.
model = tree.DecisionTreeClassifier().fit(x_data, y_data)
# Export the fitted CART tree as a Graphviz drawing.
import graphviz  # http://www.graphviz.org/

# Feature names in the same order as the feature columns of cart.csv.
cart_feature_names = ['house_yes', 'house_no', 'single', 'married', 'divorced', 'income']
# Display names for the two classes.
cart_class_names = ['no', 'yes']

dot_data = tree.export_graphviz(
    model,
    out_file=None,  # return the DOT source as a string
    feature_names=cart_feature_names,
    class_names=cart_class_names,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
# Render the drawing to a file named after 'cart'.
graph.render('cart')
数据集:“AllElectronics.csv”:
RID,age,income,student,credit_rating,class_buys_computer
1,youth,high,no,fair,no
2,youth,high,no,excellent,no
3,middle_aged,high,no,fair,yes
4,senior,medium,no,fair,yes
5,senior,low,yes,fair,yes
6,senior,low,yes,excellent,no
7,middle_aged,low,yes,excellent,yes
8,youth,medium,no,fair,no
9,youth,low,yes,fair,yes
10,senior,medium,yes,fair,yes
数据集:“cart.csv”:
RID,house_yes,house_no,single,married,divorced,income,label
1,1,0,1,0,0,125,0
2,0,1,0,1,0,100,0
3,0,1,1,0,0,70,0
4,1,0,0,1,0,120,0
5,0,1,0,0,1,95,1
6,0,1,0,1,0,60,0
7,1,0,0,0,1,220,0
8,0,1,1,0,0,85,1
9,0,1,0,1,0,75,0
10,0,1,1,0,0,90,1