(实战)决策树例子+画图

数据集
(实战)决策树例子+画图_第1张图片

from sklearn.feature_extraction import DictVectorizer
from sklearn import tree
from sklearn import preprocessing
import csv
# Most of the file's content is text, so the csv module is used to read it.
# Load the data. The with-block guarantees the file is closed even if parsing
# fails, and newline='' is the mode the csv module expects for its input files.
with open(r'AllElectronics.csv', 'r', newline='') as Dtree:
    reader = csv.reader(Dtree)

    # The first row is the header row.
    headers = next(reader)
    print(headers)

    # One feature dict and one label per data row.
    featureList = []
    labelList = []

    for row in reader:
        # csv.reader yields [] for blank lines; skip them rather than
        # crashing on row[-1].
        if not row:
            continue
        # The last column holds the label.
        labelList.append(row[-1])
        # Map header -> value for every column between the first one and the
        # label. Column 0 is skipped (presumably a record id -- confirm
        # against the CSV).
        rowDict = {headers[i]: row[i] for i in range(1, len(row) - 1)}
        featureList.append(rowDict)

print(featureList)

(实战)决策树例子+画图_第2张图片

# One-hot encode the feature dicts: every (column, value) pair becomes a 0/1 column.
vec = DictVectorizer()
x_data = vec.fit_transform(featureList).toarray()
print("x_data: " + str(x_data))

# Print the names of the encoded feature columns.
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# (available since 1.0) is its replacement. list() keeps the printed output in
# the same plain-list form the old method produced.
print(list(vec.get_feature_names_out()))

# Print the raw labels.
print("labelList: " + str(labelList))

# Encode the labels as 0/1.
lb = preprocessing.LabelBinarizer()
y_data = lb.fit_transform(labelList)
print("y_data: " + str(y_data))

(实战)决策树例子+画图_第3张图片
(实战)决策树例子+画图_第4张图片
(实战)决策树例子+画图_第5张图片

# Decision tree that chooses its splits using the entropy criterion.
model = tree.DecisionTreeClassifier(criterion='entropy')
# Train on the encoded features and labels.
model.fit(X=x_data, y=y_data)

(实战)决策树例子+画图_第6张图片

# Quick check: predict the class of the first training sample.
x_test = x_data[0]
print(f"x_test: {x_test}")

# predict() takes a 2-D array, so turn the single 1-D row into shape (1, n_features).
x_test_2d = x_test.reshape(1, -1)
predict = model.predict(x_test_2d)
print(f"predict: {predict}")

(实战)决策树例子+画图_第7张图片

# Export the decision tree as a graph.
# conda install graphviz
# http://www.graphviz.org/
import graphviz 

# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# (available since 1.0) is its replacement. It returns an array, so convert it
# to a plain list of strings before handing it to export_graphviz.
dot_data = tree.export_graphviz(model, 
                                out_file = None, 
                                feature_names = list(vec.get_feature_names_out()),
                                class_names = lb.classes_,
                                filled = True,
                                rounded = True,
                                special_characters = True)
graph = graphviz.Source(dot_data)
graph.render('computer')  # save the rendered graph in the current directory

(实战)决策树例子+画图_第8张图片

# Bare expression: in a notebook cell this presumably displays the graphviz
# Source object inline; in a plain script it has no effect.
graph

(实战)决策树例子+画图_第9张图片

# Inspect the encoded feature names and the label classes.
# get_feature_names() was removed in scikit-learn 1.2; use its replacement.
list(vec.get_feature_names_out())
lb.classes_

(实战)决策树例子+画图_第10张图片

你可能感兴趣的:(决策树,决策树,python,机器学习)