基于决策树模型的分类算法代码示例(自用)

本示例使用基于决策树的集成模型(随机森林)实现分类算法。

import numpy as np
import sklearn.ensemble as se
import sklearn.preprocessing as sp

# Load the sample data: one comma-separated record per line of data.txt.
# The result is a 2-D array of strings (rows = samples, columns = fields).
data = []
with open('data.txt', 'r') as f:
    for line in f:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would corrupt a final line that has no trailing
        # newline.
        line = line.rstrip("\n")
        if not line:
            # A blank line would produce a one-field row and make the
            # array ragged, so it is skipped.
            continue
        data.append(line.split(","))

data = np.array(data)
# Label-encode the data column by column. Every column except the last is a
# feature; the last column is the class label. One fitted encoder per column
# is kept in `encoders` (same order as the columns) so that later input can
# be encoded with the same mapping.
columns = data.T
label_position = len(columns) - 1
encoders = []
feature_columns = []
train_y = []
for position, column in enumerate(columns):
    column_encoder = sp.LabelEncoder()
    codes = column_encoder.fit_transform(column)
    if position == label_position:
        train_y = codes
    else:
        feature_columns.append(codes)
    encoders.append(column_encoder)

# Assemble the training set: transpose the encoded feature columns back to
# one row per sample, and store the encoded labels as float64.
train_x = np.array(feature_columns).T
train_y = np.array(train_y).astype('f8')

# Fit the random forest classifier on the encoded samples.
model = se.RandomForestClassifier(max_depth=4, n_estimators=150, random_state=6)
model.fit(train_x, train_y)

# Interactive prediction loop: read one comma-separated sample per prompt,
# encode each field with the encoder fitted on the matching training column,
# and print the predicted class label. The loop ends on EOF or Ctrl-C.
feature_count = len(encoders) - 1  # the last encoder belongs to the label column
while True:
    try:
        x = input("[测试集输入:]").strip()
    except (EOFError, KeyboardInterrupt):
        print()
        break  # leave the loop cleanly instead of dying with a traceback
    if not x:
        continue  # nothing entered; prompt again

    fields = x.split(",")
    if len(fields) != feature_count:
        # Too many fields would silently pick up the label encoder and too
        # few would fail inside predict(); reject both up front.
        print("输入应包含 {} 个特征,实际为 {} 个".format(feature_count, len(fields)))
        continue

    try:
        # transform() expects a 1-D sequence, so each value is wrapped in a
        # one-element list and the single resulting code is taken out.
        encoded = [encoders[i].transform([value])[0]
                   for i, value in enumerate(fields)]
    except ValueError as err:
        # Raised when a value was never seen in the corresponding training column.
        print("无法编码输入: {}".format(err))
        continue

    test_x = np.array([encoded])  # shape (1, n_features): a single sample
    pred_y = model.predict(test_x)
    # The model may have been trained on float-typed label codes; cast back
    # to int before decoding, because inverse_transform indexes with them.
    print(encoders[-1].inverse_transform(pred_y.astype(int))[0])

你可能感兴趣的:(决策树,分类)