Python机器学习入门(2)模型优化(以决策树为例),来自Kaggle竞赛

from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeRegressor

def get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y):
    """Fit a size-limited decision tree and return its mean absolute error.

    Args:
        max_leaf_nodes: Value passed to ``DecisionTreeRegressor`` as its
            ``max_leaf_nodes`` setting (the cap on the tree's leaf count).
        train_X: Feature data the model is fitted on.
        val_X: Feature data the fitted model predicts for.
        train_y: Target values the model is fitted on.
        val_y: True target values compared against the predictions.

    Returns:
        The mean absolute error between ``val_y`` and the model's
        predictions for ``val_X``.
    """
    # random_state is fixed at 0 so every call builds its tree with the same seed.
    regressor = DecisionTreeRegressor(random_state=0, max_leaf_nodes=max_leaf_nodes)
    regressor.fit(train_X, train_y)
    return mean_absolute_error(val_y, regressor.predict(val_X))

# Compare the fit quality for several leaf-count limits: print each result and
# record it so the best-performing limit can be selected afterwards.
# NOTE(review): train_X, val_X, train_y and val_y are not defined in the visible
# part of this file; they are expected to exist before this loop runs.
candidate_max_leaf_nodes = [5, 50, 500, 5000]
mae_by_max_leaf_nodes = {}
for max_leaf_nodes in candidate_max_leaf_nodes:
    my_mae = get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y)
    # Keep the unrounded score; the %d in the print below only truncates it for display.
    mae_by_max_leaf_nodes[max_leaf_nodes] = my_mae
    print("Max leaf nodes: %d  \t\t Mean Absolute Error:  %d" %(max_leaf_nodes, my_mae))

# Leaf-count limit with the lowest recorded MAE. On a tie, min() returns the
# candidate that was tried first, because dicts iterate in insertion order.
best_max_leaf_nodes = min(mae_by_max_leaf_nodes, key=mae_by_max_leaf_nodes.get)

你可能感兴趣的:(python,机器学习,决策树)