sklearn.ensemble.RandomForestRegressor建立随机森林,GridSearchCV进行参数选择

import pandas as pd
#使用sklearn导入决策树模块
from sklearn import tree
# Import the loader for scikit-learn's built-in California housing dataset.
# It comes from the public ``sklearn.datasets`` package: the private
# ``sklearn.datasets.california_housing`` module path is no longer importable
# in current scikit-learn releases.
from sklearn.datasets import fetch_california_housing

# Load the housing dataset; ``housing.data`` holds the features and
# ``housing.target`` the values to predict (both are used by the split below).
housing = fetch_california_housing()

# Import the train/test splitting helper.
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples as the test set; the seed is fixed at 42.
data_train, data_test, target_train, target_test = train_test_split(
    housing.data,
    housing.target,
    test_size=0.1,
    random_state=42,
)

# Build a decision-tree regressor (only the seed is set, every other
# parameter keeps its default) and fit it on the training split.
dtr = tree.DecisionTreeRegressor(random_state=42).fit(data_train, target_train)

# Print the tree's score on the held-out test split
# (for scikit-learn regressors ``score`` is the R^2 coefficient).
print(dtr.score(data_test, target_test))

# Import the random-forest regressor.
from sklearn.ensemble import RandomForestRegressor

# Build a random forest (only the seed is set, every other parameter keeps
# its default) and fit it on the same training split as the single tree.
rfr = RandomForestRegressor(random_state=42).fit(data_train, target_train)

# Print the forest's score on the held-out test split, for comparison with
# the decision-tree score printed above it in the output.
print(rfr.score(data_test, target_test))

# Import the cross-validated grid-search helper.
from sklearn.model_selection import GridSearchCV

# Candidate values for each hyperparameter; the search evaluates every
# combination (3 x 3 = 9 settings).
tree_param_grid = {
    'min_samples_split': [3, 6, 9],
    'n_estimators': [10, 50, 100],
}

# Set up the search over a random forest with 5-fold cross-validation.
# The forest is seeded with the same random_state as the other estimators in
# this script so that the scores and the selected parameters are repeatable
# from run to run; unseeded, every run would build different forests.
grid = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid=tree_param_grid,
    cv=5,
)

# Run the search on the training split only; the test split stays unseen.
grid.fit(data_train, target_train)

# Print the mean cross-validated score of each parameter combination.
print(grid.cv_results_['mean_test_score'])

# Print the best parameter combination and its mean cross-validated score.
print(grid.best_params_, grid.best_score_)

输出结果为

0.637355881715626
0.8097021394052101
[0.78442267 0.80418131 0.80695249 0.78925709 0.80441837 0.80629562
 0.78909716 0.80315641 0.80613071]
{'min_samples_split': 3, 'n_estimators': 100} 0.8069524940694903

Process finished with exit code 0

你可能感兴趣的:(机器学习)