# Decision tree and random forest: a small example (决策树、随机森林,小例子)


import matplotlib.pyplot as plt
import pandas as pd
# Import the loader from the public ``sklearn.datasets`` namespace; the
# private ``sklearn.datasets.california_housing`` module path is not
# importable in current scikit-learn releases.
from sklearn.datasets import fetch_california_housing
from sklearn import tree

housing = fetch_california_housing()
print(housing.DESCR)

# Column indices of the two features used for the small demonstration tree.
# The same list selects both the training columns and their names, so the
# labels in the exported graph cannot drift from the fitted data.
feature_columns = [6, 7]
feature_labels = [housing.feature_names[i] for i in feature_columns]

# max_depth=2 limits the tree to two levels of splits.
dtr = tree.DecisionTreeRegressor(max_depth=2)
# fit() receives two arrays: the inputs X and the targets y.
dtr.fit(housing.data[:, feature_columns], housing.target)

# Export the fitted tree as Graphviz DOT source. With out_file=None the DOT
# text is returned as a string instead of being written to a file.
dot_data = tree.export_graphviz(
    dtr,
    out_file=None,
    feature_names=feature_labels,
    filled=True,
    impurity=False,
    rounded=True,
)

# Requires the third-party package: pip install pydotplus
import pydotplus
from IPython.display import Image

# Parse the DOT source produced by export_graphviz into a graph object.
graph = pydotplus.graph_from_dot_data(dot_data)

# Recolour the entry at index 7 of the parsed node list.
highlighted_node = graph.get_nodes()[7]
highlighted_node.set_fillcolor('#FFF2DD')

# Wrap the rendered PNG bytes in an IPython Image object.
# NOTE(review): as a bare expression this presumably only renders inline when
# it is the last statement of a notebook cell -- confirm how this is run.
png_bytes = graph.create_png()
Image(png_bytes)

# Save the rendering to disk as well.
graph.write_png("de_tree.png")

from sklearn.model_selection import train_test_split

# Split the dataset into a training set and a test set, holding out 10% of
# the samples for testing. random_state is fixed so the random split is the
# same on every run.
split = train_test_split(
    housing.data,
    housing.target,
    test_size=0.1,
    random_state=42,
)
data_train, data_test, target_train, target_test = split

# Fit a decision tree (no depth limit given) on all features of the training
# set, then print its score on the held-out test set.
dtr = tree.DecisionTreeRegressor(random_state=42)
dtr.fit(data_train, target_train)
tree_test_score = dtr.score(data_test, target_test)
print(tree_test_score)

from sklearn.ensemble import RandomForestRegressor

# Train a random forest on the same training split (fit() returns the
# estimator itself, so construction and fitting can be chained), then print
# its score on the same held-out test set used for the single tree.
rfr = RandomForestRegressor(random_state=42).fit(data_train, target_train)
forest_test_score = rfr.score(data_test, target_test)
print(forest_test_score)

# Exhaustive hyper-parameter search with cross-validation.
# GridSearchCV is provided by sklearn.model_selection; the former
# sklearn.grid_search module has been removed from scikit-learn.
from sklearn.model_selection import GridSearchCV

# Candidate values tried for each hyper-parameter (3 x 3 = 9 combinations).
tree_param_grid = {
    'min_samples_split': [3, 6, 9],
    'n_estimators': [10, 50, 100],
}

# random_state is fixed, as everywhere else in this script, so that repeated
# runs of the search give comparable results.
grid = GridSearchCV(
    RandomForestRegressor(random_state=42),
    param_grid=tree_param_grid,
    cv=5,
)
grid.fit(data_train, target_train)

# The grid_scores_ attribute no longer exists; per-candidate results are read
# from the cv_results_ dict instead, one line per parameter combination.
results = grid.cv_results_
for params, mean_score, std_score in zip(
        results['params'],
        results['mean_test_score'],
        results['std_test_score']):
    print("mean: %.5f, std: %.5f, params: %r" % (mean_score, std_score, params))

# Best parameter combination and its mean cross-validated score.
print(grid.best_params_, grid.best_score_)

# Related topics (你可能感兴趣的): Python, MachineLearning