sklearn - k折交叉验证简单使用

如何对多个模型进行 k 折交叉验证,并挑选表现最好的模型进行预测?

cross_validate运用

from sklearn import datasets, linear_model
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC
# Build the data: the first 150 samples of the diabetes regression data set.
diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
lasso = linear_model.Lasso()
# return_estimator=True asks cross_validate to also return the estimator
# fitted on each training split (under the "estimator" key).
# NOTE: the closing parenthesis must come before any trailing comment;
# otherwise the comment swallows it and the call is a syntax error.
cv_results = cross_validate(
    lasso,
    X,
    y,
    cv=3,
    return_estimator=True,
)
print(sorted(cv_results.keys()))
print(cv_results)
# Sample output recorded by the original author (timings vary per run):
# ['estimator', 'fit_time', 'score_time', 'test_score']
# {'fit_time': array([0.        , 0.00099611, 0.        ]),
#  'score_time': array([0.00099874, 0.        , 0.00099754]),
#  'estimator': [Lasso(), Lasso(), Lasso()],
#  'test_score': array([0.33150734, 0.08022311, 0.03531764])}

# Use the estimator from the fold with the highest test score for prediction
# (fold 0 in the sample output above, score 0.3315...), instead of
# hard-coding the fold index.
best_fold = cv_results["test_score"].argmax()
res = cv_results["estimator"][best_fold].predict(diabetes.data[:150])

sklearn官方索引:sklearn.model_selection.cross_validate
具体例子:

from sklearn.model_selection import cross_validate # 交叉验证
# 导入各种模型
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from xgboost.sklearn import XGBRegressor
from lightgbm.sklearn import LGBMRegressor
from catboost import CatBoostRegressor
# Compare several regressors with k-fold cross-validation.
# NOTE(review): this snippet relies on names it does not define itself --
# X, Y_ln, val_X, my_scorer and pd (presumably pandas) must already be in
# scope; confirm against the surrounding script.
N_SPLITS = 5  # number of CV folds; also drives the row labels of the score table

# Build the candidate models.
models = [
    LinearRegression(),
    DecisionTreeRegressor(random_state=100),
    RandomForestRegressor(random_state=100),
    GradientBoostingRegressor(random_state=100),
    MLPRegressor(solver='lbfgs', max_iter=100, random_state=100),
    XGBRegressor(n_estimators=100, objective='reg:squarederror', random_state=100),
    LGBMRegressor(n_estimators=100, random_state=100),
    CatBoostRegressor(random_state=100),
]
result_score = {}  # per-model array of test scores, one entry per fold
result_estimator = {}  # per-model list of fitted estimators, one per fold
for model in models:
    # Use the class name directly: parsing str(model) only works when the
    # repr happens to look like "Name(...)".
    model_name = type(model).__name__
    scores = cross_validate(
        model,
        X=X,
        y=Y_ln,
        verbose=0,
        cv=N_SPLITS,
        scoring=my_scorer,
        return_estimator=True,
    )
    result_score[model_name] = scores['test_score']
    result_estimator[model_name] = scores['estimator']
    print(model_name + ' is finished')
# Tabulate the scores: one column per model, one row per fold (cv1..cvN).
result_score = pd.DataFrame(result_score)
result_score.index = ['cv' + str(x) for x in range(1, N_SPLITS + 1)]
# Pick the best model by inspecting the table: here the author chose the
# third fold's ([2]) GradientBoostingRegressor and predicts with it.
res = result_estimator["GradientBoostingRegressor"][2].predict(val_X)

k折交叉验证中如何创建自己的评价函数

Sklearn version is 0.24.2

from sklearn.metrics import make_scorer # 构造属于自己的交叉验证函数库
from sklearn.model_selection import cross_val_score # 导入sklearn的交叉验证
def Score(pred, label, **kwargs):
    """Return a blended regression score: 0.2 * (1 - MAPE) + 0.8 * accuracy.

    "Accuracy" is the fraction of samples whose absolute percentage error
    is at most 5%.

    Args:
        pred: Predicted values (1-D array-like).
        label: Ground-truth values (1-D array-like, same length as ``pred``).
            Entries equal to zero make the percentage error undefined
            (inf/nan), exactly as in a plain MAPE.
        **kwargs: Accepted and ignored, so extra scorer keyword arguments
            do not break the call.

    Returns:
        The blended score as a float; higher is better.

    Note:
        ``sklearn.metrics.make_scorer`` calls its score function as
        ``score_func(y_true, y_pred)``. This function takes the prediction
        first, so swap the arguments when wrapping it as a scorer.
    """
    # Coerce to float arrays so plain lists work as well as ndarrays.
    pred = np.asarray(pred, dtype=float)
    label = np.asarray(label, dtype=float)
    # Divide by |label|: with a signed denominator, negative targets would
    # yield negative errors that always pass the 5% threshold.
    ape = np.abs(pred - label) / np.abs(label)
    mape = np.mean(ape)
    accuracy = np.count_nonzero(ape <= 0.05) / len(label)
    return 0.2 * (1 - mape) + 0.8 * accuracy
def _score_for_sklearn(y_true, y_pred, **kwargs):
    """Adapt Score(pred, label) to sklearn's score_func(y_true, y_pred) order."""
    return Score(y_pred, y_true, **kwargs)


# make_scorer invokes the score function as score_func(y_true, y_pred).
# Passing Score directly would bind the true values to `pred` and the
# predictions to `label`, so the percentage error would be divided by the
# predictions. The adapter restores the intended argument roles.
my_scorer = make_scorer(_score_for_sklearn, greater_is_better=True)
# NOTE(review): model, train_X and train_y are not defined in this snippet;
# they must be provided by the surrounding script.
scores = cross_val_score(model, X=train_X, y=train_y, verbose=1, cv=5, scoring=my_scorer)

sklearn 官方文档索引:
sklearn.metrics.make_scorer

你可能感兴趣的:(数据分析,sklearn,机器学习,python,数据分析)