Using cross_validate
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC
# Load the sample data
diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
lasso = linear_model.Lasso()
cv_results = cross_validate(lasso, X, y, cv=3,
                            return_estimator=True)  # also return the fitted estimator for each fold
print(sorted(cv_results.keys()))
print(cv_results)
'''
[out]:
['estimator', 'fit_time', 'score_time', 'test_score']
{'fit_time': array([0. , 0.00099611, 0. ]),
'score_time': array([0.00099874, 0. , 0.00099754]),
'estimator': [Lasso(), Lasso(), Lasso()],
'test_score': array([0.33150734, 0.08022311, 0.03531764])}
'''
# Use fold 0 of the k-fold cross-validation (its test score of 0.3315 is the highest) for prediction
res = cv_results["estimator"][0].predict(diabetes.data[:150])
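Rather than hard-coding index 0, the best fold can be picked programmatically from the returned test scores. A minimal sketch, reusing cv_results from above:
import numpy as np

best_idx = int(np.argmax(cv_results["test_score"]))   # fold with the highest test score
best_lasso = cv_results["estimator"][best_idx]
res = best_lasso.predict(diabetes.data[:150])
Note that an estimator from a single fold was only trained on part of the data; refitting on the full training set is the more common choice in practice.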
Official sklearn reference: sklearn.model_selection.cross_validate
A fuller example:
import pandas as pd  # used below to tabulate the CV scores
from sklearn.model_selection import cross_validate  # cross-validation
# Import the candidate models
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from xgboost.sklearn import XGBRegressor
from lightgbm.sklearn import LGBMRegressor
from catboost import CatBoostRegressor
# Build the list of models
models = [LinearRegression(),
          DecisionTreeRegressor(random_state=100),
          RandomForestRegressor(random_state=100),
          GradientBoostingRegressor(random_state=100),
          MLPRegressor(solver='lbfgs', max_iter=100, random_state=100),
          XGBRegressor(n_estimators=100, objective='reg:squarederror', random_state=100),
          LGBMRegressor(n_estimators=100, random_state=100),
          CatBoostRegressor(random_state=100)]
result_score = dict()      # stores each model's CV scores
result_estimator = dict()  # stores each model's fitted estimators
# X and Y_ln are the training features/target from the surrounding project;
# my_scorer is the custom scorer defined further below.
for model in models:
    model_name = str(model).split('(')[0]
    scores = cross_validate(model, X=X, y=Y_ln, verbose=0, cv=5,
                            scoring=my_scorer, return_estimator=True)
    result_score[model_name] = scores['test_score']
    result_estimator[model_name] = scores['estimator']
    print(model_name + ' is finished')
# Tabulate the scores for display
result_score = pd.DataFrame(result_score)
result_score.index = ['cv' + str(x) for x in range(1, 6)]
# Pick the best one
res = result_estimator["GradientBoostingRegressor"][2].predict(val_X)  # [2] picks the third GradientBoostingRegressor fold model and returns its predictions on val_X
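The model and fold can also be chosen programmatically instead of by eye. A sketch, assuming the result_score DataFrame and result_estimator dict built above (val_X is the validation feature matrix from the original task):
import numpy as np

mean_scores = result_score.mean()                  # mean CV score per model
best_model_name = mean_scores.idxmax()             # model with the best mean score
best_fold = int(np.argmax(result_score[best_model_name].values))
best_estimator = result_estimator[best_model_name][best_fold]
res = best_estimator.predict(val_X)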
The sklearn version used below is 0.24.2.
import numpy as np  # needed by the custom metric below
from sklearn.metrics import make_scorer  # build your own scoring function
from sklearn.model_selection import cross_val_score  # sklearn cross-validation
def Score(label, pred, **kwargs):
    # make_scorer passes the ground truth first and the prediction second,
    # so the parameters are named accordingly; write your own metric here
    my_ape = abs(pred - label) / label
    index = np.where(my_ape <= 0.05)
    Mape = np.mean(my_ape)
    Accuracy = len(index[0]) / len(label)  # share of samples within 5% relative error
    return 0.2 * (1 - Mape) + 0.8 * Accuracy
my_scorer = make_scorer(Score, greater_is_better=True)
# model, train_X and train_y come from the surrounding project
scores = cross_val_score(model, X=train_X, y=train_y, verbose=1, cv=5, scoring=my_scorer)
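To sanity-check the custom scorer on its own, the same call can be run end to end on a toy dataset (load_diabetes and GradientBoostingRegressor here are only stand-ins for the original task's data and model):
from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor

X_demo, y_demo = load_diabetes(return_X_y=True)
demo_scores = cross_val_score(GradientBoostingRegressor(random_state=100),
                              X=X_demo, y=y_demo, cv=5, scoring=my_scorer)
print(demo_scores, demo_scores.mean())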
Official sklearn reference: sklearn.metrics.make_scorer
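One related detail: when the custom metric is an error (lower is better), pass greater_is_better=False to make_scorer; sklearn then negates the value internally, which is why such scores print as negative numbers. A minimal sketch, reusing X_demo / y_demo from the check above:
def mape(label, pred, **kwargs):
    return np.mean(abs(pred - label) / label)

mape_scorer = make_scorer(mape, greater_is_better=False)
neg_scores = cross_val_score(GradientBoostingRegressor(random_state=100),
                             X=X_demo, y=y_demo, cv=5, scoring=mape_scorer)
print(neg_scores)  # negative MAPE values; closer to 0 is better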