from sklearn.model_selection import cross_val_score,KFold
# Define the cross-validation scheme.
n_folds = 5
rmse = []


def rmsle_cv(model):
    """Score *model* with shuffled K-fold CV and accumulate its mean RMSE.

    Computes the per-fold RMSE of *model* on the module-level ``train`` /
    ``y_train`` data, appends the mean RMSE to the module-level ``rmse``
    list, and returns that list.
    """
    # Bug fix: the original called ``.get_n_splits(...)``, which returns the
    # plain integer 5, so ``cv=kf`` silently discarded ``shuffle=True`` and
    # ``random_state=42``. Pass the KFold object itself as ``cv`` instead.
    kf = KFold(n_folds, shuffle=True, random_state=42)
    # ``neg_mean_squared_error`` is negated by sklearn; flip the sign
    # before taking the square root to obtain per-fold RMSE values.
    score = np.sqrt(-cross_val_score(model, train.values, y_train,
                                     scoring="neg_mean_squared_error", cv=kf))
    rmse.append(np.mean(score))
    return rmse
# Single model: sweep the XGBoost tree depth and plot the CV error curve.
params = [1, 2, 3, 4, 5, 6]
test_scores = []
for param in params:
    clf = XGBRegressor(max_depth=param)
    # sklearn returns negated MSE: flip the sign, then sqrt -> per-fold RMSE.
    test_score = np.sqrt(-cross_val_score(clf, X_train, y_train, cv=10,
                                          scoring='neg_mean_squared_error'))
    test_scores.append(np.mean(test_score))
# Bug fix: the original plotted against the undefined name ``max_depth``;
# the x-axis is the list of tried depths, i.e. ``params``.
plt.plot(params, test_scores)
plt.title("max_depth vs CV Error")
# Multiple models: standardized polynomial-feature pipelines whose final
# step picks its regularization strength by internal cross-validation.
models = [
    Pipeline([
        ('ss', StandardScaler()),
        ('poly', PolynomialFeatures()),
        ('linear', RidgeCV(alphas=np.logspace(-3, 1, 20))),
    ]),
    Pipeline([
        ('ss', StandardScaler()),
        ('poly', PolynomialFeatures()),
        ('linear', LassoCV(alphas=np.logspace(-3, 1, 20))),
    ]),
]
# Parameter grid: each key is "<step>__<attribute>" addressing a pipeline
# step's attribute; each value is the list of candidate settings to try.
parameters = {
"poly__degree": [3,2,1],
"poly__interaction_only": [True, False],# if True, keep only interaction terms (no pure powers such as x1*x1)
"poly__include_bias": [True, False],# include the degree-0 constant feature acting as the model's intercept
"linear__fit_intercept": [True, False]
}
titles = ['RidgeCV', 'LassoCV']
# Exhaustive 5-fold grid search over each pipeline; report the best
# hyper-parameters and the best mean CV score for each.
for t in range(2):
    model = GridSearchCV(models[t], param_grid=parameters, cv=5, n_jobs=1)
    # NOTE(review): earlier code uses ``X_train`` (capital X); confirm that
    # ``x_train`` is really defined elsewhere, otherwise this name is a typo.
    model.fit(x_train, y_train)
    print("%s算法:最优参数:" % titles[t], model.best_params_)
    print("%s算法:=%.3f" % (titles[t], model.best_score_))