def lightgbm_cv(params, x_train, y_train, n_round, nfold, early_stopping_rounds):
    """Cross-validate one LightGBM parameter set and plot the metric curve.

    Runs ``lgb.cv`` with the ``mse`` metric, plots the first column of the
    result against the boosting-round index, and returns the whole result
    table.

    Args:
        params: LightGBM parameter dict forwarded to ``lgb.cv``.
        x_train: Training features, in any form ``lgb.Dataset`` accepts.
        y_train: Training labels.
        n_round: Maximum number of boosting rounds.
        nfold: Number of cross-validation folds.
        early_stopping_rounds: Stop once the metric has not improved for
            this many rounds. ``None`` or ``0`` disables early stopping.

    Returns:
        A ``pandas.DataFrame`` built from the dict returned by ``lgb.cv``:
        one row per boosting round, one column per reported statistic.
    """
    import lightgbm as lgb
    import pandas as pd
    from matplotlib import pyplot

    # Inline plotting only makes sense inside IPython/Jupyter. Outside of it
    # ``get_ipython`` is not defined, so skip the magic instead of crashing.
    try:
        ipython = get_ipython()
    except NameError:
        ipython = None
    if ipython is not None:
        # ``run_line_magic`` replaces the deprecated ``.magic()`` helper.
        ipython.run_line_magic('matplotlib', 'inline')

    # 'auto' is what the original passed to ``lgb.cv``; newer LightGBM
    # releases only accept these options on the Dataset itself.
    d_train = lgb.Dataset(
        x_train,
        y_train,
        feature_name='auto',
        categorical_feature='auto',
    )

    # The ``early_stopping_rounds`` keyword of ``lgb.cv`` was removed in
    # LightGBM 4.x; the callback form works on old and new releases alike.
    callbacks = []
    if early_stopping_rounds:
        callbacks.append(lgb.early_stopping(early_stopping_rounds, verbose=False))

    # NOTE(review): ``lgb.cv`` defaults to ``stratified=True``, which may
    # reject continuous regression labels - confirm against the label type.
    cv_result = lgb.cv(
        params,
        d_train,
        num_boost_round=n_round,
        nfold=nfold,
        shuffle=True,
        metrics='mse',
        callbacks=callbacks,
    )
    cv_result = pd.DataFrame(cv_result)

    # The first column is the one that gets plotted.
    metric_name = cv_result.columns[0]
    n_estimators = cv_result.shape[0]

    _, ax = pyplot.subplots(1, 1, figsize=(6, 6))
    ax.plot(range(n_estimators), cv_result[metric_name])
    ax.set_title('lightgbm n_estimator vs %s' % metric_name)
    ax.set_xlabel('n_estimators')
    ax.set_ylabel('%s' % metric_name)
    return cv_result
def train_predict_lightgmb_model(params, x_train, y_train, x_test, n_round, early_stopping_rounds):
    """Train on the full training set with tuned parameters, then predict.

    Meant to be called once parameter tuning is finished. Prints the wall
    clock time spent on training and on prediction.

    Args:
        params: LightGBM parameter dict forwarded to ``lgb.train``.
        x_train: Training features, in any form ``lgb.Dataset`` accepts.
        y_train: Training labels.
        x_test: Features to predict on. A pandas object is converted through
            ``.values``; anything else is passed to ``predict`` unchanged.
        n_round: Number of boosting rounds.
        early_stopping_rounds: Currently unused. Early stopping needs a
            validation set, which this function does not build yet.

    Returns:
        A list with one ``int`` per prediction (``int()`` truncates the
        fractional part of each predicted value).
    """
    # TODO: add a validation set and evaluation so early stopping can be used.
    import time

    import lightgbm as lgb

    # 'auto' is what the original passed to ``lgb.train``; newer LightGBM
    # releases only accept these options on the Dataset itself.
    train_set = lgb.Dataset(
        x_train,
        y_train,
        feature_name='auto',
        categorical_feature='auto',
    )

    t1 = time.time()
    print('training start...')
    model = lgb.train(params, train_set, num_boost_round=n_round)
    t2 = time.time()
    print('training end', 'time cost:%s' % (t2 - t1))

    print('predict start')
    # Accept plain arrays as well as pandas objects.
    test_matrix = getattr(x_test, 'values', x_test)
    res = model.predict(test_matrix)
    t3 = time.time()
    print('predict end', 'time cost:%s' % (t3 - t2))

    return [int(x) for x in res]
def lightgbm_grisearch(estimator, params, x_train, y_train):
    """Tune parameters with an exhaustive grid search.

    Fits a 5-fold ``GridSearchCV`` scored by ``neg_mean_absolute_error``,
    using all available cores.

    Args:
        estimator: scikit-learn compatible estimator to tune.
        params: Parameter grid (dict or list of dicts) to search.
        x_train: Training features.
        y_train: Training labels.

    Returns:
        A tuple ``(scores, best_params, best_score)``. ``scores`` is the
        legacy ``grid_scores_`` attribute when the installed scikit-learn
        still has it, otherwise the ``cv_results_`` dict.
    """
    from sklearn.model_selection import GridSearchCV

    gsearch = GridSearchCV(
        estimator=estimator,
        param_grid=params,
        scoring='neg_mean_absolute_error',
        cv=5,
        n_jobs=-1,
    )
    gsearch.fit(x_train, y_train)

    # ``grid_scores_`` was removed in scikit-learn 0.20; reading it directly
    # raises AttributeError there, so fall back to ``cv_results_``.
    scores = getattr(gsearch, 'grid_scores_', None)
    if scores is None:
        scores = gsearch.cv_results_
    return scores, gsearch.best_params_, gsearch.best_score_