LightGBM 实现


# Cross-validation: check how one parameter set performs, plot the metric
# curve, and return the evaluation values of every boosting round.
def lightgbm_cv(params, x_train, y_train, n_round, nfold, early_stopping_rounds):
    """Cross-validate ``params`` with LightGBM and plot the first metric column.

    Args:
        params: Parameter dict forwarded to ``lgb.cv``.
        x_train: Training features, wrapped in an ``lgb.Dataset``.
        y_train: Training labels, wrapped in the same ``lgb.Dataset``.
        n_round: Maximum number of boosting rounds (``num_boost_round``).
        nfold: Number of cross-validation folds.
        early_stopping_rounds: Early-stopping patience in rounds. ``None`` or
            a non-positive value disables early stopping.

    Returns:
        A ``pandas.DataFrame`` built from the ``lgb.cv`` result; each row
        holds the evaluation values of one boosting round.
    """
    import lightgbm as lgb
    import pandas as pd
    from matplotlib import pyplot

    # ``get_ipython`` only exists inside IPython/Jupyter. In a plain
    # interpreter the name is undefined, so skip the inline-backend magic
    # instead of crashing before any training happens.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass

    # Early stopping is requested through the callback API: the dedicated
    # ``early_stopping_rounds`` keyword of ``lgb.cv`` was removed in
    # LightGBM 4.0, while the callback is accepted by old and new releases.
    callbacks = None
    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        callbacks = [lgb.early_stopping(early_stopping_rounds, verbose=False)]

    d_train = lgb.Dataset(x_train, y_train)
    cv_result = lgb.cv(
        params,
        d_train,
        num_boost_round=n_round,
        nfold=nfold,
        shuffle=True,
        metrics='mse',
        feature_name='auto',
        categorical_feature='auto',
        callbacks=callbacks,
    )
    cv_result = pd.DataFrame(cv_result)

    # Plot whichever column comes first in the result against the round index.
    metric_name = cv_result.columns[0]
    pyplot.subplots(1, 1, figsize=(6, 6))
    pyplot.plot(range(cv_result.shape[0]), cv_result[metric_name])
    pyplot.title('lightgbm n_estimator vs %s' % metric_name)
    pyplot.xlabel('n_estimators')
    pyplot.ylabel('%s' % metric_name)
    return cv_result
# Once tuning is finished, train on the whole training set with the chosen
# parameters and predict the test set.
def train_predict_lightgmb_model(params, x_train, y_train, x_test, n_round, early_stopping_rounds):
    """Train a LightGBM model on the full training data and predict ``x_test``.

    Args:
        params: Parameter dict forwarded to ``lgb.train``.
        x_train: Training features, wrapped in an ``lgb.Dataset``.
        y_train: Training labels, wrapped in the same ``lgb.Dataset``.
        x_test: Test features. If the object exposes ``.values`` (e.g. a
            pandas DataFrame) the underlying array is used; otherwise the
            object is passed to ``predict`` unchanged.
        n_round: Number of boosting rounds (``num_boost_round``).
        early_stopping_rounds: Currently unused; kept so existing callers
            keep working. Early stopping needs a validation set, which this
            function does not build yet.

    Returns:
        A list with one ``int`` per prediction; each raw prediction is
        converted with ``int()``, which drops the fractional part.
    """
    # TODO: add a validation set and eval metrics, then wire in early stopping.
    import lightgbm as lgb
    import time

    train_set = lgb.Dataset(x_train, y_train)
    # perf_counter is monotonic, so the reported durations cannot be skewed
    # by system clock adjustments.
    t1 = time.perf_counter()
    print('training start...')
    model = lgb.train(
        params,
        train_set,
        num_boost_round=n_round,
        feature_name='auto',
        categorical_feature='auto',
    )
    t2 = time.perf_counter()
    print('training end', 'time cost:%s' % (t2 - t1))

    print('predict start')
    # Accept plain arrays as well as pandas objects.
    test_matrix = getattr(x_test, 'values', x_test)
    res = model.predict(test_matrix)
    t3 = time.perf_counter()
    print('predict end', 'time cost:%s' % (t3 - t2))
    return [int(x) for x in res]
# Hyper-parameter tuning with an exhaustive grid search.
def lightgbm_grisearch(estimator, params, x_train, y_train):
    """Grid-search ``params`` for ``estimator`` with 5-fold cross-validation.

    Candidates are scored with ``neg_mean_absolute_error`` and the search
    uses all available cores (``n_jobs=-1``).

    Args:
        estimator: Estimator object handed to ``GridSearchCV``.
        params: Parameter grid (``param_grid``) to search.
        x_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.

    Returns:
        A tuple ``(scores, best_params, best_score)``. ``scores`` is
        ``grid_scores_`` when the installed scikit-learn still provides it,
        otherwise the ``cv_results_`` dict.
    """
    from sklearn.model_selection import GridSearchCV

    gsearch = GridSearchCV(
        estimator=estimator,
        param_grid=params,
        scoring='neg_mean_absolute_error',
        cv=5,
        n_jobs=-1,
    )
    gsearch.fit(x_train, y_train)

    # ``grid_scores_`` was removed in scikit-learn 0.20; reading it
    # unconditionally raises AttributeError there. Prefer it when present so
    # callers on old releases see the same value, else use ``cv_results_``.
    scores = getattr(gsearch, 'grid_scores_', None)
    if scores is None:
        scores = gsearch.cv_results_
    return scores, gsearch.best_params_, gsearch.best_score_

 

你可能感兴趣的:(学习笔记)