Grid Search for Optimal Parameters
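Each classifier below is wrapped in a Pipeline together with a MinMaxScaler, tuned with GridSearchCV over a small per-model parameter grid, and then evaluated on a held-out test set.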

from sklearn.linear_model import LogisticRegression # Logistic Regression
from sklearn.neighbors import KNeighborsClassifier  # K-Nearest Neighbors
from sklearn.svm import SVC                         # Support Vector Machine
from sklearn.tree import DecisionTreeClassifier     # Decision Tree
from sklearn.ensemble import RandomForestClassifier # Random Forest
from sklearn.ensemble import AdaBoostClassifier     # AdaBoost
from xgboost.sklearn import XGBClassifier           # XGBoost
from lightgbm.sklearn import LGBMClassifier         # LightGBM

from sklearn.model_selection import GridSearchCV               # cross-validated grid search
from sklearn.metrics import accuracy_score, roc_auc_score      # scoring functions used below

def grid_search(pipeline, param_grid, x_train, y_train, x_test, y_test, num_folds=5, metrics='accuracy'):
    """Cross-validated grid search over param_grid; prints and returns the test-set score."""
    response = {}
    gs = GridSearchCV(estimator=pipeline,
                      param_grid=param_grid,
                      cv=num_folds,
                      scoring=metrics)
    search = gs.fit(x_train, y_train)
    print('GridSearch best params: {}, best CV score: {}'.format(search.best_params_, search.best_score_))
    y_pred = gs.predict(x_test)
    if metrics == 'roc_auc':
        # AUC is computed from predicted probabilities of the positive class
        y_prob = gs.predict_proba(x_test)[:, 1]
        score = roc_auc_score(y_test, y_prob)  # score against the true labels, not the features
        print('AUC score: {:.3f}'.format(score))
    else:
        # map the metric name to the matching sklearn function, e.g. 'accuracy' -> accuracy_score
        score = eval("{}_score".format(metrics))(y_test, y_pred)
        print('{} score: {:.3f}'.format(metrics, score))
    response['y_pred'] = y_pred
    response[metrics] = score
    return response
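
Note that the eval-based dispatch only works when the corresponding *_score function (for example accuracy_score) has been imported, and the roc_auc branch requires the final estimator to expose predict_proba (for SVC this means constructing it with probability=True).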


from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

classifiers=[LogisticRegression(solver='liblinear'),
             KNeighborsClassifier(metric='minkowski'),
             SVC(kernel='rbf'), 
             DecisionTreeClassifier(criterion='gini'),
             RandomForestClassifier(criterion='gini'),
             AdaBoostClassifier(),
             XGBClassifier(),
             LGBMClassifier(is_unbalance=True)]
classifier_name=['lr',
                 'kneighborsclassifier',
                 'svc',
                 'decisiontreeclassifier',
                 'randomforestclassifier',
                 'adaboostclassifier',
                 'xgboost',
                 'lightgbm']
classifier_param_grid=[{'lr__C':[0.001, 0.01, 0.1, 1]},
                       {'kneighborsclassifier__n_neighbors':[4,6,8]},
                       {'svc__C':[0.01,0.1,1], 'svc__gamma':[0.01,0.1,1]},
                       {'decisiontreeclassifier__max_depth':range(5,11)},
                       {'randomforestclassifier__n_estimators':range(1,11)},
                       {'adaboostclassifier__n_estimators':range(70,121,10)},
                       {'xgboost__max_depth':[5,7,9]},
                       {'lightgbm__max_depth':range(4,10)}]
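
Each key in these grids follows scikit-learn's step_name__parameter convention, so the prefixes must match the step names in classifier_name that are used when building the Pipeline below.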

results = {}
# X_train, X_test, y_train, y_test are assumed to come from an earlier train/test split
for model, model_name, model_param_grid in zip(classifiers, classifier_name, classifier_param_grid):
    # scale features to [0, 1] before fitting each classifier
    pipeline = Pipeline([("scaler", MinMaxScaler()), (model_name, model)])
    results[model_name] = grid_search(pipeline, model_param_grid, X_train, y_train, X_test, y_test)
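
The training and test splits (X_train, X_test, y_train, y_test) are assumed to exist already. A minimal sketch of how they could be prepared, using scikit-learn's built-in breast-cancer dataset as a hypothetical stand-in for the article's actual data:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# hypothetical data source for illustration; substitute the dataset actually used
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)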
