sklearn常用分类器及代码实现

常用的分类器包括 SVM、KNN、朴素贝叶斯、逻辑回归、决策树、随机森林、XGBoost、GBDT 等 Boosting 方法以及神经网络(NN);线性回归虽然常被一并提及,但它本身是回归模型,一般不直接用作分类器。



代码如下:

from   sklearn.metrics  import   precision_recall_fscore_support

def timeDecor(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints ``func.__name__`` together with the elapsed seconds,
        and returns ``func``'s result unchanged. If ``func`` raises, the
        exception propagates and nothing is printed.
    """
    # Imported locally so the decorator works no matter how (or whether) the
    # enclosing module binds the name ``time``.
    import functools
    import time

    @functools.wraps(func)  # keep the wrapped function's __name__ / __doc__
    def innerDef(*args, **kwargs):
        t1 = time.perf_counter()
        result = func(*args, **kwargs)
        t = time.perf_counter() - t1
        print("{0}函数部分运行时间 :{1}s".format(func.__name__, t))
        return result

    return innerDef

@timeDecor
def svm_classify(X_train, y_train, X_test, y_test):
    """Fit a default ``sklearn.svm.SVC`` and print its test-set metrics.

    No hyper-parameter search is performed; the classifier is built with its
    default settings.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn import svm

    t0 = time()
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    print("svm done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("SVM Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def rf_classify(X_train, y_train, X_test, y_test):
    """Fit a 500-tree ``RandomForestClassifier`` and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.ensemble import RandomForestClassifier

    t0 = time()
    # random_state is fixed so repeated runs build the same forest.
    clf = RandomForestClassifier(random_state=0, n_estimators=500)
    clf.fit(X_train, y_train)
    print("rf done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("rf Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def knn_classify(X_train, y_train, X_test, y_test):
    """Fit a 5-nearest-neighbours classifier and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.neighbors import KNeighborsClassifier

    t0 = time()
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(X_train, y_train)
    print("knn done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("knn Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def bagging_knn_classify(X_train, y_train, X_test, y_test):
    """Fit a bagging ensemble of KNN classifiers and print its test-set metrics.

    The ensemble is built with ``max_samples=0.5`` and ``max_features=0.5``
    around a default ``KNeighborsClassifier``.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimators below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.ensemble import BaggingClassifier
    from sklearn.neighbors import KNeighborsClassifier

    t0 = time()
    clf = BaggingClassifier(
        KNeighborsClassifier(),
        max_samples=0.5,
        max_features=0.5,
    )
    clf.fit(X_train, y_train)
    print("bagging_knn done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("bagging_knn Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def lr_classify(X_train, y_train, X_test, y_test):
    """Fit a ``LogisticRegression(C=1e5)`` model and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.linear_model import LogisticRegression

    t0 = time()
    clf = LogisticRegression(C=1e5)
    clf.fit(X_train, y_train)
    print("lr done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("lr Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def nb_classify(X_train, y_train, X_test, y_test):
    """Fit a ``GaussianNB`` classifier and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.naive_bayes import GaussianNB

    t0 = time()
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    print("nb done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("nb Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def da_classify(X_train, y_train, X_test, y_test):
    """Fit a ``QuadraticDiscriminantAnalysis`` model and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    t0 = time()
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(X_train, y_train)
    print("da done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("da Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))

 
 
@timeDecor
def decisionTree_classify(X_train, y_train, X_test, y_test):
    """Fit a depth-5 ``DecisionTreeClassifier`` and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.tree import DecisionTreeClassifier

    t0 = time()
    clf = DecisionTreeClassifier(max_depth=5)
    clf.fit(X_train, y_train)
    print("DT done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("DT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def xgboost_classify(X_train, y_train, X_test, y_test):
    """Fit a default ``xgboost.XGBClassifier`` and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like xgboost below, so the function does
    # not rely on a module-level import being present.
    from time import time

    import xgboost

    t0 = time()
    clf = xgboost.XGBClassifier()
    clf.fit(X_train, y_train)
    print("xgboost done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("xgboost Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def GBDT_classify(X_train, y_train, X_test, y_test):
    """Fit a 200-stage ``GradientBoostingClassifier`` and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimator below, so the function
    # does not rely on a module-level import being present.
    from time import time

    from sklearn.ensemble import GradientBoostingClassifier

    t0 = time()
    clf = GradientBoostingClassifier(n_estimators=200)
    clf.fit(X_train, y_train)
    print("GBDT done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("GBDT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def voting_classify(X_train, y_train, X_test, y_test):
    """Fit a soft-voting ensemble and print its test-set metrics.

    Only a 500-tree random forest is currently registered with the
    ``VotingClassifier``; append more ``(name, estimator)`` pairs to the
    ``estimators`` list to vote across several models.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on after fitting.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit duration and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # ``time`` is imported locally, like the estimators below, so the function
    # does not rely on a module-level import being present.
    from time import time

    # Only the estimators that take part in the vote are imported, so an
    # unused optional dependency cannot make this function fail.
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier

    t0 = time()
    # random_state is fixed so repeated runs build the same forest.
    rf = RandomForestClassifier(random_state=0, n_estimators=500)
    clf = VotingClassifier(
        estimators=[
            ('rf', rf),
        ],
        voting='soft',
    )
    clf.fit(X_train, y_train)
    print("voting done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("voting Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))












你可能感兴趣的:(sklearn)