Sklearn常用分类器总结

常用的分类器:

SVM、KNN、贝叶斯、线性回归(严格来说是回归模型,而非分类器)、逻辑回归、决策树、随机森林、xgboost、GBDT、boosting、神经网络NN。

### KNN Classifier    
from sklearn.neighbors import KNeighborsClassifier
 
# Train a k-nearest-neighbours classifier with the library defaults.
# fit() returns the estimator, so construction and training can be chained.
clf = KNeighborsClassifier().fit(train_x, train_y)
__________________________________________________________
 
### Logistic Regression Classifier    
from sklearn.linear_model import LogisticRegression
 
# Train a logistic-regression classifier with an L2 penalty.
# fit() returns the estimator, so construction and training can be chained.
clf = LogisticRegression(penalty='l2').fit(train_x, train_y)
__________________________________________________________
 
### Random Forest Classifier    
from sklearn.ensemble import RandomForestClassifier
 
# Train a random forest made of 8 trees.
# fit() returns the estimator, so construction and training can be chained.
clf = RandomForestClassifier(n_estimators=8).fit(train_x, train_y)
__________________________________________________________
 
### Decision Tree Classifier    
from sklearn import tree
 
# Train a single decision tree with the library defaults.
# fit() returns the estimator, so construction and training can be chained.
clf = tree.DecisionTreeClassifier().fit(train_x, train_y)
__________________________________________________________
 
### GBDT(Gradient Boosting Decision Tree) Classifier    
from sklearn.ensemble import GradientBoostingClassifier
 
# Train a gradient-boosted tree ensemble with 200 boosting stages.
# fit() returns the estimator, so construction and training can be chained.
clf = GradientBoostingClassifier(n_estimators=200).fit(train_x, train_y)
__________________________________________________________
 
### AdaBoost Classifier
from sklearn.ensemble import  AdaBoostClassifier
 
# Train an AdaBoost classifier with the library defaults.
# fit() returns the estimator, so construction and training can be chained.
clf = AdaBoostClassifier().fit(train_x, train_y)
__________________________________________________________
 
### GaussianNB
from sklearn.naive_bayes import GaussianNB
 
# Train a Gaussian naive Bayes classifier with the library defaults.
# fit() returns the estimator, so construction and training can be chained.
clf = GaussianNB().fit(train_x, train_y)
__________________________________________________________
 
### Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 
# Train a linear discriminant analysis classifier with the library defaults.
# fit() returns the estimator, so construction and training can be chained.
clf = LinearDiscriminantAnalysis().fit(train_x, train_y)
__________________________________________________________
 
### Quadratic Discriminant Analysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
 
# Train a quadratic discriminant analysis classifier with the library defaults.
# fit() returns the estimator, so construction and training can be chained.
clf = QuadraticDiscriminantAnalysis().fit(train_x, train_y)
__________________________________________________________
 
### SVM Classifier    
from sklearn.svm import SVC
 
# Train an RBF-kernel support vector classifier; probability=True turns on
# probability estimates for the fitted model.
# fit() returns the estimator, so construction and training can be chained.
clf = SVC(kernel='rbf', probability=True).fit(train_x, train_y)
__________________________________________________________
 
### Multinomial Naive Bayes Classifier    
from sklearn.naive_bayes import MultinomialNB
 
# Train a multinomial naive Bayes classifier with smoothing parameter
# alpha=0.01.
# fit() returns the estimator, so construction and training can be chained.
clf = MultinomialNB(alpha=0.01).fit(train_x, train_y)
__________________________________________________________

from sklearn.metrics import precision_recall_fscore_support

def timeDecor(func):
    """Decorator that prints the wall-clock run time of each call to ``func``.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed time in seconds, and returns whatever
        ``func`` returned. If ``func`` raises, the exception propagates and
        nothing is printed.
    """
    # Imported locally so the decorator works regardless of how (or whether)
    # the surrounding file binds the name ``time``.
    import functools
    import time

    @functools.wraps(func)  # keep the wrapped function's __name__/__doc__
    def innerDef(*args, **kwargs):
        t1 = time.time()
        result = func(*args, **kwargs)
        t = time.time() - t1
        print("{0}函数部分运行时间 :{1}s".format(str(func.__name__), t))
        return result
    return innerDef

@timeDecor
def svm_classify(X_train, y_train, X_test, y_test):
    """Fit an ``svm.SVC`` with default settings and print its test metrics.

    No hyper-parameter search is performed; the classifier is built with the
    constructor defaults.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn import svm

    t0 = time()
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    print("svm done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("SVM Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def rf_classify(X_train, y_train, X_test, y_test):
    """Fit a 500-tree random forest and print its test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.ensemble import RandomForestClassifier

    t0 = time()
    # random_state is fixed so repeated runs build the same forest.
    clf = RandomForestClassifier(random_state=0, n_estimators=500)
    clf.fit(X_train, y_train)
    print("rf done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("rf Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def knn_classify(X_train, y_train, X_test, y_test):
    """Fit a 5-nearest-neighbours classifier and print its test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.neighbors import KNeighborsClassifier

    t0 = time()
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(X_train, y_train)
    print("knn done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("knn Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def bagging_knn_classify(X_train, y_train, X_test, y_test):
    """Fit a bagging ensemble of KNN classifiers and print its test metrics.

    Each base estimator is a default ``KNeighborsClassifier``; the ensemble
    is configured with ``max_samples=0.5`` and ``max_features=0.5``.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.ensemble import BaggingClassifier
    from sklearn.neighbors import KNeighborsClassifier

    t0 = time()
    clf = BaggingClassifier(KNeighborsClassifier(),
                            max_samples=0.5, max_features=0.5)
    clf.fit(X_train, y_train)
    print("bagging_knn done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("bagging_knn Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def lr_classify(X_train, y_train, X_test, y_test):
    """Fit a logistic-regression classifier (``C=1e5``) and print test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.linear_model import LogisticRegression

    t0 = time()
    clf = LogisticRegression(C=1e5)
    clf.fit(X_train, y_train)
    print("lr done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("lr Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def nb_classify(X_train, y_train, X_test, y_test):
    """Fit a Gaussian naive Bayes classifier and print its test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.naive_bayes import GaussianNB

    t0 = time()
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    print("nb done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("nb Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def da_classify(X_train, y_train, X_test, y_test):
    """Fit a quadratic discriminant analysis model and print its test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    t0 = time()
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(X_train, y_train)
    print("da done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("da Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))

 
 
@timeDecor
def decisionTree_classify(X_train, y_train, X_test, y_test):
    """Fit a decision tree limited to depth 5 and print its test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.tree import DecisionTreeClassifier

    t0 = time()
    clf = DecisionTreeClassifier(max_depth=5)
    clf.fit(X_train, y_train)
    print("DT done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("DT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def xgboost_classify(X_train, y_train, X_test, y_test):
    """Fit a default ``xgboost.XGBClassifier`` and print its test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    import xgboost

    t0 = time()
    clf = xgboost.XGBClassifier()
    clf.fit(X_train, y_train)
    print("xgboost done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("xgboost Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def GBDT_classify(X_train, y_train, X_test, y_test):
    """Fit a 200-stage gradient boosting classifier and print test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.ensemble import GradientBoostingClassifier

    t0 = time()
    clf = GradientBoostingClassifier(n_estimators=200)
    clf.fit(X_train, y_train)
    print("GBDT done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("GBDT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
 
 
@timeDecor
def voting_classify(X_train, y_train, X_test, y_test):
    """Fit a soft-voting ensemble and print its test metrics.

    Only the random forest member is enabled. GBDT, logistic regression,
    Gaussian naive Bayes and XGBoost were sketched as further candidates but
    are not part of the ensemble, so they are no longer imported or built.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The fit time and the ``precision_recall_fscore_support`` result
        for the test set are printed.
    """
    # Local import: the file never binds ``time`` at module level.
    from time import time

    from sklearn.ensemble import RandomForestClassifier, VotingClassifier

    t0 = time()
    # Append further ("name", estimator) pairs here to grow the ensemble.
    estimators = [
        ('rf', RandomForestClassifier(random_state=0, n_estimators=500)),
    ]
    clf = VotingClassifier(estimators=estimators, voting='soft')
    clf.fit(X_train, y_train)
    print("voting done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("voting Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))

 参考链接:https://blog.csdn.net/u014180259/article/details/53580589

你可能感兴趣的:(python,人工智能)