SVM、KNN、贝叶斯、线性回归、逻辑回归、决策树、随机森林、xgboost、GBDT、boosting、神经网络NN。
# Quick-reference snippets: one scikit-learn classifier per section.
# NOTE: `train_x` / `train_y` are not defined in these snippets and must be
# supplied by the surrounding code. Every section rebinds the same name `clf`.
# The long underscore lines are visual separators only; executed as Python,
# each one is a bare name lookup.
### KNN Classifier
# k-nearest-neighbours classifier built with default constructor arguments.
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier()
clf.fit(train_x, train_y)
__________________________________________________________
### Logistic Regression Classifier
# Logistic regression with an explicit L2 penalty.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(penalty='l2')
clf.fit(train_x, train_y)
__________________________________________________________
### Random Forest Classifier
# Random forest limited to 8 estimators.
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_estimators=8)
clf.fit(train_x, train_y)
__________________________________________________________
### Decision Tree Classifier
# Single decision tree built with default constructor arguments.
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf.fit(train_x, train_y)
__________________________________________________________
### GBDT(Gradient Boosting Decision Tree) Classifier
# Gradient boosting with 200 estimators.
from sklearn.ensemble import GradientBoostingClassifier
clf = GradientBoostingClassifier(n_estimators=200)
clf.fit(train_x, train_y)
__________________________________________________________
### AdaBoost Classifier
# AdaBoost built with default constructor arguments.
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier()
clf.fit(train_x, train_y)
__________________________________________________________
### GaussianNB
# Gaussian naive Bayes built with default constructor arguments.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(train_x, train_y)
__________________________________________________________
### Linear Discriminant Analysis
# Linear discriminant analysis built with default constructor arguments.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
clf = LinearDiscriminantAnalysis()
clf.fit(train_x, train_y)
__________________________________________________________
### Quadratic Discriminant Analysis
# Quadratic discriminant analysis built with default constructor arguments.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
clf = QuadraticDiscriminantAnalysis()
clf.fit(train_x, train_y)
__________________________________________________________
### SVM Classifier
# Support vector classifier with an RBF kernel; `probability=True` is passed
# so that probability estimates are requested from the fitted model.
from sklearn.svm import SVC
clf = SVC(kernel='rbf', probability=True)
clf.fit(train_x, train_y)
__________________________________________________________
### Multinomial Naive Bayes Classifier
# Multinomial naive Bayes with alpha set to 0.01.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB(alpha=0.01)
clf.fit(train_x, train_y)
__________________________________________________________
from sklearn.metrics import precision_recall_fscore_support
def timeDecor(func):
    """Decorator that prints the wall-clock run time of each call to ``func``.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the function name and elapsed seconds, and returns
        whatever ``func`` returned.
    """
    # Imported locally so the decorator does not depend on what the
    # module-level name ``time`` is bound to (the module or the function).
    import functools
    import time as _time

    @functools.wraps(func)  # keep the wrapped function's __name__/__doc__
    def innerDef(*args, **kwargs):
        t1 = _time.time()
        result = func(*args, **kwargs)
        t2 = _time.time()
        t = t2 - t1
        # Function-call form of print, consistent with the rest of the file.
        print("{0}函数部分运行时间 :{1}s".format(str(func.__name__), t))
        return result

    return innerDef
@timeDecor
def svm_classify(X_train, y_train, X_test, y_test):
    """Fit a default ``sklearn.svm.SVC`` and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn import svm

    # NOTE: a grid search over kernel / C / gamma was sketched here earlier but
    # never wired in; the classifier is fitted with SVC's default parameters.
    t0 = time()
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    print("svm done in %0.3fs" % (time() - t0))

    # Only the test-set predictions are reported, so the training set is not
    # re-predicted.
    pre_y_test = clf.predict(X_test)
    print("SVM Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def rf_classify(X_train, y_train, X_test, y_test):
    """Fit a 500-estimator random forest and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.ensemble import RandomForestClassifier

    t0 = time()
    # random_state=0 is fixed by the original code.
    clf = RandomForestClassifier(random_state=0, n_estimators=500)
    clf.fit(X_train, y_train)
    print("rf done in %0.3fs" % (time() - t0))

    # Only the test-set predictions are reported, so the training set is not
    # re-predicted.
    pre_y_test = clf.predict(X_test)
    print("rf Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def knn_classify(X_train, y_train, X_test, y_test):
    """Fit a 5-neighbour KNN classifier and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.neighbors import KNeighborsClassifier

    t0 = time()
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(X_train, y_train)
    print("knn done in %0.3fs" % (time() - t0))

    # Only the test-set predictions are reported, so the training set is not
    # re-predicted.
    pre_y_test = clf.predict(X_test)
    print("knn Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def bagging_knn_classify(X_train, y_train, X_test, y_test):
    """Fit a bagging ensemble of KNN classifiers and print its test-set metrics.

    The ensemble is built with ``max_samples=0.5`` and ``max_features=0.5``
    around a default ``KNeighborsClassifier``.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.ensemble import BaggingClassifier
    from sklearn.neighbors import KNeighborsClassifier

    t0 = time()
    clf = BaggingClassifier(
        KNeighborsClassifier(),
        max_samples=0.5,
        max_features=0.5,
    )
    clf.fit(X_train, y_train)
    print("bagging_knn done in %0.3fs" % (time() - t0))

    pre_y_test = clf.predict(X_test)
    print("bagging_knn Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def lr_classify(X_train, y_train, X_test, y_test):
    """Fit a logistic regression (``C=1e5``) and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.linear_model import LogisticRegression

    t0 = time()
    clf = LogisticRegression(C=1e5)
    clf.fit(X_train, y_train)
    print("lr done in %0.3fs" % (time() - t0))

    # Only the test-set predictions are reported, so the training set is not
    # re-predicted.
    pre_y_test = clf.predict(X_test)
    print("lr Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def nb_classify(X_train, y_train, X_test, y_test):
    """Fit a default Gaussian naive Bayes model and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.naive_bayes import GaussianNB

    t0 = time()
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    print("nb done in %0.3fs" % (time() - t0))

    # Only the test-set predictions are reported, so the training set is not
    # re-predicted.
    pre_y_test = clf.predict(X_test)
    print("nb Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def da_classify(X_train, y_train, X_test, y_test):
    """Fit a default quadratic discriminant analysis model and print test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    t0 = time()
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(X_train, y_train)
    print("da done in %0.3fs" % (time() - t0))

    pre_y_test = clf.predict(X_test)
    print("da Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def decisionTree_classify(X_train, y_train, X_test, y_test):
    """Fit a decision tree with ``max_depth=5`` and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.tree import DecisionTreeClassifier

    t0 = time()
    clf = DecisionTreeClassifier(max_depth=5)
    clf.fit(X_train, y_train)
    print("DT done in %0.3fs" % (time() - t0))

    pre_y_test = clf.predict(X_test)
    print("DT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def xgboost_classify(X_train, y_train, X_test, y_test):
    """Fit a default ``xgboost.XGBClassifier`` and print its test-set metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    import xgboost

    t0 = time()
    clf = xgboost.XGBClassifier()
    clf.fit(X_train, y_train)
    print("xgboost done in %0.3fs" % (time() - t0))

    pre_y_test = clf.predict(X_test)
    print("xgboost Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def GBDT_classify(X_train, y_train, X_test, y_test):
    """Fit a 200-estimator gradient boosting classifier and print test metrics.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.ensemble import GradientBoostingClassifier

    t0 = time()
    clf = GradientBoostingClassifier(n_estimators=200)
    clf.fit(X_train, y_train)
    print("GBDT done in %0.3fs" % (time() - t0))

    pre_y_test = clf.predict(X_test)
    print("GBDT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def voting_classify(X_train, y_train, X_test, y_test):
    """Fit a soft-voting ensemble and print its test-set metrics.

    Only a random forest (``random_state=0``, ``n_estimators=500``) is
    currently registered as a voter.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the test predictions.

    Returns:
        None. The elapsed construct-and-fit time and the result of
        ``precision_recall_fscore_support(y_test, predictions)`` are printed.
    """
    # Imported locally so the timer does not depend on what the module-level
    # name ``time`` is bound to (the module or the function).
    from time import time
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier

    t0 = time()
    # Voters that were tried earlier and are currently disabled. To re-enable
    # one, import its class here and append the tuple to `estimators`:
    #   ('gbdt', GradientBoostingClassifier(n_estimators=200))
    #   ('lr', LogisticRegression(random_state=1))
    #   ('nb', GaussianNB())
    #   ('xgboost', xgboost.XGBClassifier())
    # They are no longer imported or instantiated while disabled, so the
    # function does not require xgboost to be installed.
    estimators = [
        ('rf', RandomForestClassifier(random_state=0, n_estimators=500)),
    ]
    clf = VotingClassifier(estimators=estimators, voting='soft')
    clf.fit(X_train, y_train)
    print("voting done in %0.3fs" % (time() - t0))

    pre_y_test = clf.predict(X_test)
    print("voting Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
参考链接:https://blog.csdn.net/u014180259/article/details/53580589