使用 Python 创建一个带特征选择的分类器

导入相应的模块并加载数据集

from sklearn import datasets

# Load the Iris dataset through sklearn's ``datasets`` module.
iris = datasets.load_iris()

# Feature matrix: every row and every column of ``iris.data``.
X = iris.data[:, :]
# Target values taken from ``iris.target``.
y = iris.target

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import EnsembleVoteClassifier
from sklearn.pipeline import Pipeline
from mlxtend.feature_selection import SequentialFeatureSelector

基础模型建立

# Three base estimators that are later combined into the voting ensemble.
# The two constructors that are given a seed both use random_state=1.
clf1, clf2, clf3 = (
    LogisticRegression(random_state=1),
    RandomForestClassifier(random_state=1),
    GaussianNB(),
)

创建一个带特征选择的分类器 pipeline，并通过网格搜索调参

# Settings for the sequential feature selector wrapped around clf1:
# plain forward selection (forward=True, floating=False) scored by accuracy.
# NOTE(review): per the mlxtend docs, cv=0 turns off the selector's internal
# cross-validation — confirm against the installed mlxtend version.
sfs_options = dict(
    k_features=4,
    forward=True,
    floating=False,
    scoring='accuracy',
    verbose=0,
    cv=0,
)
sfs1 = SequentialFeatureSelector(clf1, **sfs_options)

# Two-step pipeline: feature selection ('sfs') feeding logistic regression
# ('logreg'). The step names are what the grid-search keys refer to.
pipeline_steps = [('sfs', sfs1), ('logreg', clf1)]
clf1_pipe = Pipeline(pipeline_steps)

# Soft-voting ensemble over the pipeline and the two remaining base models.
ensemble_members = [clf1_pipe, clf2, clf3]
eclf = EnsembleVoteClassifier(clfs=ensemble_members, voting='soft')

# Hyper-parameter grid for the ensemble. Each key is a double-underscore path
# to a nested parameter; the three value lists give 3 * 2 * 2 = 12 candidates.
params = {}
params['pipeline__sfs__k_features'] = [1, 2, 3]
params['pipeline__logreg__C'] = [1.0, 100.0]
params['randomforestclassifier__n_estimators'] = [20, 200]

# Exhaustive search over the grid with cv=5, fitted on the full Iris data.
grid = GridSearchCV(eclf, params, cv=5)
grid.fit(iris.data, iris.target)

# Columns of ``grid.cv_results_`` that make up one report line.
cv_keys = ('mean_test_score', 'std_test_score', 'params')

# Pull the three columns once, then walk them in lockstep: one printed line
# per parameter combination.
# NOTE(review): the figure printed after "+/-" is std_test_score / 2, not the
# full standard deviation — confirm this is the intended spread.
score_columns = [grid.cv_results_[key] for key in cv_keys]
for mean_score, std_score, candidate in zip(*score_columns):
    print("%0.3f +/- %0.2f %r" % (mean_score, std_score / 2.0, candidate))

你可能感兴趣的:(机器学习)