# Pipelining in sklearn

import numpy
from sklearn import linear_model, decomposition, pipeline, model_selection, datasets

# Two-stage pipeline: PCA for dimensionality reduction, followed by a linear
# classifier trained with stochastic gradient descent on the logistic loss.
pca = decomposition.PCA()
# The loss is passed by keyword because SGDClassifier's parameters are
# keyword-only in current scikit-learn. The logistic loss is spelled
# 'log_loss' ('log' was deprecated in 1.1 and removed in 1.3).
logistic = linear_model.SGDClassifier(loss='log_loss')
# Step names matter: they are the prefixes used to address each step's
# hyper-parameters (e.g. 'pca__n_components', 'logistic__alpha').
pipe = pipeline.Pipeline([('pca', pca), ('logistic', logistic)])

# Load the digits dataset bundled with scikit-learn and split it into the
# feature matrix (x) and the class labels (y).
digit = datasets.load_digits()
x, y = digit.data, digit.target

# Hyper-parameter grid, addressed as '<step name>__<parameter>':
#   - number of principal components kept by the PCA step
#   - regularisation strength of the SGD classifier (1e-4 ... 1e4, 5 values)
param_grid = {
    'pca__n_components': [5, 20, 30, 40, 50, 64],
    'logistic__alpha': numpy.logspace(-4, 4, 5),
}

# Exhaustive 5-fold cross-validated search over the grid, using all cores.
# The former `iid=False` argument is intentionally omitted: it was removed
# from GridSearchCV in scikit-learn 0.24, and averaging the score across
# folds (what iid=False selected) is now the only behaviour.
gs = model_selection.GridSearchCV(pipe, param_grid, cv=5, n_jobs=-1)
gs.fit(x, y)

# Report the best parameter combination and its mean cross-validated score.
print(gs.best_params_)
print(gs.best_score_)

import matplotlib.pyplot as plt

# Fit the stand-alone PCA on the full data set to obtain the explained
# variance ratio of every component.
pca.fit(x)

# Number of components kept by the PCA step of the best pipeline found by
# the grid search.
best_pca = gs.best_estimator_.named_steps['pca']
best_n_components = best_pca.n_components

# Plot the explained-variance spectrum and mark the selected number of
# components with a dotted vertical line.
plt.figure()
plt.plot(pca.explained_variance_ratio_)
plt.axvline(best_n_components, linestyle=':')
plt.show()

 

# You may also be interested in: pipelining in sklearn