from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingCVClassifier
import matplotlib.pyplot as plt
# Load the iris data; keep only sepal width and petal length (columns 1-2)
# so the later decision-region plots can be drawn in 2-D.
iris = load_iris()
X, y = iris.data[:, 1:3], iris.target
RANDOM_SEED = 42

# First-level (base) estimators.
clf1 = KNeighborsClassifier(3)
clf2 = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
clf3 = GaussianNB()
# Second-level (meta) estimator.
lr = LogisticRegression()

# Two-layer stacking ensemble: out-of-fold predictions of the base
# classifiers become the features of the logistic-regression meta classifier.
sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],   # level-1 classifiers
                            meta_classifier=lr,               # level-2 classifier
                            random_state=RANDOM_SEED)

print('3-fold cross validation:\n')
for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'RFC', 'Naive Bayes', 'StackingClassifier']):
    scores = cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy: %0.4f (+/- %0.4f) [%s]" % (scores.mean(), scores.std(), label))
3-fold cross validation:
Accuracy: 0.9733 (+/- 0.0094) [KNN]
Accuracy: 0.9667 (+/- 0.0189) [RFC]
Accuracy: 0.9400 (+/- 0.0163) [Naive Bayes]
Accuracy: 0.9667 (+/- 0.0189) [StackingClassifier]
# Plot the decision boundary of each classifier in a 2x2 grid.
from mlxtend.plotting import plot_decision_regions
import matplotlib.gridspec as gridspec
import itertools

gs = gridspec.GridSpec(2, 2)
fig = plt.figure(figsize=(10, 8))
# itertools.product([0, 1], repeat=2) yields the four grid cells
# (0,0), (0,1), (1,0), (1,1) in lockstep with the classifiers.
for clf, lab, grd in zip([clf1, clf2, clf3, sclf],
                         ['KNN', 'RFC', 'Naive Bayes', 'StackingClassifier'],
                         itertools.product([0, 1], repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    # plot_decision_regions returns the Axes it drew on (not a Figure),
    # so bind it to `ax` rather than clobbering `fig`.
    ax = plot_decision_regions(X=X, y=y, clf=clf)
    plt.title(lab)
plt.show()
![Decision regions of the four classifiers](output_7_0.svg)
# Repeat the comparison using all four iris features, and let the meta
# classifier consume the base classifiers' class probabilities rather
# than their hard label predictions (use_probas=True).
iris = load_iris()
X, y = iris.data, iris.target

clf1 = KNeighborsClassifier(3)
clf2 = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
clf3 = GaussianNB()
lr = LogisticRegression()

# Two-layer stacking ensemble.
sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],   # level-1 classifiers
                            meta_classifier=lr,               # level-2 classifier
                            use_probas=True,
                            random_state=RANDOM_SEED)

print('3-fold cross validation:\n')
for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'RFC', 'Naive Bayes', 'StackingClassifier']):
    scores = cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy: %0.4f (+/- %0.4f) [%s]" % (scores.mean(), scores.std(), label))
3-fold cross validation:
Accuracy: 0.9733 (+/- 0.0094) [KNN]
Accuracy: 0.9667 (+/- 0.0189) [RFC]
Accuracy: 0.9400 (+/- 0.0163) [Naive Bayes]
Accuracy: 0.9667 (+/- 0.0189) [StackingClassifier]
# Same probability-stacking experiment again (X, y are still the full
# four-feature iris data from the previous cell).
clf1 = KNeighborsClassifier(3)
clf2 = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
clf3 = GaussianNB()
lr = LogisticRegression()

# Two-layer stacking ensemble.
sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],   # level-1 classifiers
                            meta_classifier=lr,               # level-2 classifier
                            use_probas=True,
                            random_state=RANDOM_SEED)

print('3-fold cross validation:\n')
for clf, label in zip([clf1, clf2, clf3, sclf],
                      ['KNN', 'RFC', 'Naive Bayes', 'StackingClassifier']):
    scores = cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy: %0.4f (+/- %0.4f) [%s]" % (scores.mean(), scores.std(), label))
3-fold cross validation:
Accuracy: 0.9733 (+/- 0.0094) [KNN]
Accuracy: 0.9667 (+/- 0.0189) [RFC]
Accuracy: 0.9400 (+/- 0.0163) [Naive Bayes]
Accuracy: 0.9667 (+/- 0.0189) [StackingClassifier]
from sklearn.model_selection import GridSearchCV

clf1 = KNeighborsClassifier(1)
clf2 = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
clf3 = GaussianNB()
lr = LogisticRegression()

# Two-layer stacking ensemble to be tuned by grid search.
sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],   # level-1 classifiers
                            meta_classifier=lr,               # level-2 classifier
                            random_state=RANDOM_SEED)

# Nested-estimator parameters use the <lowercased class name>__<param>
# convention; the meta classifier is addressed via the meta_classifier__ prefix.
params = {
    'kneighborsclassifier__n_neighbors': [1, 5],
    'randomforestclassifier__n_estimators': [10, 100],
    'meta_classifier__C': [0.1, 10.0],
}

grid = GridSearchCV(estimator=sclf,
                    param_grid=params,
                    cv=5,
                    refit=True)
grid.fit(X, y)

cv_keys = ('mean_test_score', 'std_test_score', 'params')
for r, _ in enumerate(grid.cv_results_['mean_test_score']):
    # NOTE(review): half of the std is printed here (std / 2.0), mirroring the
    # mlxtend documentation example; the earlier cells print the full std.
    print("%0.4f +/- %0.4f %r" % (grid.cv_results_[cv_keys[0]][r],
                                  grid.cv_results_[cv_keys[1]][r] / 2.0,
                                  grid.cv_results_[cv_keys[2]][r]))

print('Best parameters: %s' % grid.best_params_)
print('Accuracy: %.4f' % grid.best_score_)
0.9667 +/- 0.0105 {'kneighborsclassifier__n_neighbors': 1, 'meta_classifier__C': 0.1, 'randomforestclassifier__n_estimators': 10}
0.9600 +/- 0.0125 {'kneighborsclassifier__n_neighbors': 1, 'meta_classifier__C': 0.1, 'randomforestclassifier__n_estimators': 100}
0.9600 +/- 0.0125 {'kneighborsclassifier__n_neighbors': 1, 'meta_classifier__C': 10.0, 'randomforestclassifier__n_estimators': 10}
0.9533 +/- 0.0170 {'kneighborsclassifier__n_neighbors': 1, 'meta_classifier__C': 10.0, 'randomforestclassifier__n_estimators': 100}
0.9667 +/- 0.0105 {'kneighborsclassifier__n_neighbors': 5, 'meta_classifier__C': 0.1, 'randomforestclassifier__n_estimators': 10}
0.9667 +/- 0.0105 {'kneighborsclassifier__n_neighbors': 5, 'meta_classifier__C': 0.1, 'randomforestclassifier__n_estimators': 100}
0.9733 +/- 0.0125 {'kneighborsclassifier__n_neighbors': 5, 'meta_classifier__C': 10.0, 'randomforestclassifier__n_estimators': 10}
0.9667 +/- 0.0183 {'kneighborsclassifier__n_neighbors': 5, 'meta_classifier__C': 10.0, 'randomforestclassifier__n_estimators': 100}
Best parameters: {'kneighborsclassifier__n_neighbors': 5, 'meta_classifier__C': 10.0, 'randomforestclassifier__n_estimators': 10}
Accuracy: 0.9733
from sklearn.pipeline import make_pipeline
from mlxtend.feature_selection import ColumnSelector

# Each base learner sees a different subset of the four features.
pipe1 = make_pipeline(ColumnSelector(cols=(0, 2)),
                      LogisticRegression())
pipe2 = make_pipeline(ColumnSelector(cols=(1, 2, 3)),
                      LogisticRegression())
sclf_1 = StackingCVClassifier(classifiers=[pipe1, pipe2],
                              meta_classifier=LogisticRegression(),
                              random_state=42)
# BUG FIX: the original called sclf.fit(X, y), fitting the OLD stacker from
# the previous cell; the newly built pipeline stacker is sclf_1 (the printed
# repr below confirms the pipeline stacker was the intended fit target).
sclf_1.fit(X, y)
StackingCVClassifier(classifiers=[Pipeline(steps=[('columnselector',
ColumnSelector(cols=(0, 2))),
('logisticregression',
LogisticRegression())]),
Pipeline(steps=[('columnselector',
ColumnSelector(cols=(1, 2,
3))),
('logisticregression',
LogisticRegression())])],
meta_classifier=LogisticRegression(), random_state=42)
# Compare the feature-subset stacker against the individual classifiers.
for clf, label in zip([clf1, clf2, clf3, sclf_1],
                      ['KNN', 'RFC', 'Naive Bayes', 'StackingClassifier_1']):
    scores = cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy: %0.4f (+/- %0.4f) [%s]" % (scores.mean(), scores.std(), label))
Accuracy: 0.9733 (+/- 0.0094) [KNN]
Accuracy: 0.9667 (+/- 0.0189) [RFC]
Accuracy: 0.9400 (+/- 0.0163) [Naive Bayes]
Accuracy: 0.9600 (+/- 0.0163) [StackingClassifier_1]