特征融合(stacking)

特征融合(这里实际指的是模型/结果层面的融合)是数据挖掘比赛后期提分的一种重要手段。
做结果融合时,有一个很重要的前提:各模型的得分要比较接近,同时各模型预测结果之间的差异要比较大(相关性较低),这样融合之后往往能带来比较明显的效果提升。
这里主要分享 stacking 这种融合方式。

import numpy
import pandas
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from mlxtend.plotting import plot_learning_curves
from mlxtend.plotting import plot_decision_regions
from sklearn.metrics import accuracy_score
# Use the iris dataset as the running example.
iris = datasets.load_iris()

# Keep only feature columns 1 and 2 of the data matrix; the target is the
# class label.
X = iris.data[:, 1:3]
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=100
)
print(X, y)

(原文此处为图片,应为上面代码打印出的 X、y 的输出截图。)

# Hand-written stacking: three base classifiers feed their predicted labels
# into a logistic-regression meta-learner.
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
lr = LogisticRegression()

# Column name -> fitted base model; insertion order fixes the column order
# of the meta-feature frames built below.
base_models = {'pred1': clf1, 'pred2': clf2, 'pred3': clf3}
for base_model in base_models.values():
    base_model.fit(X_train, y_train)

# Meta-features for training: each base model's predicted label on the
# training set, one column per model.
# NOTE(review): the base models predict the same rows they were fitted on,
# so these features are optimistic (a 1-nearest-neighbour model largely
# reproduces its own training labels). This appears to mirror mlxtend's
# non-cross-validated StackingClassifier; out-of-fold predictions would be
# the more robust choice -- confirm before reusing this outside a demo.
preds = pandas.DataFrame(
    {name: base_model.predict(X_train)
     for name, base_model in base_models.items()}
)
lr.fit(preds, y_train)

# Meta-features for the held-out test set, built the same way.
preds2 = pandas.DataFrame(
    {name: base_model.predict(X_test)
     for name, base_model in base_models.items()}
)
stacked_pred = lr.predict(preds2)

# Print the score so it is visible when run as a plain script as well as in
# a notebook.
print(accuracy_score(y_test, stacked_pred))

(原文此处为图片,应为上面手写 stacking 代码输出的准确率截图。)

# The same stacking setup, built with mlxtend's StackingClassifier instead
# of wiring the base models and meta-learner by hand.
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
lr = LogisticRegression()

sclf = StackingClassifier(
    classifiers=[clf1, clf2, clf3],
    meta_classifier=lr,
)
sclf.fit(X_train, y_train)
pred = sclf.predict(X_test)

# Print the score so it is visible when run as a plain script as well as in
# a notebook.
print(accuracy_score(y_test, pred))

(原文此处为图片,应为上面 mlxtend StackingClassifier 代码输出的准确率截图。)

你可能感兴趣的:(天池竞赛)