# 集成学习
![机器学习之集成学习_第1张图片](http://img.e-com-net.com/image/info8/159a989899fa40ec9e6154798a794509.jpg)
bagging
![机器学习之集成学习_第2张图片](http://img.e-com-net.com/image/info8/dcc533ccf4ef444c92c67b1922e89804.jpg)
![机器学习之集成学习_第3张图片](http://img.e-com-net.com/image/info8/4bbf4474002f4381b11ebfff4927b04a.jpg)
bagging:
from sklearn import neighbors
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
def plot(model):
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
np.arange(y_min, y_max, 0.02))
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
plt.contourf(xx, yy, z)
# Load iris, keeping only the first two features so the decision
# boundary can be visualized in 2-D.
iris = datasets.load_iris()
x_data = iris.data[:, :2]
y_data = iris.target
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)

# Baseline: a single KNN classifier.
knn = neighbors.KNeighborsClassifier()
knn.fit(x_train, y_train)
plot(knn)
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
# The score was previously computed and silently discarded (notebook
# leftover); print it so the script reports the result.
print("KNN accuracy:", knn.score(x_test, y_test))

# Bagging: 100 KNN estimators, each fit on a bootstrap resample of the
# training set; predictions are combined by majority vote.
bagging_knn = BaggingClassifier(knn, n_estimators=100)
bagging_knn.fit(x_train, y_train)
plot(bagging_knn)
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()
print("Bagging+KNN accuracy:", bagging_knn.score(x_test, y_test))
普通KNN分类器:
![机器学习之集成学习_第4张图片](http://img.e-com-net.com/image/info8/1a49f41fb16949fc96b4638524da1880.jpg)
Bagging+KNN分类器:
![机器学习之集成学习_第5张图片](http://img.e-com-net.com/image/info8/c625bb2d45b54d4b96d3e6106c1fd670.jpg)
随机森林RF:
![机器学习之集成学习_第6张图片](http://img.e-com-net.com/image/info8/7ac6d34ddbae4a2e86d4cec0ebb51bef.jpg)
![机器学习之集成学习_第7张图片](http://img.e-com-net.com/image/info8/3bbc6255a72145cea2d2ee10e7410ca8.jpg)
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import matplotlib.pyplot as plt
def plot(model):
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
np.arange(y_min, y_max, 0.02))
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
plt.contourf(xx, yy, z)
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test)
plt.show()
# Load a CSV dataset: all columns but the last are features, the last is
# the class label. Requires LR-testSet2.txt in the working directory.
data = np.genfromtxt("LR-testSet2.txt", delimiter=",")
x_data = data[:, :-1]
y_data = data[:, -1]
# Hold out half of the data so the single tree's overfitting is visible.
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.5)

# Baseline: a single, fully-grown decision tree.
dtree = tree.DecisionTreeClassifier()
dtree.fit(x_train, y_train)
plot(dtree)
# Previously the score was computed and discarded; report it.
print("Decision tree accuracy:", dtree.score(x_test, y_test))

# Random forest: 50 trees, each trained on a bootstrap sample with
# random feature subsets, averaged by majority vote.
RF = RandomForestClassifier(n_estimators=50)
RF.fit(x_train, y_train)
plot(RF)
print("Random forest accuracy:", RF.score(x_test, y_test))
单棵决策树结果:
![机器学习之集成学习_第8张图片](http://img.e-com-net.com/image/info8/96f9a3994fe24808aa90d658f21ed309.jpg)
随机森林结果:
![机器学习之集成学习_第9张图片](http://img.e-com-net.com/image/info8/7b49f00ac598476b852cb9ee5b3727c6.jpg)
boosting:
![机器学习之集成学习_第10张图片](http://img.e-com-net.com/image/info8/ed849fdf3adc41a3915c13e4d21124fa.jpg)
![机器学习之集成学习_第11张图片](http://img.e-com-net.com/image/info8/d372d147596b42dc8cc7e5e00321b5de.jpg)
![机器学习之集成学习_第12张图片](http://img.e-com-net.com/image/info8/adc1c036ba384c6b99f83e69bf3f14f9.jpg)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_gaussian_quantiles
from sklearn.metrics import classification_report
def _show_decision_regions(model, step=0.02):
    """Contour-plot *model*'s predictions over the global x_data/y_data scatter.

    Factored out because the identical meshgrid/contourf sequence appeared
    twice verbatim (once for the tree, once for AdaBoost).
    """
    x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
    y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contourf(xx, yy, z)
    plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
    plt.show()


# Build two overlapping Gaussian-quantile blobs; the second blob's labels
# are flipped (-y2+1) so the classes interleave and a shallow tree underfits.
x1, y1 = make_gaussian_quantiles(n_samples=500, n_features=2, n_classes=2)
x2, y2 = make_gaussian_quantiles(mean=(3, 3), n_samples=500, n_features=2, n_classes=2)
x_data = np.concatenate((x1, x2))
y_data = np.concatenate((y1, -y2 + 1))
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Weak learner baseline: a single depth-3 decision tree.
model = tree.DecisionTreeClassifier(max_depth=3)
model.fit(x_data, y_data)
_show_decision_regions(model)

# AdaBoost: 10 sequentially boosted copies of the same weak learner,
# each reweighted toward the previous round's misclassified samples.
model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=10)
model.fit(x_data, y_data)
_show_decision_regions(model)
# Previously the score expression's value was discarded; report it.
print("AdaBoost training accuracy:", model.score(x_data, y_data))
原始数据:
![机器学习之集成学习_第13张图片](http://img.e-com-net.com/image/info8/5e07525a2fca4bf0bbbe2cb1ebab9fc9.jpg)
决策树模型:
![机器学习之集成学习_第14张图片](http://img.e-com-net.com/image/info8/2ccea1a382f145ffb718047cff8225fb.jpg)
AdaBoost模型:
![机器学习之集成学习_第15张图片](http://img.e-com-net.com/image/info8/239119d0337046ff9a202955c4b8db28.jpg)
stacking:
![机器学习之集成学习_第16张图片](http://img.e-com-net.com/image/info8/edb467471b404017b43462cbe1c995e6.jpg)
from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from mlxtend.classifier import StackingClassifier
from sklearn.ensemble import VotingClassifier
import numpy as np
# Use two iris features (columns 1 and 2) so the base learners disagree
# enough for the ensemble to matter.
iris = datasets.load_iris()
x_data, y_data = iris.data[:, 1:3], iris.target

# Three heterogeneous base classifiers plus a logistic-regression
# meta-learner for the stacking ensemble.
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = DecisionTreeClassifier()
clf3 = LogisticRegression()
lr = LogisticRegression()

print("stacking:")
sclf1 = StackingClassifier(classifiers=[clf1, clf2, clf3], meta_classifier=lr)
# Loop bodies below were un-indented in the pasted source (broken Python);
# structure restored. Compare each base learner with the stacked model
# via 3-fold cross-validated accuracy.
for clf, label in zip([clf1, clf2, clf3, sclf1],
                      ['KNN', 'Decision Tree', 'LogisticRegression', 'StackingClassifier']):
    scores = model_selection.cross_val_score(clf, x_data, y_data, cv=3, scoring='accuracy')
    print("Accuracy: %0.2f [%s]" % (scores.mean(), label))

print("voting:")
# Hard-voting ensemble over the same three base classifiers.
sclf2 = VotingClassifier([('knn', clf1), ('dtree', clf2), ('lr', clf3)])
for clf, label in zip([clf1, clf2, clf3, sclf2],
                      ['KNN', 'Decision Tree', 'LogisticRegression', 'VotingClassifier']):
    scores = model_selection.cross_val_score(clf, x_data, y_data, cv=3, scoring='accuracy')
    print("Accuracy: %0.2f [%s]" % (scores.mean(), label))
![机器学习之集成学习_第17张图片](http://img.e-com-net.com/image/info8/3d615764d32846d888c4b0fe9f4bd826.jpg)