Iris dataset

1. Introduction

  • Iris is a type of flower.

  • The Iris dataset contains 150 samples in total, drawn from three species of iris (Setosa, Versicolor, Virginica) with 50 samples each.

  • The data were collected by the American botanist (plant scientist) Edgar Anderson and introduced as a benchmark dataset by R. A. Fisher in 1936.

  • Irises are described by four measurements in this dataset: sepal length, sepal width, petal length, petal width

Iris数据集是常用的分类实验数据集,由Fisher, 1936收集整理。Iris也称鸢尾花卉数据集,是一类多重变量分析的数据集。数据集包含150个数据样本,分为3类,每类50个数据,每个数据包含4个属性。可通过花萼长度,花萼宽度,花瓣长度,花瓣宽度4个属性预测鸢尾花卉属于(Setosa,Versicolour,Virginica)三个种类中的哪一类。

# Alternative that reads the picture with matplotlib instead of PIL:
#import matplotlib.image as mpimg
#img = mpimg.imread('03_iris.png')
#plt.imshow(img)

# Import PIL (Python Imaging Library), which provides the Image class
from PIL import Image	
# Path of the image file; a bare file name refers to a file in the current directory
img = Image.open('03_iris.png') 
# Display the picture; img.show() can be used as well
img	

Data checking

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
# Load the Iris data and print its main pieces for a quick inspection:
# values, names, container types and array shapes, in that order.
iris = load_iris()
for piece in (
    iris.data,
    iris.feature_names,  # column names, which are not known otherwise
    iris.target,
    iris.target_names,
    type(iris.data),
    type(iris.target),
    iris.data.shape,
    iris.target.shape,
):
    print(piece)

Data visualization

# Feature matrix and class labels.
X = iris.data
Y = iris.target
# Each row of the transpose is one measurement column of X.
sepal_len, sepal_wid, petal_len, petal_wid = X.T
from matplotlib.pyplot import figure

# Two side-by-side histograms of petal width: 10 bins on the left,
# 20 bins on the right.
fig, axs = plt.subplots(1, 2)
fig.set_size_inches(20, 12)
fig.set_dpi(120)
for panel, bin_count in zip(axs, (10, 20)):
    panel.hist(petal_wid, bins=bin_count, color="darkblue")
    panel.set_xlabel('Petal Width')
    panel.set_ylabel('Count')
# 3D histogram of the joint distribution of petal width and petal length.
fig = plt.figure(figsize=(8, 8), dpi=120)
ax = fig.add_subplot(projection='3d')
# Both measurements stay in sample order so that every (width, length) pair
# comes from the same flower. Reversing only one of them would pair each
# flower's length with another flower's width and distort the counts.
x, y = petal_wid, petal_len
hist, xedges, yedges = np.histogram2d(x, y, bins=3)

# Anchor positions (lower edges of each bin) of the 3 x 3 = 9 bars.
xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
xpos = xpos.ravel()
ypos = ypos.ravel()
zpos = 0

# Bar dimensions: a fixed 0.5 x 0.5 footprint, with the bin count as height.
dx = dy = 0.5 * np.ones_like(zpos)
dz = hist.ravel()

ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color='lightskyblue')
ax.set_xlabel('Petal Width')
ax.set_ylabel('Petal Length')
ax.set_zlabel('Count')
plt.show()
# Box plot comparing the value ranges of the four measurements.
fig = plt.figure(figsize=(8, 8), dpi=120)
ax = fig.add_subplot(111)
data1 = [sepal_len, sepal_wid, petal_len, petal_wid]
box_labels = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
ax.boxplot(data1, labels=box_labels)
ax.set_ylabel('Values(Centimeters)')
# 4 x 4 scatter-plot matrix: the panel in row r, column c plots feature c on
# the x axis against feature r on the y axis. Diagonal panels are left empty.
iris = load_iris()
feature_labels = ['sepal length', 'sepal width', 'petal length', 'petal width']
# Row range and marker style of each species, in dataset order.
species_styles = [
    (slice(0, 50), {'c': 'b', 'marker': 'x'}),
    (slice(50, 100), {'c': 'g', 'marker': '+'}),
    (slice(100, 150), {'facecolors': 'none', 'edgecolors': 'r'}),
]

fig, axes = plt.subplots(nrows=4, ncols=4)
fig.set_figheight(10)
fig.set_figwidth(15)
fig.set_dpi(120)

for row in range(4):
    for col in range(4):
        if row == col:
            continue
        for rows, style in species_styles:
            axes[row][col].scatter(iris.data[rows, col], iris.data[rows, row], **style)

# Label the left column (y axes) and the bottom row (x axes). Each bottom
# label names the feature actually plotted on x in that column.
for idx, label in enumerate(feature_labels):
    axes[idx][0].set_ylabel(label)
    axes[3][idx].set_xlabel(label)

# One shared legend, anchored above the panel in row 1, column 0.
axes[1][0].legend(['Setosa', 'Versicolor', 'Virginica'],
                  bbox_to_anchor=(0.9, 2.1), prop={'size': 16})
plt.show()
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Heat map of the standardized measurements (rows are samples, columns are features).
iris = load_iris()
fig = plt.figure(figsize=(6, 6), dpi=120)
ax = fig.add_subplot()
# Standardize every column (subtract its mean, divide by its standard
# deviation) into a NEW array, so the loaded iris.data is not overwritten.
data2D = (iris.data - iris.data.mean(axis=0)) / iris.data.std(axis=0)
# The extent stretches the 150 x 4 matrix onto a 180 x 180 square.
im = ax.imshow(data2D, cmap='jet', extent=[0, 180, 180, 0])

# Ticks sit at the centre of each of the four 45-unit-wide columns.
plt.xticks(ticks=[22.5, 67.5, 112.5, 157.5], labels=['sepal length', 'sepal width', 'petal length', 'petal width'])
# 180 vertical units cover 150 samples, so 60/120/180 mark samples 50/100/150.
plt.yticks(ticks=[60, 120, 180], labels=['50', '100', '150'])
plt.ylabel('Virginica             Versicolor             Setosa')
# Put the colour bar in its own narrow axes to the right of the image.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="10%", pad=0.08)

plt.colorbar(im, cax=cax)
plt.show()
# Heat map of the sample-by-sample correlation matrix: np.corrcoef treats
# each row (one flower) as a variable, so the result is 150 x 150.
iris = load_iris()
matrix = np.corrcoef(iris.data)

fig = plt.figure(figsize=(6, 6), dpi=120)
ax = fig.add_subplot()
im = ax.imshow(matrix, cmap='jet', extent=[0, 180, 150, 0])

# Tick positions follow the extent: x spans 180 units, y spans 150 units.
ax.set_xticks([60, 120])
ax.set_xticklabels(['50', '100'])
ax.set_yticks([50, 100, 150])
ax.set_yticklabels(['50', '100', '150'])
ax.set_ylabel('Virginica                Versicolor                Setosa')
ax.set_xlabel('Setosa             Versicolor             Virginica')

# Colour bar in a dedicated axes to the right of the image.
cax = make_axes_locatable(ax).append_axes("right", size="10%", pad=0.08)
fig.colorbar(im, cax=cax)
plt.show()
# Per-sample line plots: every flower is drawn as one line across its four
# measurements. The right panel swaps the first two columns.
iris = load_iris()
fig, axs = plt.subplots(1, 2)
fig.set_figheight(8)
fig.set_figwidth(20)
fig.set_dpi(120)

# Row range, line format and legend label of each species, in dataset order.
species_lines = [
    (slice(0, 50), 'b--', 'Setosa'),
    (slice(50, 100), 'g--', 'Versicolor'),
    (slice(100, 150), 'r:', 'Virginica'),
]

for panel, column_order in zip(axs, ([0, 1, 2, 3], [1, 0, 2, 3])):
    for rows, fmt, name in species_lines:
        panel.plot(iris.data[rows, column_order].T, fmt, linewidth=1.6, label=name)
    # Each plot call creates 50 lines sharing one label; keep only the first
    # handle per label so the legend has one entry per species. The handles
    # are taken from this panel itself, not from the other one.
    handles, labels = panel.get_legend_handles_labels()
    labels, ids = np.unique(labels, return_index=True)
    handles = [handles[i] for i in ids]
    panel.legend(handles, labels, loc='best')

Reference

  • Getting started in scikit-learn with the famous iris dataset. Data School, YouTube channel

https://www.youtube.com/watch?v=hd1W4CyPX58

  • What does a box plot tell you? Saul McLeod, Simply Psychology

https://www.simplypsychology.org/boxplots.html#:~:text=What%20is%20a%20box%20plot,(or%20percentiles)%20and%20averages.

你可能感兴趣的:(机器学习,python,机器学习,开发语言)