Iris is a type of flower.
The Iris dataset contains 150 samples in total, drawn from three different species of iris (Setosa, Versicolor, Virginica) with 50 samples each.
The data were gathered by the American botanist (plant scientist) Edgar Anderson in 1936.
Each iris in this dataset is described by four measurements: sepal length, sepal width, petal length, and petal width.
Iris数据集是常用的分类实验数据集,由Fisher, 1936收集整理。Iris也称鸢尾花卉数据集,是一类多重变量分析的数据集。数据集包含150个数据样本,分为3类,每类50个数据,每个数据包含4个属性。可通过花萼长度,花萼宽度,花瓣长度,花瓣宽度4个属性预测鸢尾花卉属于(Setosa,Versicolour,Virginica)三个种类中的哪一类。
# Alternative way to display the picture through matplotlib (left disabled):
#import matplotlib.image as mpimg
#img = mpimg.imread('03_iris.png')
#plt.imshow(img)
# Use PIL (the Python Imaging Library), which provides the Image class.
from PIL import Image
# Path of the image file; with no directory given, the file is looked up in the current directory.
img = Image.open('03_iris.png')
# Display the picture; img.show() can be used instead.
# NOTE(review): the bare expression presumably relies on notebook rich display — confirm.
img
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
# Load the Iris dataset bundled with scikit-learn.
iris = load_iris()

# Dump the raw contents, one item per line: the measurements, the column
# (feature) names -- which cannot be read off the bare array --, the class
# codes, and the class names.
print(iris.data, iris.feature_names, iris.target, iris.target_names, sep="\n")

# Report the container type and then the shape of the measurements and labels.
print(type(iris.data), type(iris.target), sep="\n")
print(iris.data.shape, iris.target.shape, sep="\n")

# Keep short aliases for the feature matrix and the label vector.
X = iris.data
Y = iris.target

# Iterating over the transpose yields one column of X at a time, i.e. the
# same views as X[:, 0] ... X[:, 3].
sepal_len, sepal_wid, petal_len, petal_wid = X.T
from matplotlib.pyplot import figure
# Side-by-side histograms of petal width at two bin resolutions
# (10 bins on the left, 20 bins on the right).
fig, axs = plt.subplots(1, 2)
fig.set_size_inches(20, 12)  # width, height
fig.set_dpi(120)

for panel, n_bins in zip(axs, (10, 20)):
    panel.hist(petal_wid, bins=n_bins, color="darkblue")
    panel.set_xlabel('Petal Width')
    panel.set_ylabel('Count')
# 3D bar chart of the joint distribution of petal width and petal length.
fig = plt.figure(figsize=(8, 8), dpi=120)
ax = fig.add_subplot(projection='3d')

# Both arrays must stay in the same sample order so that every (x, y) pair
# comes from one and the same flower; reversing only one of them would pair
# the width of one flower with the length of another and distort the counts.
x, y = petal_wid, petal_len
hist, xedges, yedges = np.histogram2d(x, y, bins=3)

# Anchor positions of the bars: one per bin, i.e. 3 x 3 = 9 bars, each
# anchored at the lower edge of its bin along both axes.
xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
xpos = xpos.ravel()
ypos = ypos.ravel()
zpos = 0

# Every bar has the same fixed 0.5 x 0.5 footprint; its height is the bin count.
dx = dy = 0.5
dz = hist.ravel()

ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color='lightskyblue')
ax.set_xlabel('Petal Width')
ax.set_ylabel('Petal Length')
ax.set_zlabel('Count')
plt.show()
# Box plot comparing the value distribution of the four measurements.
box_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
data1 = [sepal_len, sepal_wid, petal_len, petal_wid]

fig = plt.figure(figsize=(8, 8), dpi=120)
ax = fig.add_subplot(1, 1, 1)
# NOTE(review): recent matplotlib releases rename `labels` to `tick_labels`;
# confirm against the installed version before changing the keyword.
ax.boxplot(data1, labels=box_names)
ax.set_ylabel('Values(Centimeters)')
# 4 x 4 scatter-plot matrix: the panel in row r / column c plots feature c on
# the x axis against feature r on the y axis, one marker style per species.
# Diagonal panels are left empty.
iris = load_iris()
fig, axes = plt.subplots(nrows=4, ncols=4)
fig.set_figheight(10)
fig.set_figwidth(15)
fig.set_dpi(120)

feature_labels = ['sepal length', 'sepal width', 'petal length', 'petal width']

# The dataset stores the three species in consecutive blocks of 50 samples.
# Order matters: the legend below names the species in this plotting order.
species_styles = [
    (slice(0, 50), {'c': 'b', 'marker': 'x'}),                       # Setosa
    (slice(50, 100), {'c': 'g', 'marker': '+'}),                     # Versicolor
    (slice(100, 150), {'facecolors': 'none', 'edgecolors': 'r'}),    # Virginica
]

for row, row_label in enumerate(feature_labels):
    for col in range(len(feature_labels)):
        if row == col:
            continue
        for samples, style in species_styles:
            axes[row][col].scatter(iris.data[samples, col],
                                   iris.data[samples, row], **style)
    # y labels only on the left-most column.
    axes[row][0].set_ylabel(row_label)

# x labels only on the bottom row; each column is labelled with the feature
# it actually plots on x (column 1 = sepal width, column 2 = petal length).
for col, col_label in enumerate(feature_labels):
    axes[3][col].set_xlabel(col_label)

# A single legend, attached to one panel and pushed above the grid.
axes[1][0].legend(['Setosa', 'Versicolor', 'Virginica'],
                  bbox_to_anchor=(0.9, 2.1), prop={'size': 16})
plt.show()
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Heat map of the standardized measurements: one row per sample, one column
# per feature.
iris = load_iris()
fig = plt.figure(figsize=(6, 6), dpi=120)
ax = fig.add_subplot()

# Z-score every column (subtract the column mean, divide by the column
# standard deviation). The result is a new array, so iris.data itself is
# left untouched instead of being overwritten through an alias.
data2D = (iris.data - iris.data.mean(axis=0)) / iris.data.std(axis=0)

# The extent stretches the 150 x 4 array onto a 180 x 180 square: each of the
# four columns is 45 units wide and every 50 samples span 60 units vertically.
im = ax.imshow(data2D, cmap='jet', extent=[0, 180, 180, 0])
plt.xticks(ticks=[22.5, 67.5, 112.5, 157.5],
           labels=['sepal length', 'sepal width', 'petal length', 'petal width'])
plt.yticks(ticks=[60, 120, 180], labels=['50', '100', '150'])
# The rotated y label reads bottom-to-top, matching the row order top-to-bottom.
plt.ylabel('Virginica Versicolor Setosa')

# Colour bar in its own axes to the right of the image.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="10%", pad=0.08)
plt.colorbar(im, cax=cax)
plt.show()
# Heat map of the sample-by-sample correlation matrix.
iris = load_iris()

# np.corrcoef treats each row as a variable, so passing the 150 x 4 data
# yields a 150 x 150 matrix of correlations between samples.
matrix = np.corrcoef(iris.data)

fig = plt.figure(figsize=(6, 6), dpi=120)
ax = fig.add_subplot()
# NOTE(review): the x extent is 180 while the y extent is 150 although the
# matrix is square; the tick positions below follow those two scales.
# Confirm this asymmetry is intended.
im = ax.imshow(matrix, cmap='jet', extent=[0, 180, 150, 0])

ax.set_xticks([60, 120])
ax.set_xticklabels(['50', '100'])
ax.set_yticks([50, 100, 150])
ax.set_yticklabels(['50', '100', '150'])
ax.set_ylabel('Virginica Versicolor Setosa')
ax.set_xlabel('Setosa Versicolor Virginica')

# Colour bar in a dedicated axes attached to the right of the image.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="10%", pad=0.08)
fig.colorbar(im, cax=cax)
plt.show()
# Parallel-coordinates style plots: every sample is drawn as one line across
# its four measurements. The left panel uses the stored feature order, the
# right panel swaps the first two features.
iris = load_iris()
fig, axs = plt.subplots(1, 2)
fig.set_figheight(8)
fig.set_figwidth(20)
fig.set_dpi(120)

# (sample range, line format, legend label) for the three species blocks.
species_styles = [
    (slice(0, 50), 'b--', 'Setosa'),
    (slice(50, 100), 'g--', 'Versicolor'),
    (slice(100, 150), 'r:', 'Virginica'),
]
# Feature (column) order used on the x axis of each panel.
feature_orders = [[0, 1, 2, 3], [1, 0, 2, 3]]

for panel, order in zip(axs, feature_orders):
    for samples, fmt, species in species_styles:
        # Transposed so that each sample becomes its own line.
        panel.plot(iris.data[samples][:, order].T, fmt, linewidth=1.6,
                   label=species)

    # Every line of a species carries the same label; keep only the first
    # handle per label so the legend has one entry per species. The handles
    # are taken from the panel being annotated, not from the other panel.
    handles, labels = panel.get_legend_handles_labels()
    labels, ids = np.unique(labels, return_index=True)
    handles = [handles[i] for i in ids]
    panel.legend(handles, labels, loc='best')
https://www.youtube.com/watch?v=hd1W4CyPX58
https://www.simplypsychology.org/boxplots.html#:~:text=What%20is%20a%20box%20plot,(or%20percentiles)%20and%20averages.