鸢尾花数据可视化

通过读取 sklearn 自带的鸢尾花数据集,编程计算各属性之间的相关性,并进行数据可视化(要求:根据数据属性的特点绘制散点图)。

from sklearn import datasets

# Load the iris data bundled with scikit-learn as a (features, labels) pair.
X, d = datasets.load_iris(return_X_y=True)

# Dump the feature matrix, its shape, and the label vector for a quick look.
for preview in (X, X.shape, d):
    print(preview)
import numpy as np

# Fetch the iris features (X) and class labels (d) from scikit-learn.
X, d = datasets.load_iris(return_X_y=True)

# Store both arrays in a single .npz archive under the keys "data"/"target".
np.savez("iris.npz", data=X, target=d)

# np.load on an .npz returns a lazy NpzFile that keeps the archive open.
# The with-block closes it as soon as both arrays have been read into memory,
# instead of leaking the file handle for the rest of the script.
with np.load("iris.npz") as iris_file:
    X = iris_file["data"]
    d = iris_file["target"]

# Sanity check: shapes and dtypes should survive the save/load round trip.
print(X.shape, d.shape, X.dtype, d.dtype)

import matplotlib.pyplot as plt

# Select the Tk-based interactive backend so plt.show() opens a window.
plt.switch_backend("TkAgg")

# Open the archive once and close it after both arrays are read; the original
# called np.load twice and never closed either handle.
with np.load("iris.npz") as iris_file:
    x = iris_file["data"]
    # Bind the labels to `d`, the name the scatter call below actually uses,
    # so this section no longer depends on a `d` left over from earlier code.
    d = iris_file["target"]
d1 = d  # alias kept for backward compatibility with the original name

# First two feature columns (sepal length / sepal width in scikit-learn's
# iris column order).
x1 = x[:, 0]
x2 = x[:, 1]

# One scatter call; points are coloured by their numeric class label.
plt.scatter(x1, x2, c=d)
plt.show()

鸢尾花数据可视化_第1张图片

# One explicit colour per class so that each class gets its own legend entry.
colors = ["#ff0000", "#00ff00", "#0000ff"]

for i, class_color in enumerate(colors):
    # Boolean mask picking the samples whose label equals the current class.
    in_class = d == i
    plt.scatter(x1[in_class], x2[in_class], color=class_color, label=f"{i}")

plt.legend()
plt.show()

鸢尾花数据可视化_第2张图片

import matplotlib.pyplot as plt

# Select the Tk-based interactive backend so plt.show() opens a window.
plt.switch_backend("TkAgg")

# Read features and labels from the saved archive. A single with-block
# replaces two separate np.load calls whose NpzFile handles were never closed.
with np.load("iris.npz") as iris_file:
    x = iris_file["data"]
    d = iris_file["target"]

def rho(x3, x4):
    """Return the Pearson correlation coefficient of two 1-D samples.

    Computed as the population covariance of ``x3`` and ``x4`` divided by the
    product of their population standard deviations (``np.std`` with its
    default ``ddof=0``).

    Args:
        x3: First sample (array-like of numbers).
        x4: Second sample, same length as ``x3``.

    Returns:
        The correlation as a NumPy float. If either sample has zero variance
        the division is by zero and the result is ``nan``/``inf``.
    """
    covariance = np.mean((x3 - np.mean(x3)) * (x4 - np.mean(x4)))
    return covariance / np.std(x3) / np.std(x4)

# Third and fourth feature columns (petal length / petal width in
# scikit-learn's iris column order).
x3 = x[:, 2]
x4 = x[:, 3]

# Correlation of the two features over the whole dataset.
print(rho(x3, x4))

colors = ["#ff0000", "#00ff00", "#0000ff"]

for i in range(3):
    # Restrict both features to the samples of class i.
    x11 = x3[d == i]
    x22 = x4[d == i]

    # These two calls must sit inside the loop: when dedented, only the last
    # class (i == 2) was reported and drawn.
    print(i, rho(x11, x22))
    plt.scatter(x11, x22, color=colors[i], label=f"{i}")

plt.legend()
plt.show()

鸢尾花数据可视化_第3张图片

你可能感兴趣的:(数据挖掘)