鸢尾花数据可视化——基于matplotlib

import numpy as np
import pandas as pd
from  matplotlib import pyplot as plt
import matplotlib.cm as cm
from matplotlib图形调配.图像基础属性 import Solve_Chinese,Set_label
import seaborn as sns

# Load the iris dataset.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data = pd.read_csv(r"C:\Users\luyao\Desktop\Iris.csv")

# Work around garbled Chinese text in the plots.
# NOTE(review): Solve_Chinese is a project-local helper; presumably it selects
# the given font for matplotlib — confirm against its definition.
Solve_Chinese('FangSong')

# Keep only the part of each species value that follows the first hyphen.
data['Species'] = data['Species'].apply(lambda x: x.split('-')[1])

# Map each species name to a numeric category so it can be used as a colour value.
print(data['Species'].unique(),'\n')
# Named species_to_num rather than `dict` so the builtin is not shadowed.
species_to_num = {'setosa':0,'versicolor':1,'virginica':2}
data['Category_num'] = data['Species'].map(species_to_num)
# The Species column is kept: the plots below still select and group by it.
print(data.head(),'\n')


# Get to know the data.
fig1 = plt.figure(1)
fig1.set(alpha=0.5)

# Start with the number of samples per species.
data['Species'].value_counts().plot(kind = 'bar',title = '鸢尾花种类分布',color = 'c',rot=360 )
plt.show()

 鸢尾花数据可视化——基于matplotlib_第1张图片

# Check how well each pair of features separates the species.
fig2 = plt.figure(2)
# (x column, y column, subplot title), placed row by row on a 3x2 grid.
# The same column names are passed to Set_label, so the labels cannot drift
# away from the plotted columns (the first panel used to say 'Seta_width').
feature_pairs = [
    ('Setal_length', 'Setal_width', "据萼片长度和宽度的种类分布图"),
    ('Petal_length', 'Petal_width', "据花瓣长度和宽度的种类分布图"),
    ('Petal_length', 'Setal_length', "据花瓣长度和萼片长度的种类分布图"),
    ('Petal_width', 'Setal_width', "据花瓣宽度和萼片宽度的种类分布图"),
    ('Petal_length', 'Setal_width', "据花瓣长度和萼片宽度的种类分布图"),
    ('Petal_width', 'Setal_length', "据花瓣宽度和萼片长度的种类分布图"),
]
for index, (x_col, y_col, title) in enumerate(feature_pairs):
    # divmod(index, 2) yields (row, column) in row-major order.
    plt.subplot2grid((3, 2), divmod(index, 2))
    plt.scatter(data[x_col], data[y_col], c=data['Category_num'])
    # NOTE(review): Set_label is project-local; presumably it sets the x/y axis labels.
    Set_label(x_col, y_col)
    plt.title(title)
plt.tight_layout()
plt.show()
# Observation: only one class is clearly separated from the others;
# the remaining two classes overlap each other.

 

鸢尾花数据可视化——基于matplotlib_第2张图片

 

# 上面的颜色太丑,我们调用CM模块改善视觉效果,根据种类绘制散点图
# matplotlib的这种做法比较麻烦,seaborn作图就很方便~待我以后精进,再用seaborn改进~
def scatter_by_Category(data,Species,x,y,alpha=0.5):
    """Scatter-plot column *x* against column *y*, one rainbow colour per group.

    Draws on the current pyplot axes and prints the number of groups and the
    number of colours generated for them.

    Args:
        data: DataFrame holding the columns to plot.
        Species: key passed to ``data.groupby`` to split the rows into groups.
        x: name of the column plotted on the horizontal axis.
        y: name of the column plotted on the vertical axis.
        alpha: marker opacity passed to ``plt.scatter``; defaults to 0.5,
            the value that used to be hard-coded.
    """
    groups = data.groupby(Species)
    print("The num of Category: " , len(groups))
    # One evenly spaced colour from the rainbow colormap per group.
    colors = cm.rainbow(np.linspace(0,1,len(groups)))
    print("The num of Colors: ", len(colors),'\n')
    # Iterating a groupby yields (group key, sub-frame) pairs.
    for (name, group), color in zip(groups, colors):
        # The label lets a caller identify the groups via plt.legend().
        plt.scatter(group[x], group[y], color=color, alpha=alpha, label=name)

# Figure 3: per-species coloured scatter plots, sepal features on top and
# petal features below.
fig3 = plt.figure(3)
panels = [
    ('Setal_length', 'Setal_width'),
    ("Petal_length", "Petal_width"),
]
for row, (x_name, y_name) in enumerate(panels, start=1):
    # Two rows, one column; `row` selects the panel.
    plt.subplot(2, 1, row)
    scatter_by_Category(data, "Species", x_name, y_name)
    Set_label(x_name, y_name)
    plt.title('Species')
plt.tight_layout()
plt.show()

鸢尾花数据可视化——基于matplotlib_第3张图片 

# Look closer at one species: the value distribution of each feature for setosa.
fig4 = plt.figure(4)
# These pyplot calls run before any subplot is added to fig4.
plt.title('Setosa花类的表现')
# Remove the superfluous tick marks.
plt.xticks([])
plt.yticks([])

# Select the setosa rows once and reuse them for all four box plots.
setosa = data[data['Species']=='setosa']
# (subplot position, column, tick label) for a 1x4 row of box plots.
box_panels = [
    (141, 'Setal_length', u'萼片长度'),
    (142, 'Setal_width', u'萼片宽度'),
    (143, 'Petal_length', u'花瓣长度'),
    (144, 'Petal_width', u'花瓣宽度'),
]
box_axes = []
for position, column, tick_label in box_panels:
    axis = fig4.add_subplot(position)
    plt.boxplot(setosa[column])
    axis.set_xticklabels([tick_label])
    box_axes.append(axis)
# Keep the individual axes handles available under their original names.
ax1, ax2, ax3, ax4 = box_axes
plt.show()

 鸢尾花数据可视化——基于matplotlib_第4张图片

 图形还是有点丑~-~

下次学会了seaborn模块,改善我自己~

这下,我的作图功能得到了进一步提升~

你可能感兴趣的:(鸢尾花数据可视化——基于matplotlib)