获取数据
用requests,请求网络数据,然后写入csv文件。
import requests
# Download the iris data set from the remote server.
url = "http://aima.cs.berkeley.edu/data/iris.csv"
response = requests.get(url)
# Fail loudly on an HTTP error instead of saving an error page as CSV.
response.raise_for_status()
# Save the payload locally; the context manager closes the file even if
# the write raises.
with open("iris.csv", "w") as local_file:
    local_file.write(response.text)
从文件中读取数据
from numpy import genfromtxt,zeros
# Feature matrix: columns 0-3 of the CSV.
data = genfromtxt("iris.csv", usecols=range(4), delimiter=",")
# Class labels: column 4 of the CSV, read as strings.
labels = genfromtxt("iris.csv", usecols=4, dtype=str, delimiter=",")
数据为csv格式,以“,”分隔,分别为花萼的长宽,花瓣的长宽。
这里为了便于分析,学习,就叫特征0,1,2,3,最后一列为分类标签,数据格式如下:
查看样本数据的维度,标签的种类
# Show the shape of the feature matrix and of the label vector, then the
# distinct class names found in the label column.
for arr in (data, labels):
    print(arr.shape)
print(set(labels))
结果:
根据数据开始绘图
这里需要引入package
from pylab import plot,show,figure,subplot,hist,xlim
import matplotlib.pyplot as plt
二维散点图
- 特征0,特征1
# Scatter of feature 0 (x) against feature 1 (y), one marker colour per class.
for species, marker in (("setosa", "bo"), ("virginica", "ro"), ("versicolor", "go")):
    mask = labels == species
    plot(data[mask, 0], data[mask, 1], marker)
show()
结果:
从上图看,好像并不能很好的区分种类。
- 特征0,2
# Scatter of feature 0 (x) against feature 2 (y), one marker colour per class.
for species, marker in (("setosa", "bo"), ("virginica", "ro"), ("versicolor", "go")):
    mask = labels == species
    plot(data[mask, 0], data[mask, 2], marker)
show()
结果:
这个就比较明显了
- 特征0,3
# Scatter of feature 0 (x) against feature 3 (y), one marker colour per class.
# The original had all four calls fused onto a single line with no
# separators, which is a syntax error; each call is now its own statement.
plot(data[labels=="setosa",0],data[labels=="setosa",3],"bo")
plot(data[labels=="virginica",0],data[labels=="virginica",3],"ro")
plot(data[labels=="versicolor",0],data[labels=="versicolor",3],"go")
show()
结果:
![特征0,3](http://upload-images.jianshu.io/upload_images/1797187-1ae59f7b1f0f51bd.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
同理,可以用其他的特征量来生成二维散点图
#### 三维散点图
---
- 特征0,2,3
# 3-D scatter of features 0, 2 and 3, one colour per class.
ax = plt.subplot(111, projection="3d")
for species, colour in (("setosa", "b"), ("virginica", "r"), ("versicolor", "g")):
    mask = labels == species
    ax.scatter(data[mask, 0], data[mask, 2], data[mask, 3], c=colour)
plt.show()
结果:
![特征0,2,3](http://upload-images.jianshu.io/upload_images/1797187-008f5382e3aad86b.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
#### 直方图
---
特征0
# Feature 0 histograms: one per class in the first column of the 4x4 grid
# (cells 1, 5, 9), with the all-samples histogram in cell 13.
# The x-axis bounds were used below but never defined, so xlim() raised
# NameError; compute them the same way the other feature blocks do.
xmin0 = min(data[:,0])
xmax0 = max(data[:,0])
subplot(441)
hist(data[labels=="setosa",0],color="b",alpha=0.7)
xlim(xmin0,xmax0)
subplot(445)
hist(data[labels=="virginica",0],color="r",alpha=0.7)
xlim(xmin0,xmax0)
subplot(449)
hist(data[labels=="versicolor",0],color="g",alpha=0.7)
xlim(xmin0,xmax0)
subplot(4,4,13)
hist(data[:,0],color="y",alpha=0.7)
xlim(xmin0,xmax0)
特征1
# Feature 1 histograms: one per class in the second column of the 4x4 grid
# (cells 2, 6, 10), with the all-samples histogram in cell 14. All four
# share the same x-axis range.
feature1 = data[:, 1]
xmin1 = min(feature1)
xmax1 = max(feature1)
for cell, species, colour in ((2, "setosa", "b"), (6, "virginica", "r"), (10, "versicolor", "g")):
    subplot(4, 4, cell)
    hist(feature1[labels == species], color=colour, alpha=0.7)
    xlim(xmin1, xmax1)
subplot(4, 4, 14)
hist(feature1, color="y", alpha=0.7)
xlim(xmin1, xmax1)
特征2
# Feature 2 histograms: one per class in the third column of the 4x4 grid
# (cells 3, 7, 11), with the all-samples histogram in cell 15. All four
# share the same x-axis range.
feature2 = data[:, 2]
xmin2 = min(feature2)
xmax2 = max(feature2)
for cell, species, colour in ((3, "setosa", "b"), (7, "virginica", "r"), (11, "versicolor", "g")):
    subplot(4, 4, cell)
    hist(feature2[labels == species], color=colour, alpha=0.7)
    xlim(xmin2, xmax2)
subplot(4, 4, 15)
hist(feature2, color="y", alpha=0.7)
xlim(xmin2, xmax2)
特征3
# Feature 3 histograms: one per class in the fourth column of the 4x4 grid
# (cells 4, 8, 12), with the all-samples histogram in cell 16. All four
# share the same x-axis range.
feature3 = data[:, 3]
xmin3 = min(feature3)
xmax3 = max(feature3)
for cell, species, colour in ((4, "setosa", "b"), (8, "virginica", "r"), (12, "versicolor", "g")):
    subplot(4, 4, cell)
    hist(feature3[labels == species], color=colour, alpha=0.7)
    xlim(xmin3, xmax3)
subplot(4, 4, 16)
hist(feature3, color="y", alpha=0.7)
xlim(xmin3, xmax3)
# Render the figure.
show()
**这里代码为了便于理解,学习,就没进行封装**
![直方图](http://upload-images.jianshu.io/upload_images/1797187-6a56c0fdb0d8f326.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)