numpy函数
import numpy as np
# Quick reference: descriptive statistics of one group of numeric values (`item`).
# Mean
np.mean(item)
# Variance
np.var(item)
# Median
np.median(item)
# Quartiles
np.percentile(item,25) # lower quartile (25th percentile)
np.percentile(item,75) # upper quartile (75th percentile)
插入表格
import pandas as pd  # the snippet uses `pd` below, so import it here as well
from IPython.display import display
# Build the summary table: one row per species, one column per statistic.
# `data` must therefore hold three rows of five values each
# (mean, variance, median, lower quartile, upper quartile).
df = pd.DataFrame(
    data,
    index=['Setosa', 'Versicolour', 'Virginica'],
    columns=['均值','方差','中位数','下四分位数','上四分位数'],
)
# Render the DataFrame as a formatted table in the notebook output.
display(df)
matplotlib函数
import matplotlib.pyplot as plt
from pylab import *
mpl.rcParams['font.sans-serif'] = ['SimHei'] #中文显示
#在jupyter显示
%matplotlib inline
plt.title('标题')
#箱状图(盒图)
plt.boxplot(LIST,labels=['Setosa', 'Versicolour', 'Virginica'])
plt.title('SepaLengthCm:花萼长度,单位cm')
plt.show()
#分位数图
k = [x for x in range(50)]
# print(p)
p = plt.scatter(k,np.sort(LIST[0]), color = 'red')
q = plt.scatter(k,np.sort(LIST[1]), color = 'yellow')
r = plt.scatter(k,np.sort(LIST[2]), color = 'blue')
plt.legend([p,q,r], ['Setosa', 'Versicolour', 'Virginica'], loc='upper left', scatterpoints=1)
plt.show()
loc 为图例在图(figure)中的位置,可以用字符串或对应的数字代码指定:
0: 'best'  1: 'upper right'  2: 'upper left'  3: 'lower left'
4: 'lower right'  5: 'right'  6: 'center left'
7: 'center right'  8: 'lower center'  9: 'upper center'  10: 'center'
scatterpoints 为散点图的图例条目中显示的标记点个数。
例子:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display
from sklearn.datasets import load_iris #导入数据集iris
from pylab import *
mpl.rcParams['font.sans-serif'] = ['SimHei'] #中文显示
#在jupyter显示
%matplotlib inline
print('SepaLengthCm:花萼长度,单位cm - 第1组')
#载入数据集
iris = load_iris()
# print(iris.data) #输出数据集
# print(iris.target) #输出真实标签
#获取花卉两列数据集
# 花萼长度
DD = iris.data
nums = [x[0] for x in DD]
# 按种类分组,每组50
LIST = [nums[m:m+50] for m in range(150) if m%50==0]
# print(li)
#统计描述各项参数
data = []
for item in LIST:
# print(item)
datalist = []
#均值
datalist.append(np.mean(item))
#方差
datalist.append(np.var(item))
#中位数
datalist.append(np.median(item))
#四分位数
datalist.append(np.percentile(item,25))
datalist.append(np.percentile(item,75))
#添加得到的该行数据
data.append(datalist)
# 样式
df = pd.DataFrame(data,index=['Setosa', 'Versicolour', 'Virginica'], columns=['均值','方差','中位数','下四分位数','上四分位数'])
display(df)
plt.boxplot(LIST,labels=['Setosa', 'Versicolour', 'Virginica'])
plt.title('SepaLengthCm:花萼长度,单位cm')
plt.show()
k = [x for x in range(50)]
# print(p)
p = plt.scatter(k,np.sort(LIST[0]), color = 'red')
q = plt.scatter(k,np.sort(LIST[1]), color = 'yellow')
r = plt.scatter(k,np.sort(LIST[2]), color = 'blue')
plt.legend([p,q,r], ['Setosa', 'Versicolour', 'Virginica'], loc='upper left', scatterpoints=1)
plt.title('SepaLengthCm:花萼长度,单位cm')
plt.show()