Matplotlib可视化
- matplotlib及环境配置
- 数据图的组成结构,与matplotlib对应的名称
- 常见的数据绘图类型,与绘制方法
matplotlib安装配置
- linux
- pip install numpy
- pip install scipy
- pip install matplotlib
- windows
- anaconda
可视化图的基本结构
- 通常使用numpy组织数据,使用matplotlib API进行数据图像绘制,一幅数据图基本包括如下
- Data 数据区
- Axis 坐标轴
- Title 标题
- Legend 图例,区分图中包含的多种曲线或不同分类的数据
- 使用matplotlib绘制图:
- 导入matplotlib包相关工具
- 准备数据,numpy数据存储
- 绘制原始曲线
- 配置标题,坐标轴,刻度,图例
- 添加文字说明,注释
- 显示,保存绘图结果
#导包
%matplotlib inline#表示当前获得图片展示,在ipython展出
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
#准备数据 numpy常用来组织源数据
x = np.arange(0.,10,0.2)#从0开始步长0.2,到9.8
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)
x
array([0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8, 2. , 2.2, 2.4,
2.6, 2.8, 3. , 3.2, 3.4, 3.6, 3.8, 4. , 4.2, 4.4, 4.6, 4.8, 5. ,
5.2, 5.4, 5.6, 5.8, 6. , 6.2, 6.4, 6.6, 6.8, 7. , 7.2, 7.4, 7.6,
7.8, 8. , 8.2, 8.4, 8.6, 8.8, 9. , 9.2, 9.4, 9.6, 9.8])
# Draw the three curves with plt.plot. Keyword arguments control appearance:
#   color     - line colour: r red, g green, b blue, c cyan, m magenta,
#               y yellow, k black, w white
#   linewidth - line thickness
#   linestyle - line style (solid, dashed, ...)
#   marker    - marker drawn at every data point
#   label     - legend text attached to the curve
shared_style = dict(linewidth=1.5, linestyle='-')
plt.plot(x, y1, color="blue", marker=".", label=r"$y=cos{x}$", **shared_style)
plt.plot(x, y2, color="red", marker="*", label=r"$y=sin{x}$", **shared_style)
plt.plot(x, y3, color="green", marker="x", label=r"$y=\sqrt{x}$", **shared_style)
[]
# Axis setup: work on the frame (spines) of the current subplot.
ax = plt.subplot(111)
# Colour "none" means the right and top frame lines are not drawn at all.
for hidden_side in ("right", "top"):
    ax.spines[hidden_side].set_color("none")
# Moving the bottom spine is equivalent to moving the x axis (here to y = 0.2).
ax.xaxis.set_ticks_position("bottom")
ax.spines["bottom"].set_position(("data", 0.2))
# Moving the left spine is equivalent to moving the y axis (here to x = 0.2).
ax.yaxis.set_ticks_position("left")
ax.spines["left"].set_position(("data", 0.2))
# Axis setup: work on the frame (spines) of the current subplot.
ax = plt.subplot(111)
# Colour "none" means the right and top frame lines are not drawn at all.
for hidden_side in ("right", "top"):
    ax.spines[hidden_side].set_color("none")
# Moving the bottom spine is equivalent to moving the x axis (here to y = 0).
ax.xaxis.set_ticks_position("bottom")
ax.spines["bottom"].set_position(("data", 0))
# Moving the left spine is equivalent to moving the y axis (here to x = 0).
ax.yaxis.set_ticks_position("left")
ax.spines["left"].set_position(("data", 0))
# Set the visible range (lim) and the tick positions/labels (ticks) of both axes.
# Visible x range: 1.1 times the smallest and the largest x value.
plt.xlim(x.min() * 1.1, x.max() * 1.1)
# Visible y range.
plt.ylim(-1.5, 4.0)
# Tick positions; each tick label is the text form of its position.
x_tick_values = [2, 4, 6, 8, 10]
y_tick_values = [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]
plt.xticks(x_tick_values, [str(v) for v in x_tick_values])
plt.yticks(y_tick_values, [str(v) for v in y_tick_values])
([,
,
,
,
,
],
)
# Figure title and axis labels.
#   fontsize - text size
#   labelpad - spacing between an axis and its label
title_text = r"$the \ function \ figure \ of \ cos(), \ sin() \ and \ sqrt()$"
plt.title(title_text, fontsize=19)
plt.xlabel(r"$the \ input \ value \ of \ x$", fontsize=18, labelpad=50.8)
plt.ylabel(r"$y=f(x)$", fontsize=18, labelpad=12.5)
Text(0, 0.5, '$y=f(x)$')
# Free-text notes placed inside the plot area, describing the x and y ranges.
note_style = dict(color='k', fontsize=15)
plt.text(4, 1.68, r"$x \ in [0.0, \ 10.0]$", **note_style)
plt.text(4, 1.38, r"$y \ in [-1.0, \ 4.0]$", **note_style)
Text(4, 1.38, '$y \\ in [-1.0, \\ 4.0]$')
# Annotate a point of interest; arrowprops describes the arrow that is drawn.
point = (8, np.sqrt(8))
# A scatter marker (size 5) highlights the annotated point.
plt.scatter([point[0]], [point[1]], 5, color="m")
plt.annotate(
    r"$2\sqrt{2}$",
    xy=point,
    xytext=(8.5, 2.2),
    fontsize=16,
    color="#090909",
    arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0.1", color="#090909"),
)
Text(8.5, 2.2, '$2\\sqrt{2}$')
# Legend. There are two ways to give the figure a legend:
#   1. pass label=... to each plt.plot call, then call plt.legend(loc="upper right")
#   2. skip the label arguments and hand the names to plt.legend directly (used here)
legend_names = ['cos(x)', "sin(x)", "sqrt(x)"]
plt.legend(legend_names, loc="upper right")
# Switch the grid lines on.
plt.grid(True)
# Display the figure.
plt.show()
# To save the figure: savefig("xxx/xxx/png", dpi=48)
%matplotlib inline
#表示当前获得图片展示,在ipython展出
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
#定义数据部分
x = np.arange(0.,10,0.2)#从0开始步长0.2,到9.8
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)
#绘制 3条函数曲线
plt.plot(x,y1,color="blue",linewidth=1.5,linestyle='-',marker=".",label=r"$y=cos{x}$")
plt.plot(x,y2,color="red",linewidth=1.5,linestyle='-',marker="*",label=r"$y=sin{x}$")
plt.plot(x,y3,color="green",linewidth=1.5,linestyle='-',marker="x",label=r"$y=\sqrt{x}$")
#坐标轴上移
#设置边框
ax = plt.subplot(111)
#将右侧边框颜色设为白色,表示边框线隐藏
ax.spines['right'].set_color("none")
#将上侧边框颜色设为白色,表示边框线隐藏
ax.spines['top'].set_color("none")
#移动下边框,相当于移动x轴
ax.xaxis.set_ticks_position("bottom")
ax.spines['bottom'].set_position(("data",0))
#移动左边框,相当于移动y轴
ax.yaxis.set_ticks_position("left")
ax.spines['left'].set_position(("data",0))
#设置x,y轴的可读取值范围
plt.xlim(x.min()*1.1,x.max()*1.1)#设置x取值范围
plt.ylim(-1.5,4.0)#设置y取值范围
#设置x,y轴的可读标签值
plt.xticks([2,4,6,8,10],[r"2",r"4",r"6",r"8",r"10"])
plt.yticks([-1.0,0.0,1.0,2.0,3.0,4.0],[r"-1.0",r"0.0",r"1.0",r"2.0",r"3.0",r"4.0",])
#设置文字描述注释:对变量x,y进行描述,注释。
plt.title(r"$the \ function \ figure \ of \ cos(), \ sin() \ and \ sqrt()$",fontsize=19)
#labelpad 标注x,y注释离x,y距离
#fontsize 字体大小
plt.xlabel(r"$the \ input \ value \ of \ x$",fontsize=18,labelpad=50.8)
plt.ylabel(r"$y=f(x)$",fontsize=18,labelpad=12.5)
#添加文字
plt.text(4,1.68,r"$x \ in [0.0, \ 10.0]$",color='k',fontsize=15)
plt.text(4,1.38,r"$y \ in [-1.0, \ 4.0]$",color='k',fontsize=15)
#特殊点加注释
plt.scatter([8,],[np.sqrt(8),],5,color="m")#使用三点图放大当前点
plt.annotate(r"$2\sqrt{2}$",xy=(8,np.sqrt(8)),xytext=(8.5,2.2),fontsize=16,color="#090909",\
arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.1",color="#090909"))
#设置图例和位置
plt.legend(loc="upper left")
#显示网格
plt.grid(True)
#显示绘制图片
plt.show()
常用图形,介绍
如下:
- 曲线图:matplotlib.pyplot.plot(data)
- 直方图:matplotlib.pyplot.hist(data)
- 散点图:matplotlib.pyplot.scatter(data)
- 箱式图:matplotlib.pyplot.boxplot(data)
# Line plot of y = x^2 sampled at -5, -4, ..., 5.
x = np.arange(-5, 5.1)
y = np.square(x)
plt.plot(x, y)
[]
# Histogram: 1000 draws from a normal distribution (mean 0, std 1) in 10 bins.
x = np.random.normal(loc=0.0, scale=1.0, size=1000)
plt.hist(x, bins=10)
(array([ 11., 22., 75., 147., 222., 224., 166., 86., 39., 8.]),
array([-3.14269253, -2.52579429, -1.90889605, -1.29199781, -0.67509957,
-0.05820134, 0.5586969 , 1.17559514, 1.79249338, 2.40939161,
3.02628985]),
)
# Scatter plot of two independently drawn normal samples (mean 0, std 1).
x = np.random.normal(loc=0.0, scale=1.0, size=1000)
y = np.random.normal(loc=0.0, scale=1.0, size=1000)
plt.scatter(x, y)
# Box plot of the x sample.
plt.boxplot(x)
# Reading the box: the red line is the median,
# the top edge of the box is the upper quartile (Q3)
# and the bottom edge is the lower quartile (Q1).
{'whiskers': [,
],
'caps': [,
],
'boxes': [],
'medians': [],
'fliers': [],
'means': []}
箱式图科普
- 上边缘(Q3+1.5IQR),下边缘(Q1-1.5IQR),IQR=Q3-Q1
- 上四分位数(Q3),下四分位数(Q1)
- 中位数
- 异常值
- 处理异常值时与3西格玛(3σ)标准的异同:统计边界是否受异常值影响,容忍度的大小
from PIL import Image
import matplotlib.pyplot as plt

# Open the image file from the working directory and display it with matplotlib.
image_path = "91}D3W4T$~N{Z[NGJZSLVPL.png"
img = Image.open(image_path)
plt.imshow(img)
案例:自行车租赁数据分析与可视化
- 步骤1:导入数据,做简单的数据处理
import pandas as pd
import requests
from urllib import request
import tempfile
import shutil
import zipfile
temp_dir = tempfile.mkdtemp()#建立临时目录
temp_dir
'C:\\Users\\XUJK~1\\AppData\\Local\\Temp\\tmp2278a01l'
# The download step is disabled below; the archive is opened from `zipname`.
# NOTE(review): with the download commented out, the zip presumably has to be
# placed at `zipname` by other means - confirm.
# data_source = "http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip"
zipname = temp_dir + "/Bike-Sharing-Dataset.zip"  # path of the archive inside the temp dir
# request.urlretrieve(data_source,zipname)  # fetch the data
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(zipname, "r") as zip_ref:
    zip_ref.extractall(temp_dir)
# NOTE(review): the CSV is read from "data/day.csv", not from the extraction
# directory - verify that this is the intended location.
daily_path = "data/day.csv"
daily_data = pd.read_csv(daily_path)  # read the CSV file
# Convert the date strings to datetime values.
daily_data['dteday'] = pd.to_datetime(daily_data['dteday'])
# Columns removed before the analysis.
drop_list = ['instant', "season", "yr", "mnth", "holiday", "workingday", "weathersit", "atemp", "hum"]
daily_data.drop(drop_list, inplace=True, axis=1)  # inplace=True modifies daily_data directly
shutil.rmtree(temp_dir)  # delete the temporary directory
daily_data.head()  # peek at the first rows
dteday | weekday | temp | windspeed | casual | registered | cnt | |
---|---|---|---|---|---|---|---|
0 | 2011-01-01 | 6 | 0.344167 | 0.160446 | 331 | 654 | 985 |
1 | 2011-01-02 | 0 | 0.363478 | 0.248539 | 131 | 670 | 801 |
2 | 2011-01-03 | 1 | 0.196364 | 0.248309 | 120 | 1229 | 1349 |
3 | 2011-01-04 | 2 | 0.200000 | 0.160296 | 108 | 1454 | 1562 |
4 | 2011-01-05 | 3 | 0.226957 | 0.186900 | 82 | 1518 | 1600 |
#步骤2配置参数
from __future__ import division,print_function
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
#在notebook 中显示绘图结果
%matplotlib inline
#设置一些全局的资源参数,可以进行个性化修改
import matplotlib
#设置图片尺寸“14*7”
matplotlib.rc("figure",figsize=(14,7))
#设置字体14
matplotlib.rc("font",size=14)
#不显示顶部和右侧坐标线
matplotlib.rc("axes.spines",top=False,right=False)
#不显示网格
matplotlib.rc("axes",grid=False)
#设置背景颜色为白色
matplotlib.rc('axes',facecolor='white')
#步骤3:关联分析
#散点图
#分析变量关系
数据分析图像选择:
- 关联分析,数值比较:散点图,曲线图
- 分布分析:直方图,密度图
- 涉及分类的分析:柱状图,箱式图
# Correlation analysis:
# a reusable wrapper around a scatter plot.
def scatterplot(x_data, y_data, x_label, y_label, title):
    """Draw a scatter plot of y_data against x_data on a new figure.

    x_label, y_label and title are used as the axis labels and the axes title.
    Nothing is returned.
    """
    # New figure with a single axes; only the axes is needed.
    ax = plt.subplots()[1]
    # Point size, colour and transparency. Colour reference:
    # http://www.1141a.com/other/rgb/htm
    ax.scatter(x_data, y_data, s=10, color="#539caf", alpha=0.75)
    # Title and axis descriptions.
    ax.set(title=title, xlabel=x_label, ylabel=y_label)
# Draw the scatter plot: relationship between temperature and bike rentals.
# "temp" is the temperature, "cnt" is the number of bikes rented.
scatterplot(
    x_data=daily_data['temp'],
    y_data=daily_data['cnt'],
    x_label="Normalized temperature (c)",
    y_label="Check outs",
    title="Number of Check Outs vs Temperature",
)
# Linear regression
import statsmodels.api as sm  # least squares
from statsmodels.stats.outliers_influence import summary_table  # summary information

# Add a constant column so the model has an intercept: y = k*x + b.
x = sm.add_constant(daily_data['temp'])
y = daily_data['cnt']
# Ordinary least squares model and its fit.
regr = sm.OLS(endog=y, exog=x)
res = regr.fit()
# Pull the fitted data out of the model (alpha=0.05):
# st is the summary, data the detailed values, ss2 the column names.
st, data, ss2 = summary_table(res, alpha=0.05)
# Column 2 of the detail table is used as the fitted values.
fitted_values = data[:, 2]
fitted_values
array([3500.1553571 , 3628.39410788, 2518.63849718, 2542.78411874,
2721.79773817, 2571.65792581, 2519.68772936, 2310.35926879,
2133.27145526, 2216.28033024, 2337.52641339, 2361.67203495,
2310.35926879, 2282.9331365 , 2764.13890512, 2753.07548226,
2382.2980802 , 2653.46483228, 3154.88492215, 2952.29678222,
2393.36814377, 1607.30995753, 1855.6147373 , 1861.38949871,
2698.69470809, 2658.99654371, 2509.58056875, 2565.88050811,
2519.68772936, 2652.50192934, 2415.5016302 , 2490.81392229,
2941.22671865, 2456.16933823, 2617.85070456, 2764.13890512,
3112.77618004, 3018.70388221, 2681.13003014, 2109.69693476,
2173.2153259 , 2470.34061336, 2692.20009371, 3316.57292919,
3970.53676843, 2981.64872041, 3328.12112388, 4108.88267983,
4678.88138184, 3865.39440702, 3108.68550269, 3228.98860502,
2424.72557638, 2687.1465134 , 3177.98131152, 3634.17152558,
3090.64269362, 3495.57990791, 3919.22400227, 2985.50033221,
3439.27996854, 2531.71405517, 2952.29678222, 3765.78375704,
3715.0155291 , 2952.77491334, 3157.0497936 , 3179.18328003,
3798.48261307, 3316.57292919, 3400.54470712, 3766.98572555,
3374.31390263, 3322.34370618, 3639.94230257, 3970.53676843,
4800.62551825, 4352.37759334, 3422.67819355, 4073.03612725,
4147.62458196, 3518.682938 , 3107.24446862, 2968.89855722,
2979.96198007, 2895.02729919, 2970.10052573, 3223.45689359,
3206.85511859, 2996.56375507, 3206.85511859, 3306.46576857,
3727.04185491, 5021.98030463, 3965.005057 , 3810.05072989,
4119.95274339, 3444.81167997, 3489.08529353, 4048.01393198,
5170.19431108, 4551.5988933 , 3953.93499343, 4319.17404335,
4180.82813195, 4075.67912983, 4247.23523194, 4618.00599329,
4573.73237973, 5165.86456817, 4263.83700693, 3450.35003211,
4269.36871836, 5077.32398176, 5243.34173172, 5409.35948168,
5331.88231813, 5315.28054313, 4601.40421829, 4352.37759334,
4214.03168194, 4861.5009068 , 5309.7488317 , 3965.005057 ,
4263.83700693, 4396.6512069 , 4667.81131827, 4723.14835469,
4750.82019326, 4750.82019326, 4817.22729324, 4767.42196825,
4618.00599329, 4673.3430297 , 4950.04149321, 5049.65214319,
4944.50978178, 4867.03261823, 4739.75012969, 4778.49203182,
5215.66989315, 5226.73995672, 5409.35948168, 5597.51071807,
5603.0424295 , 5918.47615443, 5741.39498161, 5569.83887951,
5647.31604306, 6084.4939044 , 6361.1923679 , 6289.25355649,
5962.74976799, 5331.88231813, 5431.49296811, 5520.03355452,
5719.25485447, 5912.944443 , 6366.72407933, 6582.54715429,
6228.37816793, 6029.15686798, 5813.33379302, 5431.49296811,
5226.73995672, 5376.15593169, 5387.21935455, 5525.57190666,
5841.00563159, 5857.60740658, 5431.49296811, 5735.85662947,
6084.4939044 , 6051.2903544 , 6023.62515655, 5829.93556802,
5730.32491804, 5746.92669304, 6156.43935652, 6051.2903544 ,
5841.00563159, 6012.55509298, 6117.69745439, 5973.81983156,
6040.22693154, 6173.04113152, 5995.95331798, 6195.17461794,
5924.01450657, 6084.4939044 , 6178.57284294, 6278.18349292,
6488.47485645, 6173.04113152, 5735.85662947, 5619.6442045 ,
5774.5985316 , 5990.42160656, 6173.04113152, 6372.26243147,
6316.91875434, 6626.82076785, 6848.17555423, 6853.71390637,
6726.43141782, 6150.90100438, 6339.05888148, 6361.1923679 ,
6388.86420647, 6781.76845424, 6554.88195643, 6565.94537929,
6339.05888148, 6416.52940432, 6073.43048154, 5929.546218 ,
5935.07792943, 5973.81983156, 6145.36929295, 6294.78526792,
6361.1923679 , 6305.85533149, 5979.35154299, 5918.47615443,
5769.06017946, 5708.19143162, 5636.24597949, 5868.67082944,
6018.08680441, 5940.61628157, 5763.52846804, 5846.53734302,
5935.07792943, 5807.8020816 , 5470.22822953, 5686.05130448,
5757.99675661, 5863.13911801, 5730.32491804, 5910.01588989,
5442.56303168, 5459.16480667, 5575.37723165, 5564.30716808,
5486.83000453, 5658.38610663, 5924.01450657, 5686.05130448,
4800.62551825, 5193.53640673, 5424.27451634, 5531.10361809,
5597.51071807, 5553.23710451, 5493.57032517, 5536.63532951,
5686.05130448, 5049.65214319, 4330.24410692, 4479.66008189,
4584.80244329, 4861.5009068 , 4944.50978178, 5165.86456817,
5387.21935455, 5259.94350672, 5243.34173172, 5425.96125668,
5525.57190666, 5442.56303168, 5431.49296811, 5309.7488317 ,
4961.11155678, 3937.33321843, 3583.16423208, 3765.78375704,
4429.8547569 , 4789.55545468, 4496.26185688, 4606.93592972,
4678.88138184, 4806.15722967, 5005.37852963, 4977.71333178,
4822.75900467, 5127.12930674, 4872.56432966, 4579.27073186,
4612.47428186, 4761.89025682, 4750.82019326, 4812.17371293,
4374.51107977, 4053.54564341, 4020.34209342, 4014.81038199,
4291.50220479, 4346.84588191, 4429.8547569 , 4335.77581835,
3411.60812998, 2902.49145723, 3334.13760714, 3472.48351854,
3876.45782988, 3721.51014348, 3926.26315487, 3893.05960487,
3383.94293213, 3527.82055495, 3837.72256846, 3926.26315487,
3870.92611845, 3738.11191848, 3367.34115713, 3583.16423208,
4142.08622982, 4734.21841826, 4734.21841826, 4247.23523194,
3483.5535821 , 3035.3056572 , 3400.54470712, 4291.50220479,
4186.35984338, 3981.60683199, 4142.08622982, 3693.83830492,
3704.90836849, 3710.44007991, 4263.83700693, 4558.09350768,
4258.29865479, 3372.87286856, 3289.86399358, 3300.93405714,
3201.32340717, 3411.60812998, 3776.8471799 , 4285.97049336,
3937.33321843, 2979.96198007, 3145.97973004, 3040.83736863,
2681.13003014, 2797.34245511, 3090.64269362, 3323.06754357,
4020.34209342, 3704.90836849, 2930.15665508, 2797.34245511,
3051.9074322 , 3776.8471799 , 4059.07735484, 4025.87380484,
3693.83830492, 3223.45689359, 3039.39633456, 3351.21751325,
3372.87286856, 3201.0777009 , 2863.7495551 , 3284.33228215,
3937.33321843, 3671.70481849, 3027.84149916, 2210.74861881,
1928.51844387, 2979.96198007, 3433.74825711, 3826.65250489,
3455.88174354, 2703.27015727, 3264.60273274, 3035.3056572 ,
3754.71369347, 3035.3056572 , 2409.96991877, 2321.42933236,
2476.37701875, 3691.91249902, 3228.98860502, 2476.37701875,
2658.99654371, 2365.69630521, 2293.75749379, 2664.52825514,
3489.08529353, 3168.11985717, 3483.5535821 , 4036.94386841,
3311.99748 , 3090.64269362, 3002.10210721, 3804.51901846,
4330.24410692, 3865.39440702, 3295.395705 , 2968.89855722,
2979.96198007, 3091.36653101, 3566.56245709, 2919.09323223,
2974.43026864, 3079.57263005, 2703.27015727, 2061.33264384,
2692.20009371, 3334.13760714, 3527.82055495, 3317.53583214,
3494.61700496, 3516.7571321 , 3074.04091862, 3074.04091862,
3126.01111507, 3843.25427988, 4230.63345694, 3920.73144344,
3145.97973004, 3068.50920719, 3649.57133207, 3599.76600708,
3501.35732561, 4440.91817975, 3561.02410495, 3965.005057 ,
3378.40457999, 2830.54600511, 2930.15665508, 3898.59795701,
4717.61664326, 3942.86492986, 3123.84624361, 3616.84591319,
4313.64233192, 4966.64326821, 5016.4485932 , 4916.83794322,
4108.88267983, 4629.07605685, 4352.37759334, 4833.82906824,
4938.97142965, 4745.28848183, 4894.7044568 , 5210.13818173,
4551.5988933 , 4119.95274339, 4175.28977981, 3361.80280499,
4429.8547569 , 4496.26185688, 3671.70481849, 4031.41215698,
4042.47557984, 4096.13251663, 4313.64233192, 4811.69558181,
4103.3509684 , 3893.05960487, 4119.95274339, 4534.9971183 ,
4463.05830689, 4180.82813195, 3530.23113268, 3854.32434345,
4153.15629339, 4501.79356831, 5243.34173172, 5625.18255664,
5254.40515458, 4291.50220479, 4523.92705474, 4712.08493184,
4999.8468182 , 3848.79263202, 3350.73938213, 3959.46670486,
4380.04943191, 4523.92705474, 4252.76694337, 3715.97843205,
4258.29865479, 4297.04055693, 5287.60870457, 4961.11155678,
4933.43971822, 5381.68764312, 5342.9523817 , 4950.04149321,
4784.02374325, 5077.32398176, 5033.05036819, 4573.73237973,
4756.35190469, 4961.11155678, 5282.07699314, 5021.98030463,
5276.54528171, 5442.56303168, 5154.7945046 , 4961.11155678,
5199.06811816, 5337.41402956, 5187.99805459, 5298.67876814,
5342.9523817 , 5564.30716808, 5730.32491804, 5813.33379302,
5796.73201803, 5946.147993 , 6012.55509298, 5575.37723165,
5730.32491804, 5558.77545665, 5088.38740461, 5215.66989315,
5182.46634316, 4806.15722967, 4894.7044568 , 5215.66989315,
5525.57190666, 5935.07792943, 6040.22693154, 6001.48502941,
5553.23710451, 5569.83887951, 5520.03355452, 5459.16480667,
5409.35948168, 5149.26279317, 4988.77675463, 5785.66195446,
6410.99769289, 6565.94537929, 6377.7941429 , 6073.43048154,
6150.90100438, 5968.28147942, 5403.82112954, 5846.53734302,
6189.64290651, 6754.10325639, 6294.78526792, 6632.35247928,
6405.46598146, 6399.92762933, 6455.27130645, 6709.82964283,
6715.36135426, 6936.72278135, 6676.62609284, 5935.07792943,
6001.48502941, 5973.81983156, 5968.28147942, 6073.43048154,
5885.27260444, 6167.50277938, 6283.71520435, 6648.95425427,
6482.93650431, 6327.98881791, 5636.24597949, 5171.39627959,
5647.31604306, 6139.83758152, 6200.70632937, 6023.62515655,
6372.26243147, 6405.46598146, 6233.90987936, 6007.02338155,
6067.8921294 , 5951.67970442, 5979.35154299, 6211.77639294,
6300.31697935, 6482.93650431, 6322.45710648, 6211.77639294,
6101.09567939, 6195.17461794, 6233.90987936, 5968.28147942,
5813.33379302, 5868.67082944, 6001.48502941, 6040.22693154,
5907.41273157, 5990.42160656, 6018.08680441, 5719.25485447,
5437.02467954, 5437.02467954, 5525.57190666, 5647.31604306,
5835.46727945, 5879.74089301, 5608.58078164, 5553.23710451,
5885.27260444, 6051.2903544 , 5763.52846804, 5907.41273157,
6289.25355649, 6217.30810437, 5841.00563159, 5912.944443 ,
6034.68857941, 6106.63403153, 5841.00563159, 5885.27260444,
5591.97900664, 5265.47521814, 5088.38740461, 5049.65214319,
5193.53640673, 5282.07699314, 5420.42290454, 5254.40515458,
5066.25391819, 5071.78562962, 5354.01580455, 4883.63439323,
4844.89913181, 5193.53640673, 5531.10361809, 4728.68670683,
4629.07605685, 4867.03261823, 5431.49296811, 5531.10361809,
5326.3506067 , 4817.22729324, 4712.08493184, 4673.3430297 ,
5138.1927296 , 5580.90894308, 5580.90894308, 5298.67876814,
4894.7044568 , 3976.06847986, 3760.2454049 , 4180.82813195,
4629.07605685, 4103.3509684 , 4119.95274339, 3826.65250489,
4678.88138184, 4944.50978178, 4324.70575478, 4241.6968798 ,
4684.41309327, 4955.57320464, 4429.8547569 , 4297.04055693,
4451.98824332, 4828.29735681, 5116.05924318, 4867.03261823,
4839.36077967, 4734.21841826, 4385.58114334, 4136.55451839,
3327.59650779, 3588.69594351, 3644.03297993, 3572.09416851,
3494.61700496, 3378.40457999, 3334.13760714, 3079.57263005,
3179.18328003, 3553.32752206, 3616.36778207, 3798.98730703,
4009.27202985, 4435.38646833, 3494.61700496, 3134.91630718,
3350.73938213, 3505.68706853, 3372.87286856, 3489.08529353,
3743.64362991, 3699.37665706, 3561.02410495, 3472.48351854,
3660.63475492, 3062.97085506, 2847.1477801 , 3295.395705 ,
3151.51808218, 3184.72163217, 3079.81833632, 3195.78505503,
3195.78505503, 3522.28884353, 4219.56339337, 4374.51107977,
4125.48445482, 2913.55488009, 3345.20102999, 3749.18198205,
3765.78375704, 4108.88267983, 3561.02410495, 3190.2533436 ,
3179.18328003, 3085.11098219, 3367.34115713, 3621.8994935 ,
3826.65250489, 3942.86492986, 3422.67819355, 3406.07641855,
3383.94293213, 2979.96198007, 2847.1477801 , 2750.66490453,
3149.10750445, 2830.54600511, 2902.49145723, 2896.95310509,
2896.95310509, 2913.55488009, 2647.92648015])
# Reusable wrapper around a line plot.
def lineplot(x_data, y_data, x_label, y_label, title):
    """Draw y_data against x_data as a line on a new figure.

    x_label, y_label and title are used as the axis labels and the axes title.
    Nothing is returned.
    """
    # New figure with a single axes; the figure handle is not needed.
    axes = plt.subplots()[1]
    # The fitted line: width 2, fully opaque.
    axes.plot(x_data, y_data, lw=2, color="#539caf", alpha=1)
    # Title and axis descriptions.
    axes.set(title=title, xlabel=x_label, ylabel=y_label)
# Plot the fitted values against temperature.
lineplot(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    x_label="Normalized temperature (c)",
    y_label="Check outs",
    # Title typo fixed: "Beat Fit" -> "Best Fit".
    title="Line of Best Fit for Number of Check Outs VS Temperature",
)
# Line plot with a confidence interval.
# Columns 4 and 5 of `data` are taken as the lower and upper bounds.
predict_mean_ci_low, predict_mean_ci_upp = data[:, 4:6].T
# DataFrame holding x together with the lower/upper bounds.
CI_df = pd.DataFrame(
    {
        "x_data": daily_data["temp"],
        "low_CI": predict_mean_ci_low,
        "upper_CI": predict_mean_ci_upp,
    }
)
# Order the rows by x_data.
CI_df = CI_df.sort_values("x_data")
# Line plot with a shaded confidence band.
def lineplotCI(x_data, y_data, sorted_x, low_CI, upper_CI, x_label, y_label, title):
    """Draw a fitted line plus a shaded band between low_CI and upper_CI.

    The line is drawn from x_data/y_data; the band is filled over sorted_x
    between low_CI and upper_CI. x_label, y_label and title are used as the
    axis labels and the axes title. Nothing is returned.
    """
    band_colour = "#539caf"
    # New figure with a single axes; the figure handle is not needed.
    axes = plt.subplots()[1]
    # The prediction line.
    axes.plot(x_data, y_data, lw=1, color=band_colour, alpha=1, label="Fit")
    # The confidence band, filled in the order given by sorted_x.
    axes.fill_between(sorted_x, low_CI, upper_CI, color=band_colour, alpha=0.4, label="95% CI")
    # Title and axis descriptions.
    axes.set_title(title)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    # The legend uses the label= arguments above; loc="best" picks the position.
    axes.legend(loc="best")
# Plot the fitted line together with its confidence band.
lineplotCI(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    sorted_x=CI_df['x_data'],
    low_CI=CI_df['low_CI'],
    upper_CI=CI_df['upper_CI'],
    x_label="Normalized temperature (c)",
    y_label="Check outs",
    # Title typo fixed: "Beat Fit" -> "Best Fit".
    title="Line of Best Fit for Number of Check Outs VS Temperature",
)
# Line plot with two y axes.
def lineplot2y(x_data, x_label, y1_data, y1_color, y1_label, y2_data, y2_color, y2_label, title):
    """Draw two series over the same x values, each with its own y axis.

    y1_data is drawn on the first axes, y2_data on a twin axes created with
    twinx(). Each y label is coloured like its line. Nothing is returned.
    """
    left_ax = plt.subplots()[1]
    left_ax.plot(x_data, y1_data, color=y1_color)
    # Title and axis descriptions for the first axes.
    left_ax.set_ylabel(y1_label, color=y1_color)
    left_ax.set_xlabel(x_label)
    left_ax.set_title(title)
    # The twin axes shares the x axis with the first one.
    right_ax = left_ax.twinx()
    right_ax.plot(x_data, y2_data, color=y2_color)
    right_ax.set_ylabel(y2_label, color=y2_color)
# Rentals and wind speed over time, sharing the date axis.
lineplot2y(
    x_data=daily_data['dteday'],
    x_label='Day',
    y1_data=daily_data['cnt'],
    y1_color="#539caf",
    y1_label="Check outs",
    y2_data=daily_data['windspeed'],
    y2_color="#7663b0",
    # Label typo fixed: "widspeed" -> "windspeed".
    y2_label='Normalized windspeed',
    title="Check Outs and Windspeed Over Time",
)
分布分析
- 直方图
- 粗略区间计算
# Reusable wrapper around a histogram.
def histogram(data, x_label, y_label, title):
    """Draw a 10-bin histogram of data on a new figure.

    x_label, y_label and title are used as the axis labels and the axes title.
    Returns whatever Axes.hist returns for the drawn histogram.
    """
    axes = plt.subplots()[1]
    # bins sets the number of bins.
    hist_result = axes.hist(data, bins=10, color="#539caf")
    axes.set(ylabel=y_label, xlabel=x_label, title=title)
    return hist_result
# Distribution of the "registered" rentals column
# (rentals made by users registered in the app).
res = histogram(
    data=daily_data['registered'],
    x_label='Check outs',
    y_label='Frequency',
    title="Distribution of Registered Check Outs",
)
# Between-group analysis:
#   - quantitative comparison between groups
#   - choice of grouping granularity
#   - clustering between groups
# Bar chart
# Mean and standard deviation of the rentals for each day of the week.
# The aggregations are named by string rather than passed as np.mean/np.std:
# pandas deprecates NumPy callables here, and the strings keep the pandas
# implementations (so the result stays the same across pandas versions).
mean_total_co_day = daily_data[["weekday", "cnt"]].groupby("weekday").agg(["mean", "std"])
# Drop the outer column level so the columns are just "mean" and "std".
mean_total_co_day.columns = mean_total_co_day.columns.droplevel()
# Reusable wrapper around a bar chart with error bars.
def barplot(x_data, y_data, error_data, x_label, y_label, title):
    """Draw bars of y_data at x_data with vertical error bars of error_data.

    x_label, y_label and title are used as the axis labels and the axes title.
    Nothing is returned.
    """
    axes = plt.subplots()[1]
    # The bars themselves.
    axes.bar(x_data, y_data, color="#539caf", align="center")
    # Error bars on top of the bars; ls="none" removes the line that would
    # otherwise connect neighbouring points.
    axes.errorbar(x_data, y_data, yerr=error_data, color="#297083", ls="none", lw=5)
    axes.set(ylabel=y_label, xlabel=x_label, title=title)
# Mean rentals per weekday, with the standard deviation as error bars.
barplot(
    x_data=mean_total_co_day.index.values,
    y_data=mean_total_co_day["mean"],
    error_data=mean_total_co_day['std'],
    x_label="Day of week",
    y_label='Check outs',
    title="Total Check Out By Day of Week(0 = Sunday)",
)
# Stacked bar chart data:
# per-weekday means of registered and casual rentals, their sum, and the
# share of each kind in that sum.
mean_by_reg_co_day = (
    daily_data[['weekday', 'registered', 'casual']]
    .groupby('weekday')
    .mean()
    .assign(total=lambda df: df['registered'] + df['casual'])
    .assign(
        reg_prop=lambda df: df['registered'] / df['total'],
        casual_prop=lambda df: df['casual'] / df['total'],
    )
)
# Reusable wrapper around a stacked bar chart.
def stackedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
    """Draw the series in y_data_list as bars stacked on top of each other.

    Parameters:
        x_data: bar positions shared by every series.
        y_data_list: sequence of series, each with one height per position.
        y_data_names: legend label for each series.
        colors: bar colour for each series.
        x_label, y_label, title: axis labels and axes title.

    Each series starts where the running total of all earlier series ends,
    so any number of series stacks correctly (using only the previous series
    as the base is right for two series but overlaps from the third on).
    Nothing is returned.
    """
    _, ax = plt.subplots()
    # Running top of the stack at every bar position; the first series sits on 0.
    stack_top = np.zeros(len(x_data), dtype=float)
    for i, y_data in enumerate(y_data_list):
        heights = np.asarray(y_data, dtype=float)
        ax.bar(x_data, heights, color=colors[i], bottom=stack_top,
               align='center', label=y_data_names[i])
        # A new array each time, so the caller's data is never modified.
        stack_top = stack_top + heights
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc='upper right')  # legend position
# Share of registered vs casual rentals for each day of the week.
proportion_columns = [mean_by_reg_co_day['reg_prop'], mean_by_reg_co_day['casual_prop']]
stackedbarplot(
    x_data=mean_by_reg_co_day.index.values,
    y_data_list=proportion_columns,
    y_data_names=["Registered", "Casual"],
    colors=['#539caf', '#7663b0'],
    x_label="Day of week",
    y_label='Proportion of check out',
    title='checkOut by Registration status and day of week (0 = Sunday)',
)
# Observation: registered users rent more bikes than unregistered ones;
# on Saturday and Sunday the unregistered users rent more than on other days.