import seaborn as sns
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
def sinplot(filp=1):
    """Plot six phase-shifted sine waves on the current matplotlib axes.

    Curve ``i`` (``i`` = 1..6) is ``sin(x + i * 0.5) * (7 - i) * filp``,
    sampled at 100 evenly spaced points on ``[0, 14]``.

    Args:
        filp: Factor multiplied into every curve; pass ``-1`` to mirror the
            plot vertically.  The parameter name is kept unchanged so that
            existing keyword callers keep working.
    """
    x = np.linspace(0, 14, 100)  # 100 evenly spaced samples on [0, 14]
    for i in range(1, 7):
        # Each successive curve is shifted by 0.5 and has a smaller amplitude.
        plt.plot(x, np.sin(x + i * 0.5) * (7 - i) * filp)
# Baseline: draw the curves with matplotlib's default look.
sinplot()
sns.set() # switch to seaborn's default theme
# Draw the same curves again so the two looks can be compared.
sinplot()
seaborn 内置的风格主题一共有五种:darkgrid、whitegrid、dark、white、ticks
np.random.seed(0)  # fix the global NumPy seed so the figures are reproducible
sns.set_style('ticks')
# 20x6 array of standard-normal samples; np.arange(6) / 2 is broadcast across
# the rows, so column j is shifted upwards by j / 2.
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
sns.boxplot(data=data)
sns.boxenplot(data=data)
# despine() removes the top and right spines by default; `offset` moves the
# remaining spines away from the plotted data.
sns.despine(offset=10)
# Pass the wide-form array as `data=`, consistent with the boxplot calls
# above, rather than positionally.
sns.violinplot(data=data)
sns.despine(offset=10)
# First the 'whitegrid' box plot with all default spines ...
sns.set_style('whitegrid')
sns.boxplot(data=data, palette='deep')
# ... then the same plot with the left spine hidden as well.
sns.set_style('whitegrid')
sns.boxplot(data=data, palette='deep')
sns.despine(left=True)
## Multiple subplots: axes_style() used as a context manager applies a style
## only to axes created inside the `with` block.
with sns.axes_style('ticks'):
    plt.subplot(211)
    sinplot()
# NOTE(review): the original indentation was lost.  The second subplot is
# placed outside the `with` block (the usual form of this demo, so the two
# panels show different styles) -- TODO confirm against the notebook.
plt.subplot(212)
sinplot(-1)
sns.set()
# Draw the same figure under three plotting contexts.  Other built-in context
# names are 'notebook', 'talk' and 'poster' ('poster' being the largest).
# For the last entry, font_scale scales the font elements and the rc override
# sets the line width.
context_settings = [
    ('paper', {}),
    ('poster', {}),
    ('notebook', {'font_scale': 5.5, 'rc': {'lines.linewidth': 10.5}}),
]
for context_name, context_kwargs in context_settings:
    sns.set_context(context_name, **context_kwargs)
    plt.figure(figsize=(8, 6))
    sinplot()
调色板
# With no arguments, color_palette() returns the palette currently in use.
current_palette = sns.color_palette()
# palplot() draws the palette as a horizontal row of colour swatches.
sns.palplot(current_palette)
默认调色板一共有 10 种颜色,可以通过 color_palette() 的 n_colors 参数调整颜色个数
# 'hls' is a colour space; ask for 20 colours from it.
sns.palplot(sns.color_palette('hls', 20))
# Set the default figure size.
sns.set(rc={'figure.figsize': (20, 8)})
data = np.random.normal(size=(20, 20)) + np.arange(20) / 2
sns.boxenplot(data=data, palette=sns.color_palette('hls', 20))
# hls_palette() exposes lightness (l) and saturation (s) directly.
sns.palplot(sns.hls_palette(20, l=0.09, s=0.01))
# 'Paired' is meant for data that comes in pairs: adjacent colours are similar.
sns.palplot(sns.color_palette('Paired', 20))
# sns.xkcd_rgb looks up one fixed colour by its xkcd name.
for y_end, color_name, width in ((3, 'red', 4), (4, 'green', 3), (5, 'blue', 2)):
    plt.plot([0, 1], [2, y_end], sns.xkcd_rgb[color_name], lw=width)
[]
# Alternatively, build a whole palette from a list of xkcd colour names.
colors= ['windows blue','amber','purple']
sns.palplot(sns.xkcd_palette(colors))
一般热力图使用
# The palette name is capitalised and plural ('Reds'); appending '_r'
# (reverse) flips the colour order.
for palette_name in ('Reds', 'Reds_r'):
    sns.palplot(sns.color_palette(palette_name))
色调线性变换
sns.palplot(sns.color_palette('cubehelix', 10))
# Same cubehelix parameters drawn twice: reversed first, then in normal order.
helix_options = dict(start=-0.1, rot=-0.75)
sns.palplot(sns.cubehelix_palette(10, reverse=True, **helix_options))
sns.palplot(sns.cubehelix_palette(10, **helix_options))
# Single-hue ramps built from one named colour.
sns.palplot(sns.light_palette('red', reverse=True))
sns.palplot(sns.dark_palette('red'))
# Draw 300 samples from a bivariate normal and transpose the (300, 2) result
# so that x and y unpack as two 1-D arrays.
np.random.seed(2)
x, y = np.random.multivariate_normal([0, 0], [[1, -0.5], [-0.5, 1]], size=300).T
pal = sns.light_palette('green', as_cmap=True)
# The keyword is `cmap`; the previous spelling `camp` was not used by the
# contour call, so the custom colormap was never applied.
# NOTE(review): x and y are kept positional to match the seaborn release this
# notebook was run on; newer releases expect x=/y= keywords -- confirm the
# target version before changing.
sns.kdeplot(x, y, cmap=pal)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\distributions.py:437: UserWarning: The following kwargs were not used by contour: 'camp'
cset = contour_func(xx, yy, z, n_levels, **kwargs)
from scipy import stats,integrate # 导入科学计算库
# Theme setup.
sns.set(color_codes=True)
seed_value = sum(ord(ch) for ch in 'distributions')
np.random.seed(seed_value)
x = np.random.normal(size=100)
# kde toggles the kernel density estimate drawn over the histogram.
sns.distplot(x, kde=False)
# bins sets the number of histogram intervals.
sns.distplot(x, bins=5, kde=True)
# fit overlays a fitted distribution, here a gamma distribution.
sns.distplot(x, kde=False, fit=stats.gamma)
np.random.normal(size=100)
array([-7.93670539e-02, 2.35623791e+00, 1.69823731e-01, -1.65983154e+00,
1.38854404e+00, -1.56926211e+00, -7.30554288e-01, -6.02577362e-01,
8.98263749e-01, 7.82067606e-01, -1.41254369e-01, -4.47245943e-01,
2.91969802e-01, 4.27114940e-01, 5.94314228e-01, 1.53917596e-01,
-1.53200567e+00, -5.69558781e-01, 7.88319084e-01, 2.82753716e-01,
-5.86497918e-01, -1.16397748e+00, 2.88947587e-01, -2.63436097e-01,
-1.50246105e+00, -1.75745611e+00, -1.36483802e+00, -7.33840531e-01,
-5.64724904e-02, 1.33945114e+00, 2.06119424e+00, 5.25737590e-01,
-1.97816956e-03, -3.53299540e-01, -3.12083544e-01, -7.51424413e-01,
5.79962214e-01, -1.76068456e-01, -9.22589067e-01, 4.57838865e-01,
4.67008945e-01, -8.33531122e-01, 2.33001609e-01, -1.61272298e+00,
1.48554200e-01, -6.34437667e-01, -2.68405188e-01, -2.83733400e-01,
-1.46665528e+00, -8.31031975e-01, 2.20334408e-01, -7.42256917e-01,
-1.03642212e+00, 1.46894035e-01, 1.83128376e+00, -7.97836688e-01,
-2.51106251e-02, -7.51063377e-01, 2.59559153e-01, 3.74713802e-01,
-1.62470969e+00, 1.25857468e+00, 1.11413325e-01, -9.66904388e-01,
5.24940304e-01, -1.04693257e+00, 1.19548873e+00, 8.69765920e-01,
-5.38073347e-01, 2.78726165e-01, 2.47195055e-01, 3.69496598e-01,
-2.80957399e-01, -1.70529391e+00, 9.04984230e-01, 1.60629597e+00,
-2.67589907e-01, 5.65918694e-01, -3.29983562e-01, -1.48961748e-02,
-1.49366521e+00, 5.76244481e-01, 1.41480402e+00, -3.64581041e-01,
4.19481870e-01, 6.01141850e-01, 9.62268465e-01, -2.15160561e-01,
8.14040853e-01, -1.20475982e+00, 5.40559848e-01, 7.75185845e-01,
1.73212150e+00, -2.17369452e-02, -1.71062699e+00, -1.28783899e+00,
9.02107169e-01, -1.00361959e+00, -8.49754217e-01, -2.09257982e+00])
np.random.normal(100) # no size=: the first positional argument is the mean, so this is one float near 100 (the size=100 call above returned an array of 100 draws)
99.1868940499386
import pandas as pd
# Build a 200-row DataFrame of bivariate-normal samples with columns x and y.
mean = [0, 1]
cov = [(1, 0.5), (0.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=['x', 'y'])
df
x | y | |
---|---|---|
0 | -0.579463 | 0.414803 |
1 | 0.880828 | 2.285103 |
2 | -0.307332 | 0.653860 |
3 | 1.029192 | 1.250403 |
4 | -0.892471 | 1.026103 |
... | ... | ... |
195 | 0.249314 | 1.051238 |
196 | -1.222601 | 0.894471 |
197 | 2.329006 | 2.152045 |
198 | -0.192728 | 1.552358 |
199 | -0.325603 | 1.476723 |
200 rows × 2 columns
# Joint plot of the two DataFrame columns.
sns.jointplot(x='x', y='y', data=df, color='k')
# 1000 bivariate-normal draws, transposed so x and y unpack as 1-D arrays.
x, y = np.random.multivariate_normal(mean, cov, 1000).T
# Hexbin variant, drawn with the 'ticks' style applied only inside the block.
with sns.axes_style('ticks'):
    sns.jointplot(x=x, y=y, kind='hex', color='k')
iris = sns.load_dataset('iris')
sns.pairplot(iris)
np.random.seed(sum(map(ord, 'regression')))
tips = sns.load_dataset('tips')  # load the tips example dataset
tips.head(5)  # first five rows
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
regplot()和lmplot()都可以画回归关系的图
plt.figure(figsize=(8, 6))
sns.regplot(data=tips, x='total_bill', y='tip')
# Set the default figure size for the following plots.
sns.set(rc={'figure.figsize': (8, 8)})
sns.regplot(data=tips, x='size', y='tip')
# x_jitter adds random noise of up to 0.05 to the x values of the scatter.
sns.regplot(data=tips, x='size', y='tip', x_jitter=0.05)
## Visualising categorical data
sns.set(style='whitegrid', color_codes=True)
np.random.seed(sum(ord(ch) for ch in 'categorical'))
titanic, tips, iris = (
    sns.load_dataset(dataset_name)
    for dataset_name in ('titanic', 'tips', 'iris')
)
# jitter spreads the points horizontally.
sns.stripplot(data=tips, x='day', y='total_bill', jitter=.1)
swarmplot 画出来的图像圣诞树一样;它和 stripplot 的 jitter 参数作用相同,都是为了避免数据点相互重叠
# Most plots below share the same x / y / data arguments.
day_vs_bill = dict(x='day', y='total_bill', data=tips)
sns.swarmplot(**day_vs_bill)
sns.swarmplot(hue='sex', **day_vs_bill)  # add a second grouping variable
sns.boxplot(hue='time', **day_vs_bill)
# Same variables with the axes swapped.
sns.violinplot(x='total_bill', y='day', hue='time', data=tips)
sns.violinplot(hue='sex', split=True, **day_vs_bill)
# Violin plot followed by a semi-transparent white swarm plot.
sns.violinplot(**day_vs_bill)
sns.swarmplot(color='w', alpha=0.5, **day_vs_bill)
表示集中趋势
# Bar plot of the 'survived' column by sex, split by passenger class.
sns.barplot(x='sex',y='survived',hue='class',data=titanic)
点图可以更好地描述差异性
sns.pointplot(x='sex', y='survived', hue='class', data=titanic)
# Same data with explicit colours, markers and line styles per sex.
sns.pointplot(
    x='class', y='survived', hue='sex', data=titanic,
    palette={'male': 'g', 'female': 'm'},
    markers=['.', 'o'],
    linestyles=['-', '--'],
)
# orient selects the direction: 'h' = horizontal, 'v' = vertical.
sns.boxplot(data=iris, orient='v')
# factorplot() was renamed to catplot(); `kind` is given explicitly, so the
# changed default kind does not affect this call.
sns.catplot(x='day', y='total_bill', hue='smoker', data=tips, kind='bar')
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\categorical.py:3669: UserWarning: The `factorplot` function has been renamed to `catplot`. The original name will be removed in a future release. Please update your code. Note that the default `kind` in `factorplot` (`'point'`) has changed `'strip'` in `catplot`.
warnings.warn(msg)
# factorplot() was renamed to catplot() and its `size` parameter to `height`.
sns.catplot(x='day', y='total_bill', hue='smoker', col='time', data=tips, kind='swarm')
sns.catplot(x='day', y='total_bill', hue='smoker', col='day', data=tips,
            kind='box', height=4, aspect=0.5)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\categorical.py:3675: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
warnings.warn(msg, UserWarning)
# Notebook display of the full tips DataFrame.
tips
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
... | ... | ... | ... | ... | ... | ... | ... |
239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
# One histogram of 'tip' per value of 'time'.
g = sns.FacetGrid(tips, col='time')
g.map(plt.hist, 'tip')
# Scatter plots per sex, coloured by smoker status.
g = sns.FacetGrid(tips, col='sex', hue='smoker')
g.map(plt.scatter, 'total_bill', 'tip', alpha=0.5)
g.add_legend()  # legend for the smoker hue
g = sns.FacetGrid(tips, row='smoker', col='time', margin_titles=True)
g.map(sns.regplot, 'size', 'total_bill', color='0.1', fit_reg=True, x_jitter=0.1)
# `size` was renamed to `height`.
g = sns.FacetGrid(tips, col='day', height=4, aspect=0.5)
# Give the bar order explicitly so every facet draws the categories in the
# same order (seaborn warns when `order` is omitted here).
g.map(sns.barplot, 'sex', 'total_bill', order=['Male', 'Female'])
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:243: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
warnings.warn(msg, UserWarning)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:728: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
warnings.warn(warning)
# 导入pandas的类别方法
from pandas import Categorical
# Show the order in which value_counts() lists the days ...
ordered_days = tips.day.value_counts().index
print(ordered_days)
# ... then fix the facet rows to calendar order instead.
ordered_days = Categorical(['Thur', 'Fri', 'Sat', 'Sun'])
# `size` was renamed to `height`.
g = sns.FacetGrid(tips, row='day', row_order=ordered_days, height=2, aspect=4)
g.map(sns.boxplot, 'total_bill')
CategoricalIndex(['Sat', 'Sun', 'Thur', 'Fri'], categories=['Thur', 'Fri', 'Sat', 'Sun'], ordered=False, dtype='category')
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:243: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
warnings.warn(msg, UserWarning)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:728: UserWarning: Using the boxplot function without specifying `order` is likely to produce an incorrect plot.
warnings.warn(warning)
# One colour per value of 'time', plus a different marker per hue level.
pal = dict(Lunch='seagreen', Dinner='gray')
# `size` was renamed to `height`.
g = sns.FacetGrid(tips, hue='time', palette=pal, height=5,
                  hue_kws={'marker': ['*', 'v']})
# s sets the marker size.
g.map(plt.scatter, 'total_bill', 'tip', s=50, alpha=0.7, linewidth=0.5, edgecolor='w')
g.add_legend()
# Create the grid inside the context manager so its axes get the 'white'
# style.  `size` was renamed to `height`.
with sns.axes_style('white'):
    g = sns.FacetGrid(tips, row='sex', col='smoker', margin_titles=True, height=2.5)
g.map(plt.scatter, 'total_bill', 'tip', color='#ff3384', edgecolor='w', lw=0.5)
g.set_axis_labels('Total bill ($)', 'Tip')
g.set(xticks=[10, 30, 50], yticks=[2, 6, 10])  # tick positions on both axes
g.fig.subplots_adjust(wspace=0.02, hspace=0.02)  # spacing between the subplots
iris = sns.load_dataset('iris')

# Same plotting function in every cell of the grid.
g = sns.PairGrid(iris)
g.map(plt.scatter)

# Histograms on the diagonal, scatter plots everywhere else.
g = sns.PairGrid(iris)
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)

# Colour the points by species and add a legend.
g = sns.PairGrid(iris, hue='species')
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)
g.add_legend()

# Restrict the grid to the listed variables.
g = sns.PairGrid(iris, vars=['sepal_length', 'sepal_width'], hue='species')
g.map(plt.scatter)

g = sns.PairGrid(tips, hue='size', palette='GnBu_d')
g.map(plt.scatter, s=50, edgecolor='w')
g.add_legend()
sns.set()
np.random.seed(20)
# 3x3 matrix of standard-normal samples.
uniform_data = np.random.normal(loc=0.0, scale=1.0, size=(3, 3))
print(uniform_data)
heatmap = sns.heatmap(uniform_data)
[[ 0.88389311 0.19586502 0.35753652]
[-2.34326191 -1.08483259 0.55969629]
[ 0.93946935 -0.97848104 0.50309684]]
np.random.seed(20)
uniform_data = np.random.normal(loc=0.0, scale=1.0, size=(3, 3))
print(uniform_data)
# vmin / vmax set the value range covered by the colormap;
# center sets the value at the middle of the colormap.
heatmap = sns.heatmap(uniform_data, vmin=0.2, vmax=0.5, center=0)
[[ 0.88389311 0.19586502 0.35753652]
[-2.34326191 -1.08483259 0.55969629]
[ 0.93946935 -0.97848104 0.50309684]]
# Load the flights example dataset and display it (notebook cell output).
flights = sns.load_dataset('flights')
flights
year | month | passengers | |
---|---|---|---|
0 | 1949 | January | 112 |
1 | 1949 | February | 118 |
2 | 1949 | March | 132 |
3 | 1949 | April | 129 |
4 | 1949 | May | 121 |
... | ... | ... | ... |
139 | 1960 | August | 606 |
140 | 1960 | September | 508 |
141 | 1960 | October | 461 |
142 | 1960 | November | 390 |
143 | 1960 | December | 432 |
144 rows × 3 columns
flights = sns.load_dataset('flights')
# Reshape to a month x year table of passenger counts.  Keyword arguments are
# used because recent pandas releases no longer accept positional ones here.
flights = flights.pivot(index='month', columns='year', values='passengers')
print(flights)
# To export the table: flights.to_csv("res.csv", sep=',', encoding='gbk')
sns.set(rc={'figure.figsize': (8, 8)})
ax = sns.heatmap(flights)
year 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 \
month
January 112 115 145 171 196 204 242 284 315 340 360
February 118 126 150 180 196 188 233 277 301 318 342
March 132 141 178 193 236 235 267 317 356 362 406
April 129 135 163 181 235 227 269 313 348 348 396
May 121 125 172 183 229 234 270 318 355 363 420
June 135 149 178 218 243 264 315 374 422 435 472
July 148 170 199 230 264 302 364 413 465 491 548
August 148 170 199 242 272 293 347 405 467 505 559
September 136 158 184 209 237 259 312 355 404 404 463
October 119 133 162 191 211 229 274 306 347 359 407
November 104 114 146 172 180 203 237 271 305 310 362
December 118 140 166 194 201 229 278 306 336 337 405
year 1960
month
January 417
February 391
March 419
April 461
May 472
June 535
July 622
August 606
September 508
October 461
November 390
December 432
flights = sns.load_dataset('flights')
# Pivot the three columns into a month x year table whose cells hold the
# passenger counts.  Keyword arguments are used because recent pandas
# releases no longer accept positional ones here.
flights = flights.pivot(index='month', columns='year', values='passengers')
# annot      -- set to True to write each cell's value on the map (off here)
# fmt        -- number format for the annotations; 'd' = integer
# linewidths -- width of the gaps between cells
# cmap       -- colormap
ax = sns.heatmap(flights, annot=False, fmt='d', linewidths=0.05, cmap='PuRd')
cmap 调色板参数的可选取值请参考:
https://blog.csdn.net/qq_38048756/article/details/118724555