Matplotlib 是一个 Python 的 2D 绘图库,它可以在交互式环境中生成出版质量级别的图形。通过 Matplotlib 这个
标准类库,开发者只需要几行代码就可以绘制折线图、散点图、柱状图、饼图、直方图、组
合图等数据分析可视化图表。
import numpy as np
import matplotlib.pyplot as plt
# 1. Draw the curve: sin(x) sampled on [0, 2*pi].
x = np.linspace(0, 2 * np.pi)  # x values
y = np.sin(x)                  # y values (sine)

plt.figure(figsize=(9, 6))  # set the figure size before plotting
plt.plot(x, y)
# Further plt.plot(...) calls at this point would add more lines to the same axes.

# 2. Grid lines: dashed, green, 75 % opaque.
grid_style = dict(linestyle='--', color='green', alpha=0.75)
plt.grid(**grid_style)

# 3. Axis ranges: set both at once, then each axis individually
#    (all three calls request the same limits).
x_range = [-1, 10]
y_range = [-1.5, 1.5]
plt.axis(x_range + y_range)
plt.xlim(x_range)
plt.ylim(y_range)
import numpy as np
import matplotlib.pyplot as plt
# 1. Draw the curve.
x = np.linspace(0, 2 * np.pi)  # x values
y = np.sin(x)                  # sine of x
plt.plot(x, y)

# 2. Tick positions on both axes.
plt.xticks(np.arange(0, 7, np.pi / 2))
plt.yticks([-1, 0, 1])

# 3. Tick labels.
_ = plt.yticks(ticks=[-1, 0, 1], labels=['min', ' 0 ', 'max'],
               fontsize=20, ha='right')
# Font description dict.  It is not referenced again in this snippet.
# NOTE(review): presumably meant for a `fontdict=` argument - confirm.
font = {'family': 'serif', 'style': 'italic', 'weight': 'normal',
        'color': 'red', 'size': 16}
_ = plt.xticks(
    ticks=np.arange(0, 7, np.pi / 2),
    # LaTeX-style math text: e.g. r'$\sigma$' renders the Greek letter sigma.
    # Each label must be a single string literal on one line.
    labels=['0', r'$\frac{\pi}{2}$', r'$\pi$', r'$\frac{3\pi}{2}$', r'$2\pi$'],
    fontsize=20,
    fontweight='normal',
    color='red',
)

# 4. Axis label and title.
plt.ylabel('y = sin(x)', rotation=0,
           horizontalalignment='right', fontstyle='normal', fontsize=20)

# Collect the names of the fonts matplotlib finds on this machine.
from matplotlib.font_manager import FontManager
fm = FontManager()
mat_fonts = {f.name for f in fm.ttflist}
# print(mat_fonts)
plt.rcParams['font.sans-serif'] = 'Songti SC'  # Songti font so the Chinese title renders
plt.title('正弦波')
import numpy as np
import matplotlib.pyplot as plt
# 1. Two curves on one 9x6 figure: sine first, cosine second.
x = np.linspace(0, 2 * np.pi)  # x values
y = np.sin(x)                  # sine
plt.figure(figsize=(9, 6))
plt.plot(x, y)
plt.plot(x, np.cos(x))  # cosine

# 2. Legend: entries are listed in plotting order, laid out in two columns,
#    and centred inside the box given by bbox_to_anchor.
legend_box = [0, 1.05, 1, 0.2]
plt.legend(['Sin', 'Cos'], fontsize=18, loc='center', ncol=2,
           bbox_to_anchor=legend_box)
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-np.pi, np.pi, 50)
plt.rcParams['axes.unicode_minus'] = False  # character-display setting for minus signs
plt.figure(figsize=(9, 6))
plt.plot(x, np.sin(x), x, np.cos(x))
ax = plt.gca()  # current axes

# Make the right and top spines disappear by painting them white.
for side, paint in (('right', 'white'), ('top', '#FFFFFF')):
    ax.spines[side].set_color(paint)

# Move the bottom and left spines to data coordinate 0
# ('data' = data coordinates; 'axes' would be a relative 0~1 position).
for side in ('bottom', 'left'):
    ax.spines[side].set_position(('data', 0))

plt.yticks([-1, 0, 1], labels=['-1', '0', '1'], fontsize=18)
x_tick_values = [-np.pi, -np.pi / 2, np.pi / 2, np.pi]
x_tick_text = [r'$-\pi$', r'$-\frac{\pi}{2}$', r'$\frac{\pi}{2}$', r'$\pi$']
_ = plt.xticks(x_tick_values, labels=x_tick_text, fontsize=18)
# 1. Draw sine (red) and cosine (black) on a light-green axes background.
x = np.linspace(0, 2 * np.pi)  # x values
y = np.sin(x)                  # sine wave
plt.figure(linewidth=4)
plt.plot(x, y, color='red')
plt.plot(x, np.cos(x), color='k')  # cosine wave
ax = plt.gca()                     # current axes
ax.set_facecolor('lightgreen')     # axes background colour

# 2. Legend.
legend_box = [0, 1.05, 1, 0.2]
plt.legend(['Sin', 'Cos'], fontsize=18, loc='center', ncol=2,
           bbox_to_anchor=legend_box)

# plt.tight_layout()  # alternative: auto-adjust layout so the saved image is not cut off
save_options = dict(
    dpi=100,                 # pixel density of the saved image
    facecolor='violet',      # colour between the axes and the figure border
    edgecolor='lightgreen',  # colour of the figure border
    bbox_inches='tight',     # save the complete picture
)
plt.savefig('./基础5.png', **save_options)  # file type follows the extension: png, jpg, pdf
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2 * np.pi, 20)
y1 = np.sin(x)
y2 = np.cos(x)

# Colour, line style and marker can each be given as keyword arguments ...
plt.plot(x, y1, color='indigo', linestyle='-.', marker='p')
plt.plot(x, y2, color='#FF00EE', linestyle='--', marker='o')
plt.plot(x, y1 + y2, color=(0.2, 0.7, 0.2), marker='*', linestyle=':')
# ... together with line width and transparency ...
plt.plot(x, y1 + 2 * y2, lw=3, alpha=0.7, color='orange')
# ... or combined in one format string: blue, circle markers, dashed line.
plt.plot(x, 2 * y1 - y2, 'bo--')
import numpy as np
import pandas as pd
def f(x):
    """Return exp(-x) * cos(2*pi*x), evaluated elementwise for array input."""
    return np.exp(-x) * np.cos(2 * np.pi * x)
x = np.linspace(0, 5, 50)
plt.figure(figsize=(9, 6))

# Line appearance and marker appearance, grouped separately for readability.
line_style = dict(color='purple', ls='--', lw=2, alpha=0.6)
marker_style = dict(
    marker='o',
    markerfacecolor='red',    # marker fill colour
    markersize=10,            # marker size
    markeredgecolor='green',  # marker outline colour
    markeredgewidth=3,        # marker outline width
)
plt.plot(x, f(x), **line_style, **marker_style)

# Tick label size on both axes.
plt.xticks(size=18)
plt.yticks(size=18)
x = np.linspace(-np.pi, np.pi, 50)
y = np.sin(x)
plt.figure(figsize=(9, 6))

# Subplot 1: first cell of a 2x2 grid; properties set through the axes object.
ax = plt.subplot(2, 2, 1)
ax.plot(x, y, color='red')
ax.set_facecolor('green')

# Subplot 2: second cell of the 2x2 grid; properties set on the returned line object.
ax = plt.subplot(2, 2, 2)
line = ax.plot(x, -y)[0]
line.set(marker='*', markerfacecolor='red', markeredgecolor='green',
         markersize=10)

# Subplot 3: second row of a 2x1 grid, drawn through pyplot after making it current.
ax = plt.subplot(2, 1, 2)
plt.sca(ax)
x = np.linspace(-np.pi, np.pi, 200)
plt.plot(x, np.sin(x * x), color='red')
x = np.linspace(-np.pi, np.pi, 25)
y = np.sin(x)
fig = plt.figure(figsize=(9, 6))  # create the figure
plt.plot(x, y)

# Inset, way 1: pyplot-level axes; the list is [left, bottom, width, height].
upper_left_box = [0.2, 0.55, 0.3, 0.3]
ax = plt.axes(upper_left_box)
ax.plot(x, y, color='g')

# Inset, way 2: add the axes through the figure object.
lower_right_box = [0.55, 0.2, 0.3, 0.3]
ax = fig.add_axes(lower_right_box)
ax.plot(x, y, color='r')
x = np.linspace(0, 2 * np.pi)
# plt.subplots also accepts sharex / sharey (not used here) to make all
# subplots share the x or y axis.
fig, axes_grid = plt.subplots(3, 3)
(ax11, ax12, ax13), (ax21, ax22, ax23), (ax31, ax32, ax33) = axes_grid
# The same grid could be built one cell at a time with plt.subplot().
fig.set_size_inches(9, 6)

# One curve per cell, row by row.
curves = [
    (ax11, np.sin(x)),
    (ax12, np.cos(x)),
    (ax13, np.tanh(x)),
    (ax21, np.tan(x)),
    (ax22, np.cosh(x)),
    (ax23, np.sinh(x)),
    (ax31, np.sin(x) + np.cos(x)),
    (ax32, np.sin(x * x) + np.cos(x * x)),
    (ax33, np.sin(x) * np.cos(x)),
]
for axis, values in curves:
    axis.plot(x, values)

# Compact layout; comment this line out to compare the spacing.
plt.tight_layout()
plt.show()
x = np.linspace(0, 2 * np.pi, 200)
fig = plt.figure(figsize=(12, 9))

# Top row: a full-width subplot (row 1 of a 3x1 grid).
ax1 = plt.subplot(3, 1, 1)
ax1.plot(x, np.sin(10 * x))
# On an axes object, title / xlim / ylim / xlabel / ylabel are set via set_<name>().
ax1.set_title('ax1_title')

# Middle row: cells 4-5 of a 3x3 grid merged into one subplot.
ax2 = plt.subplot(3, 3, (4, 5))
ax2.plot(x, np.cos(x), color='red')
ax2.set_facecolor('green')

# Right column: cells 6 through 9 merged.
ax3 = plt.subplot(3, 3, (6, 9))
ax3.plot(x, np.sin(x) + np.cos(x))

# Bottom-left single cell.
ax4 = plt.subplot(3, 3, 7)
ax4.plot([1, 3], [2, 4])

# Bottom-middle single cell with axis labels.
ax5 = plt.subplot(3, 3, 8)
ax5.scatter([1, 2, 3], [0, 2, 4])
ax5.set_ylabel('ax5_y', fontsize=12)
ax5.set_xlabel('ax5_x', fontsize=12)
# Two curves with very different value ranges drawn against one x axis,
# each with its own y axis (left: exp, right: sin).
x = np.linspace(-np.pi, np.pi, 100)
data1 = np.exp(x)
data2 = np.sin(x)

plt.figure(figsize=(9, 6))
plt.rcParams['font.size'] = 16  # global font size

ax1 = plt.gca()                     # current axes
ax1.set_xlabel('time (s)')          # x-axis label
ax1.set_ylabel('exp', color='red')  # left y-axis label
# The abscissa is `x`; the original referenced an undefined name `t`.
ax1.plot(x, data1, color='red')
ax1.tick_params(axis='y', labelcolor='red')  # colour the left tick labels

ax2 = ax1.twinx()  # second axes sharing the same x axis
ax2.set_ylabel('sin', color='blue')
ax2.plot(x, data2, color='blue')
ax2.tick_params(axis='y', labelcolor='blue')

plt.tight_layout()  # compact layout
| Pyplot 函数 | API 方法 | 描述 |
|---|---|---|
| text() | mpl.axes.Axes.text() | 在 Axes 对象的任意位置添加文字 |
| xlabel() | mpl.axes.Axes.set_xlabel() | 为 X 轴添加标签 |
| ylabel() | mpl.axes.Axes.set_ylabel() | 为 Y 轴添加标签 |
| title() | mpl.axes.Axes.set_title() | 为 Axes 对象添加标题 |
| legend() | mpl.axes.Axes.legend() | 为 Axes 对象添加图例 |
| annotate() | mpl.axes.Axes.annotate() | 为 Axes 对象添加注释(箭头可选) |
| figtext() | mpl.figure.Figure.text() | 在 Figure 对象的任意位置添加文字 |
| suptitle() | mpl.figure.Figure.suptitle() | 为 Figure 对象添加中心化的标题 |
x = np.random.randint(0, 10, size=15)
running_total = x.cumsum()

# Several lines in one figure: raw values and their cumulative sum.
plt.figure(figsize=(9, 6))
plt.plot(x, marker='*', color='r')
plt.plot(running_total, marker='o')

# The same two series, one per row of a 2x1 subplot layout.
fig, axs = plt.subplots(2, 1)
fig.set_size_inches(9, 6)
upper, lower = axs[0], axs[1]
upper.plot(x, marker='*', color='red')
lower.plot(running_total, marker='o')
# Stacked bar chart: women's scores are stacked on top of men's scores.
labels = ['G1', 'G2', 'G3', 'G4', 'G5', 'G6']  # group names
men_means = np.random.randint(20, 35, size=6)
women_means = np.random.randint(20, 35, size=6)
men_std = np.random.randint(1, 7, size=6)
women_std = np.random.randint(1, 7, size=6)
width = 0.35  # bar width

plt.bar(labels,        # x positions
        men_means,     # bar heights
        width,         # bar width
        yerr=men_std,  # error bars: use the per-group values generated above
        label='Men')   # legend label
plt.bar(labels, women_means, width, yerr=women_std,
        bottom=men_means,  # start each bar where the "Men" bar ends
        label='Women')

plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.legend()
# Grouped bar chart with the height written above every bar.
labels = ['G1', 'G2', 'G3', 'G4', 'G5', 'G6']  # group names
men_means = np.random.randint(20, 35, size=6)
women_means = np.random.randint(20, 35, size=6)
x = np.arange(len(men_means))
width = 0.35  # bar width; set here so the snippet does not depend on an earlier one

plt.figure(figsize=(9, 6))
# plt.bar returns the container of drawn rectangles; the two series sit
# side by side, half a bar width left and right of each tick.
rects1 = plt.bar(x - width / 2, men_means, width)
rects2 = plt.bar(x + width / 2, women_means, width)

# Axis label, title, tick labels, legend.
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(x, labels)
plt.legend(['Men', 'Women'])


def set_label(rects):
    """Write each bar's height as text centred 0.5 units above the bar.

    rects: iterable of bar rectangles as returned by ``plt.bar``.
    """
    for rect in rects:
        height = rect.get_height()
        plt.text(x=rect.get_x() + rect.get_width() / 2,  # horizontal centre of the bar
                 y=height + 0.5,                         # slightly above the top
                 s=height,                               # text to show
                 ha='center')                            # centre horizontally


set_label(rects1)
set_label(rects2)
plt.tight_layout()  # compact layout
plt.savefig('./分组带标签柱状图.png')
# A line on polar axes: the first plot argument is the angle in radians,
# the second the radial value.
r = np.arange(0, 4 * np.pi, 0.01)  # angles in radians
y = np.linspace(0, 2, len(r))      # radial values

ax = plt.subplot(111, projection='polar', facecolor='lightgreen')
ax.plot(r, y, color='red')

ax.set_rmax(3)                    # largest radius shown
ax.set_rticks([0.5, 1, 1.5, 2])   # radial tick values
ax.set_rlabel_position(-22.5)     # position of the radial tick labels
ax.grid(True)                     # grid lines on

title_options = dict(va='center', ha='center', pad=30)
ax.set_title("A line plot on a polar axis", **title_options)
# Polar bar chart with N equal sectors.
N = 8  # number of sectors
theta = np.linspace(0.0, 2 * np.pi, N, endpoint=False)  # start angle of each sector
radii = np.random.randint(3, 15, size=N)                # bar lengths
width = 2 * np.pi / N          # each bar spans one sector (pi/4 when N == 8)
colors = np.random.rand(N, 3)  # one random RGB colour per bar

ax = plt.subplot(111, projection='polar')  # 'polar' selects polar coordinates
ax.bar(theta, radii, width=width, bottom=0.0, color=colors)
# Histogram of normally distributed samples with the matching density curve.
mu = 100    # mean
sigma = 15  # standard deviation
# Draw the samples with the same sigma that the density curve and title use.
x = np.random.normal(loc=mu, scale=sigma, size=10000)

fig, ax = plt.subplots()
n, bins, patches = ax.hist(x, 200, density=True)  # 200 bins, normalised heights

# Normal probability density evaluated at the bin edges.
y = ((1 / (np.sqrt(2 * np.pi) * sigma))
     * np.exp(-0.5 * (1 / sigma * (bins - mu)) ** 2))
ax.plot(bins, y, '--')

ax.set_xlabel('Smarts')
ax.set_ylabel('Probability density')
ax.set_title(rf'Histogram of IQ: $\mu={mu}$, $\sigma={sigma}$')

fig.tight_layout()  # compact layout
plt.savefig('./直⽅图.png')
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Font used for the Chinese title.
# NOTE(review): the path is Windows-specific; adjust it on other systems.
font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf", size=14)

plt.bar([1, 3, 5, 7, 9], [5, 4, 8, 12, 7], label='graph 1')
plt.bar([2, 4, 6, 8, 10], [4, 6, 8, 13, 15], label='graph 2')
# plt.bar parameters:
#   x:      x positions of the bars
#   height: bar heights
#   width:  bar width, default 0.8
#   bottom: y coordinate of the bar bottoms, default 0
#   align:  'center' or 'edge' - whether x is the bar's centre or its edge
plt.legend()
plt.xlabel('number')
plt.ylabel('value')
# The keyword must be lower-case `fontproperties`: current Matplotlib no longer
# matches property names case-insensitively.
plt.title(u'测试例子——条形图', fontproperties=font)
plt.show()
# Box plot of four columns of standard-normal samples (500 rows each).
data = np.random.normal(size=(500, 4))
lables = ['A', 'B', 'C', 'D']
# notch=1 draws notched boxes; sym='gD' draws the outliers (fliers)
# as green diamonds.
plt.boxplot(data, notch=1, sym='gD', labels=lables)
散点图的英文叫做 scatter plot,它将两个变量的值显示在二维坐标中,非常适合展示两个变量之间的关系。
# 100 random points with random marker sizes and colour values.
data = np.random.randn(100, 2)
s = np.random.randint(100, 300, size=100)
color = np.random.randn(100)

xs, ys = data[:, 0], data[:, 1]  # first column -> x, second column -> y
plt.scatter(xs, ys,
            s=s,         # marker sizes
            c=color,     # colour values
            alpha=0.5)   # transparency
# Use a font with Chinese glyphs so the labels are not garbled.
# Only `plt` is imported in this file, so go through plt.rcParams
# (the bare name `matplotlib` is not bound).
plt.rcParams['font.sans-serif'] = 'Kaiti SC'

labels = ["五星", "四星", "三星", "⼆星", "⼀星"]  # wedge labels (star ratings)
percent = [95, 261, 105, 30, 9]                  # number of hotels per rating in some city

# Figure size and resolution.
fig = plt.figure(figsize=(5, 5), dpi=150)

# Offset from the centre per wedge; a non-zero value pulls that wedge out.
explode = (0, 0.1, 0, 0, 0)

plt.pie(x=percent,           # data
        explode=explode,     # per-wedge offset
        labels=labels,       # wedge labels
        autopct='%0.1f%%',   # percentage text, one decimal place
        shadow=True)         # drop shadow
plt.savefig("./饼图.jpg")
fig = plt.figure(dpi=100, figsize=(5, 5))
# Data sets: p1 holds the outer ring's percentages, p2 the inner ring's.
p1 = [43, 25, 32]
p2 = [7, 22, 14, 5, 14, 6, 32]
labels = ['⼩狗', '⼩猫', '⼩⻦']
def func(pct):
    """Format *pct* with one decimal place followed by a percent sign."""
    return '%.1f%%' % pct
# Outer ring: a 0.4-wide band at radius 1, labelled, percentages via func().
outer_wedge = dict(linewidth=3, width=0.4, edgecolor='w')  # separator width, ring width, edge colour
plt.pie(p1,
        labels=labels,
        autopct=func,
        radius=1,            # radius
        pctdistance=0.85,    # where the percentage text sits
        wedgeprops=outer_wedge)

# Inner pie.
inner_wedge = dict(linewidth=3, width=0.7, edgecolor='w')
plt.pie(p2,
        autopct='%0.1f%%',
        radius=0.7,
        pctdistance=0.7,
        wedgeprops=inner_wedge)

# Legend title and position; bbox_to_anchor places the legend outside the pie.
plt.legend(labels, loc='upper right', bbox_to_anchor=(0.75, 0, 0.4, 1),
           title='宠物占⽐')
plt.figure(figsize=(6, 6))

# Ingredients; the text shown for each wedge.
recipe = ["225g flour",
          "90g sugar",
          "1 egg",
          "60g butter",
          "100ml milk",
          "1/2package of yeast"]
# Relative amount of each ingredient.
data = [225, 90, 50, 60, 100, 5]

wedges, texts = plt.pie(data, startangle=40)

# Shared annotation styling: a white box with a black border, joined to the
# wedge by a plain line.
bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72)
kw = dict(arrowprops=dict(arrowstyle="-"), bbox=bbox_props, va="center")

for p, ingredient in zip(wedges, recipe):
    ang = (p.theta2 - p.theta1) / 2. + p.theta1  # mid-angle of the wedge, degrees
    # degrees -> radians -> point on the unit circle
    y = np.sin(np.deg2rad(ang))
    x = np.cos(np.deg2rad(ang))
    # Text is right-aligned for x < 0 and left-aligned for x > 0.
    ha = {-1: "right", 1: "left"}[int(np.sign(x))]
    # The connector style depends on the wedge angle, so update it per wedge.
    connectionstyle = "angle,angleA=0,angleB={}".format(ang)
    kw["arrowprops"].update({"connectionstyle": connectionstyle})
    plt.annotate(ingredient, xy=(x, y),
                 xytext=(1.35 * np.sign(x), 1.4 * y),
                 ha=ha, **kw, fontsize=18, weight='bold')

plt.title("Matplotlib bakery: A donut", fontsize=18, pad=25)
plt.tight_layout()
# Heat map of a 7x7 table with every cell value written on top.
vegetables = ["cucumber", "tomato", "lettuce", "asparagus", "potato", "wheat", "barley"]
farmers = list('ABCDEFG')
harvest = np.random.rand(7, 7) * 5  # harvest data: rows = vegetables, columns = farmers

plt.rcParams['font.size'] = 18
plt.rcParams['font.weight'] = 'heavy'
plt.figure(figsize=(9, 9))
im = plt.imshow(harvest)

plt.xticks(np.arange(len(farmers)), farmers, rotation=45, ha='right')
plt.yticks(np.arange(len(vegetables)), vegetables)

# Write each value, rounded to one decimal, at its cell (x = column, y = row).
for i in range(len(vegetables)):
    for j in range(len(farmers)):
        text = plt.text(j, i, round(harvest[i, j], 1),
                        ha="center", va="center", color='r')

plt.title("Harvest of local farmers (in tons/year)", pad=20)
# Apply the layout to the figure created above; the name `fig` still refers
# to a figure from an earlier snippet.
plt.tight_layout()
plt.savefig('./热⼒图.png')
# Stacked area chart: hours per activity over five days.
plt.figure(figsize=(9, 6))
days = [1, 2, 3, 4, 5]
sleeping = [7, 8, 6, 11, 7]
eating = [2, 3, 4, 3, 2]
working = [7, 8, 7, 2, 2]
playing = [8, 5, 7, 8, 13]

activities = (sleeping, eating, working, playing)  # stacked bottom to top in this order
plt.stackplot(days, *activities)

plt.xlabel('x')
plt.ylabel('y')
plt.title('Stack Plot', fontsize=18)
plt.legend(['Sleeping', 'Eating', 'Working', 'Playing'], fontsize=18)
plt.rcParams['font.family'] = 'Kaiti SC'
labels = np.array(["个⼈能⼒", "IQ", "服务意识", "团队精神", "解决问题能⼒", "持续学习"])
stats = [83, 61, 95, 67, 76, 88]

# One angle per label, evenly spaced around the circle.
angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False)
# Repeat the first point at the end so the outline closes.
stats = np.concatenate((stats, [stats[0]]))
angles = np.concatenate((angles, [angles[0]]))

# Radar (spider) chart.
fig = plt.figure(figsize=(9, 9))
ax = fig.add_subplot(111, polar=True)
ax.plot(angles, stats, 'o-', linewidth=2)  # outline
ax.fill(angles, stats, alpha=0.25)         # filled area

# Angular grid: drop the duplicated closing angle so the number of grid
# positions equals the number of labels.
ax.set_thetagrids(angles[:-1] * 180 / np.pi,  # angles in degrees
                  labels,
                  fontsize=18)
ax.set_rgrids([20, 40, 60, 80], fontsize=18)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D # 3D引擎
x = np.linspace(0, 60, 300)
y = np.sin(x)
z = np.cos(x)

fig = plt.figure(figsize=(9, 6))
# Create the 3D axes through the figure: on recent Matplotlib, Axes3D(fig)
# no longer adds itself to the figure, which leaves the figure empty.
ax3 = fig.add_subplot(projection='3d')

ax3.plot(x, y, z)  # 3D line

# 3D scatter: 50 random points.
ax3.scatter(np.random.rand(50) * 60, np.random.rand(50), np.random.rand(50),
            color='red', s=100)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D # 3D引擎
month = np.arange(1, 5)
# Each month has 4 weeks and every week produces a value,
# giving three dimensions: month, week, sales.
fig = plt.figure(figsize=(9, 6))
# Create the 3D axes through the figure: on recent Matplotlib, Axes3D(fig)
# no longer adds itself to the figure.
ax3 = fig.add_subplot(projection='3d')

for m in month:
    ax3.bar(np.arange(4),                        # week index
            np.random.randint(1, 10, size=4),    # random bar heights
            zs=m,        # this month's row position
            zdir='x',    # direction along which the rows are laid out
            alpha=0.7,   # transparency
            width=0.5)

ax3.set_xlabel('X', fontsize=18, color='red')
ax3.set_ylabel('Y', fontsize=18, color='red')
ax3.set_zlabel('Z', fontsize=18, color='green')
两种常用颜色:浅蓝色 #3c7f99,淡黄色 #c5b783
# Horizontal bar chart of the number of postings per city.
# `job` is a DataFrame defined outside this snippet.
plt.figure(figsize=(12, 9))
cities = job['city'].value_counts()  # postings per city
# Reverse both index and values for plotting.
city_names = cities.index[::-1]
city_counts = cities.values[::-1]
plt.barh(y=city_names, width=city_counts, color='#3c7f99')

plt.box(False)  # no frame around the axes
title_style = dict(fontsize=32, weight='bold', color='white',
                   backgroundcolor='#c5b783', pad=30)
plt.title(label=' 各城市数据分析岗位的需求量 ', **title_style)
plt.tick_params(labelsize=16)
plt.grid(axis='x', linewidth=0.5, color='#3c7f99')
# The ten industry fields with the most postings.  Labels and bar widths are
# taken from the SAME value_counts() result so they cannot get out of step
# (for example when two fields have equal counts).
industry_counts = job["industryField"].value_counts().head(10)
industry_index = industry_counts.index

plt.figure(figsize=(12, 9))
# Reverse both sequences together for plotting.
plt.barh(y=industry_index[::-1],
         width=industry_counts.values[::-1],
         color='#3c7f99')
plt.title(label=' 细分领域数据分析岗位的需求量(取前⼗) ',
          fontsize=32, weight='bold', color='white',
          backgroundcolor='#c5b783', ha='center', pad=30)
plt.tick_params(labelsize=16)
plt.grid(lw=0.5, color='#3c7f99', ls='--')
# Mean salary per city, sorted ascending, coloured along the RdBu_r colormap.
plt.figure(figsize=(12, 9))
city_salary = job.groupby("city")["salary"].mean().sort_values()  # group + aggregate
bar_colors = plt.cm.RdBu_r(np.linspace(0, 1, len(city_salary)))
plt.bar(x=city_salary.index, height=city_salary.values, color=bar_colors)

plt.title(label=' 各城市的薪资⽔平对⽐ ',
          fontsize=32, weight='bold', color='white', backgroundcolor='#3c7f99')
plt.tick_params(labelsize=16)
plt.grid(axis='y', linewidth=0.5, color='black')
# y ticks every 5 units, labelled in thousands; the label at 0 is left blank.
salary_ticks = np.arange(0, 25, step=5)
plt.yticks(ticks=salary_ticks, labels=['', '5k', '10k', '15k', '20k'])
plt.box(False)  # remove the frame
plt.savefig('./各城市薪资状况.png')
# Pivot table: rows = city, columns = years of experience, cells = salary.
work_salary = job.pivot_table(index="city", columns="workYear", values="salary")
# Keep a subset of experience levels and order the cities by the 5-10 year column.
# NOTE(review): the first key was extracted with a Kangxi-radical code point;
# it is written with the standard character here on the assumption that the
# data uses standard CJK text - confirm against the dataset.
experience_levels = ["应届毕业生", "1-3年", "3-5年", "5-10年"]
work_salary = work_salary[experience_levels].sort_values(by='5-10年', ascending=False)

# Repeat every column so each experience level covers a wider band of the image.
repeat = 4
data = np.repeat(work_salary.values, repeat, axis=1)

plt.figure(figsize=(12, 9))
plt.imshow(data, cmap='RdBu_r')
# Tick counts follow the table size instead of a hard-coded 13 rows / 4 columns.
plt.yticks(np.arange(len(work_salary.index)), work_salary.index)
plt.xticks(np.arange(len(work_salary.columns)) * repeat + 1.5, work_salary.columns)

# Write the value once per band (skipping NaN cells), centred in the band.
h, w = data.shape
for col in range(0, w, repeat):
    for row in range(h):
        if not np.isnan(data[row, col]):
            text = plt.text(col + 1.5, row, round(data[row, col], 1),
                            ha="center", va="center", color='r', fontsize=16)

plt.colorbar(shrink=0.85)
plt.tick_params(labelsize=16)
plt.savefig('./⼯作经验与薪⽔关系.png')
# Share of postings per education requirement, drawn as a ring.
education = job["education"].value_counts(normalize=True)
plt.figure(figsize=(9, 9))
ring_style = dict(linewidth=3, width=0.5)
label_style = dict(fontsize=20)
_ = plt.pie(education,
            labels=education.index,
            autopct='%0.2f%%',
            pctdistance=0.8,
            wedgeprops=ring_style,
            textprops=label_style)
_ = plt.title(label=' 学历要求 ',
              fontsize=32, weight='bold',
              color='white', backgroundcolor='#c5b783')
plt.savefig('./学历要求.png')
def get_level(x):
    """Set ``x["skill"]`` to the first skill column whose value equals 1.

    The columns are checked in the order "Python/R", "SQL", "Excel",
    "SPSS/SAS"; when none equals 1 the skill is "其他".  *x* is modified
    in place and returned.
    """
    for column in ("Python/R", "SQL", "Excel", "SPSS/SAS"):
        if x[column] == 1:
            x["skill"] = column
            break
    else:
        x["skill"] = "其他"
    return x
job = job.apply(get_level, axis=1)  # add the "skill" column row by row

# Rows that have one of the main skills.
x = job.loc[job.skill != '其他'][['salary', 'skill']]
skill_names = ["Python/R", "SQL", "Excel", 'SPSS/SAS']
cond1, cond2, cond3, cond4 = (x['skill'] == name for name in skill_names)
# One salary series per skill, in the same order as skill_names.
salary_by_skill = [x['salary'][cond] for cond in (cond1, cond2, cond3, cond4)]

plt.figure(figsize=(12, 8))
plt.title(label=' 不同技能的薪资⽔平对⽐ ',
          fontsize=32, weight='bold', color='white',
          backgroundcolor='#c5b783', pad=30)
plt.boxplot(x=salary_by_skill, vert=False, labels=skill_names)  # horizontal boxes

plt.tick_params(axis="both", labelsize=16)
plt.grid(axis='x', linewidth=0.75)
salary_marks = range(0, 61, 10)
plt.xticks(np.arange(0, 61, 10), [str(i) + "k" for i in salary_marks])
plt.box(False)
plt.xlabel('⼯资', fontsize=18)
plt.ylabel('技能', fontsize=18)
plt.savefig('./技能要求.png')
# Plain ASCII quotes: the typographic quotes in the original are a SyntaxError.
colors = ['#ff0000', '#ffa500', '#c5b783', '#3c7f99', '#0000cd']

# Per-skill totals over the postings of the largest companies.  Filter,
# column selection and sum form ONE expression; split across two lines the
# second half was a separate statement operating on a plain list.
# NOTE(review): the category literal is written with the standard character
# for "人" (the extracted text had a Kangxi-radical code point) - confirm it
# matches the data.  Also confirm the 'Python' column name; the tick label
# below and get_level() use 'Python/R'.
skill_columns = ['Python', 'SQL', 'Tableau', 'Excel', 'SPSS/SAS']
skill_count = job[job['companySize'] == '2000人以上'][skill_columns].sum()

plt.figure(figsize=(9, 6))
plt.bar(np.arange(5), skill_count,
        tick_label=['Python/R', 'SQL', 'Tableau', 'Excel', 'SPSS/SAS'],
        width=0.5,
        # colour each bar by its share of the largest total
        color=plt.cm.RdBu_r(skill_count / skill_count.max()))
_ = plt.title(label=' ⼤公司对技能的要求 ',
              fontsize=32, weight='bold', color='white',
              backgroundcolor='#c5b783', pad=30)
plt.tick_params(labelsize=16)
plt.grid(axis='y')
plt.box(False)
plt.savefig('./⼤公司技能要求.png')
from matplotlib import gridspec
# NOTE(review): keys and category names that must match values in the data
# are written with standard CJK characters here (the extracted text contained
# Kangxi-radical code points for "生" and "人") - confirm against the dataset.
# Experience level -> rank (larger = more experience).
workYear_map = {
    "5-10年": 5,
    "3-5年": 4,
    "1-3年": 3,
    "1年以下": 2,
    "应届毕业生": 1}
# Rank -> colour.
color_map = {
    5: "#ff0000",
    4: "#ffa500",
    3: "#c5b783",
    2: "#3c7f99",
    1: "#0000cd"}

# Keep only rows whose experience level is in the map.  Copy the slice so the
# column assignments below act on an independent frame.
cond = job.workYear.isin(list(workYear_map))
job = job[cond].copy()
job['workYear'] = job.workYear.map(workYear_map)

# Order companySize from largest to smallest and sort the rows by it.
job['companySize'] = job['companySize'].astype('category')
list_custom = ['2000人以上', '500-2000人', '150-500人', '50-150人', '15-50人', '少于15人']
# reorder_categories returns a new categorical; assign it back (the `inplace`
# argument is not available in current pandas).
job['companySize'] = job['companySize'].cat.reorder_categories(list_custom)
job.sort_values(by='companySize', inplace=True, ascending=False)

plt.figure(figsize=(12, 11))
gs = gridspec.GridSpec(10, 1)

# Main panel (top 8 of 10 rows): salary vs. company size, coloured by experience.
plt.subplot(gs[:8])
plt.suptitle(t=' 不同规模公司的⽤⼈需求差异 ',
             fontsize=32,
             weight='bold', color='white', backgroundcolor='#3c7f99')
# A large translucent halo sized by experience rank, then a solid dot on top.
plt.scatter(job.salary, job.companySize,
            c=job.workYear.map(color_map),
            s=(job.workYear * 100), alpha=0.35)
plt.scatter(job.salary, job.companySize,
            c=job.workYear.map(color_map))
plt.grid(axis='x')
plt.xticks(np.arange(0, 161, 10), [str(i) + "k" for i in range(0, 161, 10)])
plt.xlabel('⼯资', fontsize=18)
plt.box(False)
plt.tick_params(labelsize=18)

# Bottom strip (last row): a key for the experience colours.
plt.subplot(gs[9:])
x = np.arange(5)[::-1]
y = np.zeros(len(x))
s = x * 100
key_colors = list(color_map.values())  # pass a real list of colours
plt.scatter(x, y, s=s, c=key_colors, alpha=0.3)
plt.scatter(x, y, c=key_colors)
plt.box(False)
plt.xticks(ticks=x, labels=list(workYear_map.keys()), fontsize=14)
plt.yticks(np.arange(1), labels=[' 经验:'], fontsize=18)
plt.savefig('./不同规模公司招聘薪资⼯作经验差异.png')
装包就是把未命名的参数放到元组中,把命名参数放到字典中
a = (1, 2, 3)


# Unpacking splits the data held in one structure into separate values
# (*args for positional arguments, **kwargs for keyword arguments).
def func(*args):
    """Print the positional arguments unpacked, then as the packed tuple."""
    print(*args)  # unpacked: the items are passed to print one by one
    print(args)   # not unpacked: the tuple itself


func(*a)  # spread the tuple into three positional arguments
# Output:
# 1 2 3
# (1, 2, 3)
pymysql拆包
# Connection settings, passed to pymysql as keyword arguments.
# NOTE(review): credentials are hard-coded; load them from the environment or
# a config file before using this outside a local demo.
MYSQL_CONFIG = dict(
    host='localhost',
    port=3306,
    user='root',
    password='123456789',
    database='mysql',
    charset='utf8'
)
# Original spelling kept as an alias so either name resolves.
Mysql_config = MYSQL_CONFIG


def get_conn(config=MYSQL_CONFIG):
    """Open and return a pymysql connection built from *config*.

    config: mapping of keyword arguments for ``pymysql.Connection``.
    """
    # Imported here because the visible file never imports pymysql.
    import pymysql

    conn = pymysql.Connection(
        **config  # ** unpacks the dict into keyword arguments
    )
    return conn


if __name__ == '__main__':
    conn = get_conn()
    conn.close()  # release the connection once done
集合经常用于交并差集运算
# Two small sets that share one element.
a = {'123', '456'}
b = {'456', '789'}

print(a.difference(b))    # {'123'}  difference: in a but not in b
print(a.intersection(b))  # {'456'}  intersection: in both
print(a.union(b))         # {'789', '456', '123'}  union: in either