https://www.jianshu.com/p/7a0eafdd1340
《利用Python进行数据分析·第2版》第9章 绘图和可视化
matplotlib
引入
%matplotlib notebook
import matplotlib.pyplot as plt
简单示例
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
#fig, axes = plt.subplots(2, 2) #另一种更便捷的方法
ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
#调整subplot周围的间距
#wspace和hspace用于控制宽度和高度的百分比,可以用作subplot之间的间距
subplots_adjust(left=None, bottom=None, right=None, top=None,
wspace=None, hspace=None)
颜色、标记和线型
#颜色:绿色,线型:虚线
ax.plot(x, y, 'g--')
ax.plot(x, y, linestyle='--', color='g')
#颜色:黑色,标记:圆点('o'),线型:虚线
plt.plot(randn(30).cumsum(), 'ko--')
plt.plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')
颜色
https://matplotlib.org/gallery/color/color_demo.html
Matplotlib recognizes the following formats to specify a color:
- an RGB or RGBA tuple of float values in [0, 1] (e.g. (0.1, 0.2, 0.5) or (0.1, 0.2, 0.5, 0.3)). RGBA is short for Red, Green, Blue, Alpha;
- a hex RGB or RGBA string (e.g. '#0F0F0F' or '#0F0F0F0F');
- a string representation of a float value in [0, 1] inclusive for gray level (e.g. '0.5');
- a single letter string, i.e. one of {'b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'};
- an X11/CSS4 ("html") color name, e.g. "blue";
- a name from the xkcd color survey, prefixed with 'xkcd:' (e.g., 'xkcd:sky blue');
- a "Cn" color spec, i.e. 'C' followed by a number, which is an index into the default property cycle (matplotlib.rcParams['axes.prop_cycle']); the indexing is intended to occur at rendering time, and defaults to black if the cycle does not include color;
- one of {'tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple', 'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan'}, which are the Tableau Colors from the 'tab10' categorical palette (which is the default color cycle).
named colors表: https://matplotlib.org/gallery/color/named_colors.html
标记
'.'  point marker
','  pixel marker
'o'  circle marker
'v'  triangle_down marker
'^'  triangle_up marker
'<'  triangle_left marker
'>'  triangle_right marker
'1'  tri_down marker
'2'  tri_up marker
'3'  tri_left marker
'4'  tri_right marker
's'  square marker
'p'  pentagon marker
'*'  star marker
'h'  hexagon1 marker
'H'  hexagon2 marker
'+'  plus marker
'x'  x marker
'D'  diamond marker
'd'  thin_diamond marker
'|'  vline marker
'_'  hline marker
线型
实线:'-' or 'solid'
虚线:'--' or 'dashed'
虚线加点:'-.' or 'dashdot'
点:':' or 'dotted'
轴刻度
#设置X轴范围
plt.xlim([0,1000])
#将刻度放在数据范围中的哪些位置
#默认情况下,这些位置也就是刻度标签
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
#将任何其他的值用作标签
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
rotation=30, fontsize='small')
#为X轴设置一个名称
ax.set_xlabel('Stages')
#设置标题
ax.set_title('My first matplotlib plot')
#或者,批量设定
props = {
'title': 'My first matplotlib plot',
'xlabel': 'Stages'
}
ax.set(**props)
图例legend
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
#label参数,添加图例
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')
#创建图例
ax.legend(loc='best')
'best'
'upper right'
'upper left'
'lower left'
'lower right'
'right'
'center left'
'center right'
'lower center'
'upper center'
'center'
注解
ax.text(x, y, 'Hello world!', family='monospace', fontsize=10)
#注解和文字可以通过text、arrow和annotate函数添加
使用pandas和seaborn绘图
线型图
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot() #默认线型图
#df.plot.line()
柱状图
fig, axes = plt.subplots(2, 1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0], color='k', alpha=0.7) #垂直柱状图
data.plot.barh(ax=axes[1], color='k', alpha=0.7) #水平柱状图
df.plot.barh(stacked=True, alpha=0.5) #堆积柱状图
s.value_counts().plot.bar() #频率柱状图
import seaborn as sns
sns.set(style="whitegrid") #图形背景和网格线
sns.barplot(x='tip_pct', y='day', data=tips, orient='h') #水平柱状图
#绘制在柱状图上的黑线代表95%置信区间(可以通过可选参数配置)。
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')
直方图和密度图
tips['tip_pct'].plot.hist(bins=50) #直方图
tips['tip_pct'].plot.density() #密度图,也被称作KDE(Kernel Density Estimate,核密度估计)图
#plot.kde()是plot.density()的等价写法
sns.distplot(values, bins=100, color='k') #同时画出直方图和连续密度估计图
散布图和散点图
sns.regplot('m1', 'unemp', data=trans_data) #散布图,并附带一条线性回归拟合线
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2}) #散布图矩阵(scatter plot matrix)
分面图
#分面网格(facet grid)图
sns.factorplot(x='day', y='tip_pct', hue='time', col='smoker',
kind='bar', data=tips[tips.tip_pct < 1])
#扩展分面网格
sns.factorplot(x='day', y='tip_pct', row='time',
col='smoker',
kind='bar', data=tips[tips.tip_pct < 1])
#盒图
sns.factorplot(x='tip_pct', y='day', kind='box',
data=tips[tips.tip_pct < 0.5])