分类变量(categorical data)
sns.boxplot(x="day", y="total_bill", hue="weekend", data=tips, dodge=False);
sns.jointplot(x="total_bill", y="tip", data=tips, kind="reg");
sns.pairplot(tips, x_vars=["total_bill", "size"], y_vars=["tip"],hue="smoker", size=5, aspect=.8, kind="reg");
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')
plt.style.use('classic')
%matplotlib inline
#first little example
x = np.linspace(0, 10, 100)
fig = plt.figure()
plt.plot(x, np.sin(x), '-')
plt.plot(x, np.cos(x), '--')
plt.show()
#matlab-style interface
plt.figure()
#create the first of two panels and set current axis
plt.subplot(2, 1, 1) # (rows, columns, panel number)
plt.plot(x, np.sin(x))
#create the second panel and set current axis
plt.subplot(2, 1, 2)
plt.plot(x, np.cos(x))
plt.show()
#Object-oriented interface
#ax will be an array of two Axes objects
fig, ax = plt.subplots(2)
#Call plot() method on the appropriate object
ax[0].plot(x, np.sin(x))
ax[1].plot(x, np.cos(x))
plt.show()
#底图风格
plt.style.use('seaborn-whitegrid')
fig = plt.figure()
ax = plt.axes()
plt.figure()
ax = plt.axes()
x = np.linspace(0, 10, 100)
ax.plot(x, np.sin(x))
plt.show()
#颜色调整
plt.plot(x, np.sin(x - 0), color='blue') # specify color by name
plt.plot(x, np.sin(x - 1), color='g') # short color code (rgbcmyk)
plt.plot(x, np.sin(x - 2), color='0.75') # Grayscale between 0 and 1
plt.plot(x, np.sin(x - 3), color='#FFDD44') # Hex code (RRGGBB from 00 to FF)
plt.plot(x, np.sin(x - 4), color=(1.0,0.2,0.3)) # RGB tuple, values 0 to 1
plt.plot(x, np.sin(x - 5), color='chartreuse') # all HTML color names supporte
plt.show()
#线条样式
plt.plot(x, x + 0, linestyle='solid')
plt.plot(x, x + 1, linestyle='dashed')
plt.plot(x, x + 2, linestyle='dashdot')
plt.plot(x, x + 3, linestyle='dotted');
#For short, you can use the following codes:
plt.plot(x, x + 4, linestyle='-') # solid
plt.plot(x, x + 5, linestyle='--') # dashed
plt.plot(x, x + 6, linestyle='-.') # dashdot
plt.plot(x, x + 7, linestyle=':') # dotted
plt.show()
#标记展示
rng = np.random.RandomState(0)
for marker in ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']:
plt.plot(rng.rand(5), rng.rand(5), marker,
label="marker='{0}'".format(marker))
plt.legend(numpoints=1)
plt.xlim(0, 1.8)
plt.show()
#散点图
x = np.linspace(0, 10, 30)
y = np.sin(x)
plt.plot(x, y, 'o', color='black')
plt.show()
#直方图
data = np.random.randn(1000)
plt.hist(data,color='g')
plt.show()
plt.hist(data, bins=30, normed=True, alpha=0.5,
histtype='stepfilled', color='steelblue',
edgecolor='none')
plt.show()
x1 = np.random.normal(0, 0.8, 1000)
x2 = np.random.normal(-2, 1, 1000)
x3 = np.random.normal(3, 2, 1000)
kwargs = dict(histtype='stepfilled', alpha=0.3, normed=True, bins=40)
plt.hist(x1, **kwargs)
plt.hist(x2, **kwargs)
plt.hist(x3, **kwargs);
#柱状图
men_means, men_std = (20, 35, 30, 35, 27), (2, 3, 4, 1, 2)
women_means, women_std = (25, 32, 34, 20, 25), (3, 5, 2, 3, 3)
ind = np.arange(len(men_means)) # the x locations for the groups
width = 0.35 # the width of the bars
fig, ax = plt.subplots()
rects1 = ax.bar(ind - width/2, men_means, width, yerr=men_std,
color='SkyBlue', label='Men')
rects2 = ax.bar(ind + width/2, women_means, width, yerr=women_std,
color='IndianRed', label='Women')
#Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend()
def autolabel(rects, xpos='center'):
"""
Attach a text label above each bar in *rects*, displaying its height.
*xpos* indicates which side to place the text w.r.t. the center of
the bar. It can be one of the following {'center', 'right', 'left'}.
"""
xpos = xpos.lower() # normalize the case of the parameter
ha = {'center': 'center', 'right': 'left', 'left': 'right'}
offset = {'center': 0.5, 'right': 0.57, 'left': 0.43} # x_txt = x + w*off
for rect in rects:
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()*offset[xpos], 1.01*height,
'{}'.format(height), ha=ha[xpos], va='bottom')
autolabel(rects1, "left")
autolabel(rects2, "right")
plt.show()
#条形图/水平柱状图
#Fixing random state for reproducibility
np.random.seed(19680801)
plt.rcdefaults()
fig, ax = plt.subplots()
#Example data
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
error = np.random.rand(len(people))
ax.barh(y_pos, performance, xerr=error, align='center',
color='green', ecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis() # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('How fast do you want to go today?')
plt.show()
#箱线图
from matplotlib.patches import Polygon
#Fixing random state for reproducibility
np.random.seed(19680801)
#fake up some data
spread = np.random.rand(50) * 100
center = np.ones(25) * 50
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
data = np.concatenate((spread, center, flier_high, flier_low))
fig, axs = plt.subplots(2, 3)
#basic plot
axs[0, 0].boxplot(data)
axs[0, 0].set_title('basic plot')
#notched plot
axs[0, 1].boxplot(data, 1)
axs[0, 1].set_title('notched plot')
#change outlier point symbols
axs[0, 2].boxplot(data, 0, 'gD')
axs[0, 2].set_title('change outlier\npoint symbols')
#don't show outlier points
axs[1, 0].boxplot(data, 0, '')
axs[1, 0].set_title("don't show\noutlier points")
#horizontal boxes
axs[1, 1].boxplot(data, 0, 'rs', 0)
axs[1, 1].set_title('horizontal boxes')
#change whisker length
axs[1, 2].boxplot(data, 0, 'rs', 0, 0.75)
axs[1, 2].set_title('change whisker length')
fig.subplots_adjust(left=0.08, right=0.98, bottom=0.05, top=0.9,
hspace=0.4, wspace=0.3)
#fake up some more data
spread = np.random.rand(50) * 100
center = np.ones(25) * 40
flier_high = np.random.rand(10) * 100 + 100
flier_low = np.random.rand(10) * -100
d2 = np.concatenate((spread, center, flier_high, flier_low))
data.shape = (-1, 1)
d2.shape = (-1, 1)
#Making a 2-D array only works if all the columns are the
#same length. If they are not, then use a list instead.
#This is actually more efficient because boxplot converts
#a 2-D array into a list of vectors internally anyway.
data = [data, d2, d2[::2, 0]]
#Multiple box plots on one Axes
fig, ax = plt.subplots()
ax.boxplot(data)
plt.show()
#set style darkgrid,whitegrid,dark,white,ticks
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
plt.plot(np.arange(10))
plt.show()
import pandas as pd
df_iris = pd.read_csv('./iris.csv')
fig, axes = plt.subplots(1, 2)
sns.distplot(df_iris['petal length'], ax = axes[0], kde = True, rug = True)
sns.kdeplot(df_iris['petal length'], ax = axes[1], shade=True)
plt.show()
sns.set(palette="muted", color_codes=True)
rs = np.random.RandomState(10)
d = rs.normal(size=100)
f, axes = plt.subplots(2, 2, figsize=(7, 7), sharex=True)
sns.distplot(d, kde=False, color="b", ax=axes[0, 0])
sns.distplot(d, hist=False, rug=True, color="r", ax=axes[0, 1])
sns.distplot(d, hist=False, color="g", kde_kws={"shade": True}, ax=axes[1, 0])
sns.distplot(d, color="m", ax=axes[1, 1])
plt.show()
#箱线图
sns.boxplot(x = df_iris['class'], y = df_iris['sepal width'])
#图矩阵
sns.set()
sns.pairplot(df_iris, hue="class")
plt.show()
pandas_profiling基于pandas的DataFrame数据类型,可以简单快速地进行探索性数据分析。
对于数据集的每一列,pandas_profiling会提供以下统计信息:
#导入相关库
import seaborn as sns
import pandas as pd
import pandas_profiling as pp
import matplotlib.pyplot as plt
#加载泰坦尼克数据集
data = sns.load_dataset('titanic')
data.head()
report = pp.ProfileReport(data)
report
report.to_file('report.html')