写在前面:本文只记录本人在学习Kaggle数据可视化课程过程中整理的Seaborn模块常用方法及部分效果图笔记,对数据类型不作介绍和处理。
课程链接:Kaggle数据可视化课程(https://www.kaggle.com/learn/data-visualization)
import pandas as pd
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Line charts (the example data here is indexed by date).
# Open one wide figure first; figsize is (width, height).
plt.figure(figsize=(16, 6))
# Plot the whole fifa_data object in a single call.
sns.lineplot(data=fifa_data)
# Plot selected spotify_data columns one at a time, using the column name
# as the legend label for each line.
for song_title in ("Shape of You", "Despacito"):
    sns.lineplot(data=spotify_data[song_title], label=song_title)
# Label the horizontal axis.
plt.xlabel("Date")
# Bar chart of the average score for racing games, one bar per platform.
plt.figure(figsize=(8, 6))
racing_scores = ign_data['Racing']
# Scores are passed as x and the index labels as y.
sns.barplot(x=racing_scores, y=ign_data.index)
# Label the horizontal axis.
plt.xlabel("Average Score")
# Set the chart title (no vertical-axis label is set explicitly here).
plt.title("Average Score for Racing Games, by Platform")
# Heatmap of the average game score by platform and genre.
plt.figure(figsize=(10, 10))
# annot=True asks seaborn to write each cell's value on the chart.
sns.heatmap(data=ign_data, annot=True)
# Label the horizontal axis.
plt.xlabel("Genre")
# Set the chart title (no vertical-axis label is set explicitly here).
plt.title("Average Game Score, by Platform and Genre")
annot=True
- 绘制热力图时,该参数确保每个单元格的数值都显示在图表上(省略该参数则各单元格中不会显示数值)
# Columns shared by the plots below.
bmi = insurance_data['bmi']
charges = insurance_data['charges']

# Scatter plot of charges against BMI.
sns.scatterplot(x=bmi, y=charges)
# Same pair of variables, drawn with a fitted regression line.
sns.regplot(x=bmi, y=charges)

# Scatter plot colour-coded by the 'smoker' column.
smoker = insurance_data['smoker']
sns.scatterplot(x=bmi, y=charges, hue=smoker)
# lmplot is given column names plus the dataset rather than the columns
# themselves; hue="smoker" splits the plot by that column.
sns.lmplot(x="bmi", y="charges", hue="smoker", data=insurance_data)
# Categorical scatter plot of charges for each 'smoker' value.
sns.swarmplot(x=smoker, y=charges)
iris数据
# Histogram of petal length.
# kde=False turns off the KDE curve so only the histogram bars are drawn.
# NOTE(review): distplot is marked deprecated in newer seaborn releases
# (histplot is the documented replacement) - confirm the installed version.
petal_lengths = iris_data['Petal Length (cm)']
sns.distplot(a=petal_lengths, kde=False)
kde=False
绘制纯直方图时需要显式传入该参数:distplot的kde默认值为True,省略该参数或设置为True都会在直方图的基础上叠加显示KDE曲线。
使用不同颜色标识不同标签数据的直方图
# Histograms for each species, overlaid in one chart.
# Each entry pairs a legend label with the dataset holding that species.
species_histogram_inputs = (
    ("Iris-setosa", iris_set_data),
    ("Iris-versicolor", iris_ver_data),
    ("Iris-virginica", iris_vir_data),
)
for species_label, species_data in species_histogram_inputs:
    # kde=False keeps each plot a plain histogram.
    sns.distplot(a=species_data['Petal Length (cm)'], label=species_label, kde=False)

# Add title
plt.title("Histogram of Petal Lengths, by Species")
# Force legend to appear so the per-species labels are shown
plt.legend()
# KDE plot of petal length.
# shade=True fills the area under the curve with colour.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of
# `fill` - confirm against the installed version.
petal_length = iris_data['Petal Length (cm)']
sns.kdeplot(data=petal_length, shade=True)

# 2D KDE plot: petal length against sepal width.
sepal_width = iris_data['Sepal Width (cm)']
sns.jointplot(x=petal_length, y=sepal_width, kind="kde")
使用不同颜色标识不同标签数据
# KDE plots for each species, overlaid in one chart.
# Each entry pairs a legend label with the dataset holding that species.
species_kde_inputs = (
    ("Iris-setosa", iris_set_data),
    ("Iris-versicolor", iris_ver_data),
    ("Iris-virginica", iris_vir_data),
)
for species_label, species_data in species_kde_inputs:
    # shade=True fills the area under each curve.
    sns.kdeplot(data=species_data['Petal Length (cm)'], label=species_label, shade=True)

# Add title
plt.title("Distribution of Petal Lengths, by Species")
持续更新中~(2020-5-8)