%matplotlib inline
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
# --- Univariate distribution ------------------------------------------------
# Draw 100 samples from a standard normal distribution and plot their
# histogram with a kernel density estimate (KDE) curve on top.
x = np.random.normal(size=100)
# NOTE(review): distplot is deprecated in recent seaborn releases;
# histplot(x, kde=True) is the documented replacement. Kept here because the
# minimum seaborn version of this notebook is unknown -- confirm and migrate.
sns.distplot(x, kde=True)
# Central tendency of a distribution: how closely the data gather around
# their central value.

# --- Bivariate distribution -------------------------------------------------
# Correlated 2-D normal: means (0, 1), unit variances, covariance 0.5.
mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
# A larger, independent sample (1000 points) for the hexbin plot; the
# transpose splits the (1000, 2) array into separate x and y vectors.
x, y = np.random.multivariate_normal(mean, cov, 1000).T
# The "ticks" style is applied only to the figure created inside the block.
with sns.axes_style("ticks"):
    sns.jointplot(x=x, y=y, kind="hex")
# Hexagonal binning gives a more intuitive view of how the points are
# distributed.
sns.jointplot(x="x", y="y", data=df, kind="kde")
# kind="kde" fits and draws the (joint) probability density directly.
# your code
%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
# Load seaborn's example "flights" dataset and preview the first rows.
data = sns.load_dataset("flights")
data.head()
# Columns: year, month, number of passengers.

# --- Yearly change in the total number of passengers (line chart) -----------
plt.rcParams["figure.figsize"] = (12,8)
# Start a fresh figure so this plot does not share axes with any other plot
# when the file is executed top to bottom as a script.
plt.figure()
# pointplot aggregates with the mean by default; the question asks for the
# yearly *total*, so the monthly values are summed explicitly.
sns.pointplot(x="year", y="passengers", data=data, estimator=np.sum)
plt.title("The change number of passengers")
plt.grid(True)

# --- Distribution of passengers over the months of a year (bar chart) -------
sns.set(color_codes=True)
plt.figure()
sns.barplot(x="month", y="passengers", hue='year', data=data)
plt.title("The number of passengers every month")
# Load seaborn's example "iris" dataset and preview the first rows.
data = sns.load_dataset("iris")
data.head()

# --- Relationship between sepal size and petal size (scatter plot) ----------
# "Size" is approximated here as length * width.
data.loc[:,'sepal'] = data['sepal_length'] * data['sepal_width']
data.loc[:,'petal'] = data['petal_length'] * data['petal_width']
sns.jointplot(x="sepal", y="petal", data=data)
plt.title("The relation of sepal and petal")

# --- Distribution of sepal and petal size per species (box plots) -----------
# The question covers both sepal and petal, so each gets its own axes.
# Dedicated axes also keep the boxes from being drawn onto the joint plot
# above. dodge=False keeps the boxes centred on their tick: hue repeats the
# x variable, and older seaborn versions would otherwise shrink and offset
# every box.
_, (sepal_ax, petal_ax) = plt.subplots(1, 2)
sns.boxplot(x='species', y='sepal', hue="species", dodge=False,
            data=data, ax=sepal_ax)
sns.boxplot(x='species', y='petal', hue="species", dodge=False,
            data=data, ax=petal_ax)

# --- Sepal vs. petal size by species (scatter with regression fits) ---------
# lmplot is figure-level: it creates its own figure.
sns.lmplot(x="sepal", y="petal", hue="species", data=data)

# --- Sepal vs. petal size by species (strip plot) ---------------------------
# New figure so the strip plot is not drawn on top of the lmplot axes.
plt.figure()
# NOTE(review): stripplot places x values as categories, so the x axis is not
# to scale (hence the rotated tick labels below); a plain scatter plot may be
# the better fit for two continuous variables -- confirm the intent.
sns.stripplot(x='sepal', y='petal', hue='species', data=data)
plt.title(u'不同种类鸢尾花萼片和花瓣的大小关系',fontsize=20)
plt.xticks(rotation=60)
# Load seaborn's example "tips" dataset. Without this, `data` would still
# hold the previously loaded dataset, which has none of the columns used
# below.
data = sns.load_dataset("tips")

# --- Relationship between tip and total bill (scatter plot) -----------------
# jointplot is figure-level and creates its own figure.
sns.jointplot(x="tip", y="total_bill", data=data)

# Each box plot below gets a fresh figure; otherwise successive axes-level
# plots would pile up on the same axes when the file runs as a script.

# --- Who is more generous, male or female customers? (box plot) -------------
plt.figure()
sns.boxplot(x="sex", y="tip", data=data)
# Author's observation: men are generally somewhat more generous.

# --- Does smoking affect the tip amount? (box plot) -------------------------
plt.figure()
sns.boxplot(x="smoker", y="tip", data=data)
# Author's observation: smokers are somewhat more generous.

# --- Weekdays vs. weekend: when are tips more generous? (box plot) ----------
plt.figure()
sns.boxplot(x="day", y="tip", data=data)

# --- Lunch vs. dinner: at which meal do customers tip more? (box plot) ------
plt.figure()
sns.boxplot(x="time", y="tip", data=data)

# --- Does party size affect generosity? (box plot) --------------------------
plt.figure()
sns.boxplot(x="size", y="tip", data=data)

# --- Combined effect of sex and smoking on generosity (grouped bars) --------
plt.figure()
sns.barplot(x="sex", y="tip", hue="smoker", data=data)
def pile_bar(data, pile_col, clsf_col, value_col='survived', colors=None):
    """Draw a stacked bar chart of group shares.

    One bar is drawn per level of ``clsf_col``. Each bar is split into one
    layer per level of ``pile_col``; a layer's height is that level's share
    of the non-null ``value_col`` entries within the bar, so every bar sums
    to 1.

    Args:
        data: DataFrame containing ``pile_col``, ``clsf_col`` and
            ``value_col``.
        pile_col: Column whose levels become the stacked layers (legend).
        clsf_col: Column whose levels become the bars (x axis).
        value_col: Column whose non-null entries are counted. Defaults to
            ``'survived'``, the column the original implementation used.
        colors: Optional sequence of matplotlib colours for the layers. The
            sequence is cycled when there are more layers than colours.

    Returns:
        None. A new figure is created and drawn through ``plt.subplots()``.
    """
    from itertools import cycle

    counts = data.groupby([pile_col, clsf_col])[value_col].count()
    # Rows: pile_col levels (layers); columns: clsf_col levels (bars).
    # fill_value=0 keeps combinations absent from the data from becoming NaN
    # heights, which would also poison every layer stacked on top of them.
    table = counts.unstack(fill_value=0)
    # Normalise each column so the layers of a bar add up to 1.
    shares = table / table.sum()
    # The bottom edge of a layer is the total height of all layers below it.
    bottoms = shares.cumsum() - shares

    if colors is None:
        colors = ['#539caf', '#7663b0', 'darkorange']
    positions = range(len(shares.columns))
    bar_width = 0.15 * len(shares.columns)

    _, axe = plt.subplots()
    # cycle() ensures layers beyond the palette length are still drawn
    # instead of being silently dropped.
    for color, level in zip(cycle(colors), shares.index):
        axe.bar(positions, shares.loc[level].to_numpy(),
                bottom=bottoms.loc[level].to_numpy(), label=level,
                width=bar_width, color=color)

    axe.set_xlabel(shares.columns.name, fontsize=20)
    axe.set_xticks(list(positions))
    axe.set_xticklabels(shares.columns, fontsize=15)
    axe.set_ylabel('passengers survived', fontsize=20)
    axe.legend(fontsize=15)
    # Hide the top and right borders of the plot area.
    axe.spines['top'].set_visible(False)
    axe.spines['right'].set_visible(False)
# Load seaborn's example "titanic" dataset. Without this, `data` would still
# hold the previously loaded dataset, which has none of the columns used
# below.
data = sns.load_dataset("titanic")

# --- Share of survivors and victims per cabin class (stacked bar chart) -----
pile_bar(data,'alive','class')

# Same question drawn with overlaid seaborn bars: a constant column of ones
# gives full-height red bars (everyone), and the mean of `survived` drawn on
# top in blue is the surviving fraction.
data['totle'] = 1
fig, ax = plt.subplots(figsize = (8,5))
sns.barplot(x = "pclass", y = "totle", data = data, color = "r", ax = ax)
sns.barplot(x = "pclass", y = "survived", data = data, color = "b", ax = ax)

# --- Survival share by sex (stacked bar chart) ------------------------------
fig, ax = plt.subplots(figsize = (8,5))
sns.barplot(x = "sex", y = "totle", data = data, color = "r", ax = ax)
sns.barplot(x = "sex", y = "survived", data = data, color = "b", ax = ax)
plt.ylim(0, 1.3)

# --- Fare distribution of survivors vs. victims (box plot) ------------------
# Fresh figure so the boxes are not drawn over the bars above.
plt.figure()
sns.boxplot(x="survived", y="fare", data=data)
plt.title('The relation of the fare and the survived')

# --- Cabin-class distribution per embarkation port (grouped bar chart) ------
# The question is about how many passengers of each class boarded at each
# port, so passengers are counted per (port, class) pair rather than plotting
# the mean of `survived`.
plt.figure()
sns.countplot(x="embarked", hue="class", data=data)

# --- Is travelling alone related to survival? (grouped bar chart) -----------
plt.figure()
sns.countplot(x="survived", hue="alone", data=data)