# 第1关 数据介绍 — Level 1: Introduction to the data
# No import statements are visible in this file, yet `pd` is used right here
# and `np` is used by later sections; import both so the script can run
# outside an environment that pre-loads them.
import numpy as np
import pandas as pd

# Load the 2015 World Happiness data set from the current working directory.
happiness2015 = pd.read_csv('World_Happiness_2015.csv')

# Keep the leading rows around for a quick visual check of the columns.
first_5 = happiness2015.head()

# info() writes its summary to stdout itself, so it is not wrapped in print().
happiness2015.info()
print(first_5)
# 第2关 用循环进行数据聚合 — Level 2: Aggregating data with a loop
# Manual "split-apply-combine": for every region, select its rows, average the
# 'Happiness Score' column and store the result under the region name.
# NOTE(review): `Region` and `mean_happiness` are not assigned anywhere in the
# visible code; presumably the exercise environment supplies an iterable of
# region names and an empty dict — confirm before running standalone.
for reg in Region:
    # Split: boolean mask keeps only the rows whose 'Region' equals `reg`.
    region_group = happiness2015[happiness2015['Region'] == reg]
    # Apply: mean happiness score of that subset.
    region_mean = region_group['Happiness Score'].mean()
    # Combine: collect the per-region result.
    mean_happiness[reg] = region_mean
# 第3关 GroupBy的使用 — Level 3: Using GroupBy
# Split the frame by the 'Region' column, then pull out the sub-frame that
# belongs to a single group key.
grouped = happiness2015.groupby(by='Region')
aus_nz = grouped.get_group(name='Australia and New Zealand')
print(aus_nz)
# 第4关 探索GroupBy对象 — Level 4: Exploring GroupBy objects
# Check that get_group() hands back the same rows as selecting them by
# position directly from the original frame.
grouped = happiness2015.groupby(by='Region')

# Rows picked by integer position.
# NOTE(review): presumably positions 4 and 14 are the two 'North America'
# rows of the CSV — confirm against the data file.
north_america = happiness2015.iloc[[4, 14]]

# The same region as returned by the GroupBy object.
na_group = grouped.get_group(name='North America')

# Element-wise comparison; `==` requires both frames to carry identical labels.
equal = north_america == na_group

# One object per line, exactly as three consecutive print() calls would emit.
print(north_america, na_group, equal, sep='\n')
# 第5关 GroupBy的公共聚合方法的使用 — Level 5: Using GroupBy's common aggregation methods
# Per-region mean of every numeric column.
grouped = happiness2015.groupby('Region')

# numeric_only=True restricts the aggregation to numeric columns. pandas 2.0+
# no longer drops non-numeric columns silently and raises TypeError instead,
# so this keeps the result older pandas versions produced.
# NOTE(review): assumes the CSV contains at least one text column besides
# 'Region' (e.g. a country name) — if it does not, the flag is a no-op.
means = grouped.mean(numeric_only=True)
print(means)
# 第6关 使用GroupBy聚合特定列 — Level 6: Aggregating specific columns with GroupBy
# Narrow the GroupBy object to a single column before aggregating, so only
# 'Happiness Score' is averaged per region.
grouped = happiness2015.groupby(by='Region')
happy_grouped = grouped['Happiness Score']  # column selection on the groups
happy_mean = happy_grouped.mean()           # one mean per region
print(happy_mean)
# 第7关 学习Agg()方法的使用 — Level 7: Learning to use the agg() method
def dif(x):
    """Return how far the maximum of ``x`` lies above its mean.

    Intended as a custom aggregation function: it reduces a collection of
    numbers to the single value ``max(x) - mean(x)``.

    Args:
        x: Array-like of numbers (anything ``np.max`` / ``np.mean`` accept).

    Returns:
        The difference between the largest value and the arithmetic mean.
    """
    return np.max(x) - np.mean(x)
# Both aggregations run on `happy_grouped`, the per-region 'Happiness Score'
# groups built in the Level 6 section of this file.

# Several aggregations at once: a list of functions is applied to each group.
happy_mean_max = happy_grouped.aggregate([np.mean, np.max])

# A user-defined aggregation: `dif` is called once per group.
mean_max_dif = happy_grouped.aggregate(dif)

# One object per line, same output as two consecutive print() calls.
print(happy_mean_max, mean_max_dif, sep='\n')
# 第8关 Agg()方法使用(二) — Level 8: Using the agg() method (part 2)
# NOTE(review): `happiness_means` is never assigned in the visible code;
# presumably the exercise environment pre-defines it — confirm, otherwise this
# line raises NameError when the script is run on its own.
print(happiness_means)
# 第9关 使用df.pivot_table()进行聚合 — Level 9: Aggregating with df.pivot_table()
# Mean 'Happiness Score' per region as a pivot table; margins=True adds an
# extra summary row computed over all rows.
pv_happiness = happiness2015.pivot_table(
    values='Happiness Score',
    index='Region',
    aggfunc='mean',
    margins=True,
)

# Horizontal bar chart of the pivot table on a fixed 0-10 score axis.
pv_happiness.plot.barh(
    xlim=(0, 10),
    title='Mean Happiness Scores by Region',
    legend=False,
)

# Overall mean computed directly from the column, printed next to the pivot
# table so the two can be compared.
world_mean_happiness = happiness2015['Happiness Score'].mean()

print(pv_happiness, world_mean_happiness, sep='\n')
# 第10关 df.pivot_table()使用(二) — Level 10: Using df.pivot_table() (part 2)
# Same statistics computed two ways: GroupBy.agg() and a pivot table.

# GroupBy route: min / max / mean of two columns for every region.
grouped = happiness2015.groupby(by='Region')[['Happiness Score', 'Family']]
happy_family_stats = grouped.agg([np.min, np.max, np.mean])

# Pivot-table route: same columns and functions, plus the summary row that
# margins=True appends.
pv_happy_family_stats = happiness2015.pivot_table(
    values=['Family', 'Happiness Score'],
    index=['Region'],
    aggfunc=[np.min, np.max, np.mean],
    margins=True,
)

# One object per line, same output as two consecutive print() calls.
print(happy_family_stats, pv_happy_family_stats, sep='\n')