# <Continued from the previous post>
# Fraction of rows falling into each 'Grade' value, per city and year:
# the size of each group divided by the total row count of that frame.
gz2015_grade = df_gz2015.groupby(['Grade']).size() / len(df_gz2015)
gz2016_grade = df_gz2016.groupby(['Grade']).size() / len(df_gz2016)
bj2015_grade = df_bj2015.groupby(['Grade']).size() / len(df_bj2015)
bj2016_grade = df_bj2016.groupby(['Grade']).size() / len(df_bj2016)
# Row labels for the air-quality grade table, in display order.
ix_grade = ['Good', 'Moderate', 'Unhealthy for Sensi', 'Unhealthy', 'Very Unhealthy', 'Hazardous','Beyond Index']
# Start the table with the Guangzhou 2015 shares, aligned to ix_grade.
# A label that does not occur in the data is left as NaN.
df_grade = pd.DataFrame({'gz2015': gz2015_grade}, index=ix_grade)
# Add the other three series; column assignment aligns on the row labels.
df_grade['gz2016'] = gz2016_grade
df_grade['bj2015'] = bj2015_grade
df_grade['bj2016'] = bj2016_grade
# Inspect the table: grade shares for both cities and both years.
df_grade
# Pie chart of the Guangzhou 2016 grade shares.
# Plain column selection replaces `.ix`, which was removed in pandas 1.0.
df_grade['gz2016'].plot.pie(title='Guangzhou 2016 AQI', autopct='%.1f%%', fontsize=12, figsize=(6, 6))
# Side-by-side bar charts comparing years and cities.
df_grade[['gz2015', 'gz2016']].plot.bar(title='Guangzhou AQI 2015 vs 2016', figsize=(8, 6), fontsize=12)
df_grade[['gz2016', 'bj2016']].plot.bar(title='2016 AQI Guangzhou vs Beijing', figsize=(8, 6), fontsize=12)
df_grade[['bj2015', 'bj2016']].plot.bar(title='Beijing AQI 2015 vs 2016', figsize=(8, 6), fontsize=12)
# Monthly mean of the PM2.5 readings ('Value') for each city and year.
gz2015_month = df_gz2015.groupby(['Month'])['Value'].mean()
gz2016_month = df_gz2016.groupby(['Month'])['Value'].mean()
bj2015_month = df_bj2015.groupby(['Month'])['Value'].mean()
bj2016_month = df_bj2016.groupby(['Month'])['Value'].mean()
# One row per month label 1..12; a month missing from a series becomes NaN.
df_month = pd.DataFrame({'gz2015': gz2015_month}, index=np.arange(1, 13))
df_month['gz2016'] = gz2016_month
df_month['bj2015'] = bj2015_month
df_month['bj2016'] = bj2016_month
df_month
# Line charts of the monthly means.
# Plain column selection replaces `.ix`, which was removed in pandas 1.0.
df_month[['gz2015', 'gz2016']].plot(title='Guangzhou PM2.5 Monthly Avg. 2015 vs 2016', figsize=(8, 4))
df_month[['bj2015', 'bj2016']].plot(title='Beijing PM2.5 Monthly Avg. 2015 vs 2016', figsize=(8, 4))
df_month[['gz2016', 'bj2016']].plot(title='2016 PM2.5 Monthly Avg. Beijing vs Guangzhou', figsize=(8, 4))
# Build one table with a PM2.5 column per city/year, matched on
# (Month, Day, Hour). Start from the Guangzhou 2015 readings.
merge_keys = ['Month', 'Day', 'Hour']
df_hour = df_gz2015[merge_keys + ['Value']].rename(columns={'Value': 'gz2015'})
df_hour.describe()
# merge() defaults to an inner join, so only (Month, Day, Hour) combinations
# present in every merged frame survive.
# NOTE(review): that presumably drops Feb 29 of 2016 as well - confirm.
# Each incoming 'Value' column is relabelled with DataFrame.rename before the
# next merge; rename_axis no longer accepts a label mapping for this purpose.
df_hour = df_hour.merge(df_gz2016[merge_keys + ['Value']], on=merge_keys)
df_hour = df_hour.rename(columns={'Value': 'gz2016'})
df_hour.describe()
df_hour = df_hour.merge(df_bj2015[merge_keys + ['Value']], on=merge_keys)
df_hour = df_hour.rename(columns={'Value': 'bj2015'})
df_hour = df_hour.merge(df_bj2016[merge_keys + ['Value']], on=merge_keys)
df_hour = df_hour.rename(columns={'Value': 'bj2016'})
df_hour.head()
df_hour.describe()
df_hour.head()
# Guangzhou year over year: count and share of matched hours where the 2015
# reading was higher, then where it was lower. The 1.0 factor forces float
# division. Comparisons involving NaN are False, so such rows count for
# neither side.
gz2015_higher = df_hour['gz2015'] > df_hour['gz2016']
gz2015_lower = df_hour['gz2015'] < df_hour['gz2016']
len(df_hour[gz2015_higher]), 1.0 * len(df_hour[gz2015_higher]) / len(df_hour)
len(df_hour[gz2015_lower]), 1.0 * len(df_hour[gz2015_lower]) / len(df_hour)
df_hour[['gz2015', 'gz2016']].plot(title='Guangzhou PM2.5 Hourly 2015 vs 2016', figsize=(12, 4))
# Beijing vs Guangzhou in 2016: count and share of matched hours where the
# Beijing reading was higher, then where it was lower.
bj2016_higher = df_hour['bj2016'] > df_hour['gz2016']
bj2016_lower = df_hour['bj2016'] < df_hour['gz2016']
len(df_hour[bj2016_higher]), 1.0 * len(df_hour[bj2016_higher]) / len(df_hour)
len(df_hour[bj2016_lower]), 1.0 * len(df_hour[bj2016_lower]) / len(df_hour)
df_hour[['bj2016', 'gz2016']].plot(title='2016 PM2.5 Hourly Beijing vs Guangzhou', figsize=(12, 4))