# This code is generally suitable for data whose year, month and day all correspond.
import pandas as pd
# Load attachment 1 (pollutant concentration data) and attachment 2
# (meteorological data) from the local data directory.
df1 = pd.read_excel('./data/附件1:污染物浓度数据.xlsx')
df2 = pd.read_excel('./data/附件2:气象数据.xlsx')

# Left-join the attachment-2 rows onto the attachment-1 rows: 年/月/日 on the
# left are matched against V04001/V04002/V04003 on the right. The right-hand
# key columns are redundant after the join, so they are dropped straight away.
df = df1.merge(
    df2,
    how='left',
    left_on=['年', '月', '日'],
    right_on=['V04001', 'V04002', 'V04003'],
).drop(columns=['V04001', 'V04002', 'V04003'])

# Save the merged table without the row index.
df.to_excel('./result/merge_table-t.xlsx', index=False)
# `plt` and `sns` are used by the charts below but were never imported, which
# made the first plotting call fail with NameError; import them here together
# with matplotlib itself.
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# Use the SimHei font for Chinese text, and keep the minus sign from being
# rendered as an empty box while that font is active.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False
# Pie chart: share of rows falling into each value of the '质量等级'
# (air-quality grade) column, with percentages printed on the wedges.
pollution_count = df['质量等级'].value_counts()
ax = plt.gca()
ax.pie(pollution_count.values, labels=pollution_count.index, autopct='%1.1f%%')
ax.set_title('Air Quality Distribution')
plt.show()
# Area chart: mean AQI for each value of the '月' (month) column.
aqi_by_month = df.groupby('月')['AQI'].mean()
ax = plt.gca()
ax.fill_between(aqi_by_month.index, aqi_by_month.values)
ax.set_title('Monthly Average AQI')
ax.set_xlabel('Month')
ax.set_ylabel('AQI')
plt.show()
# Heatmap: mean of each pollutant column, with one row per '质量等级'
# (air-quality grade) and one column per (pollutant, month) pair.
# Despite its name, the table is indexed by air-quality grade, not by city.
pollutants_by_city_month = df.pivot_table(
    values=['PM2.5', 'NO2', 'SO2', 'O3', 'CO'],
    index='质量等级',
    columns='月',
    aggfunc='mean',
)
ax = sns.heatmap(pollutants_by_city_month, cmap='Blues')
ax.set_title('Pollutant Levels by Air Quality and Month')
ax.set_xlabel('Month')
ax.set_ylabel('Air Quality')
plt.show()
# Histogram of the AQI column, split into 20 bins.
ax = plt.gca()
ax.hist(df['AQI'], bins=20)
ax.set_title('AQI Distribution')
ax.set_xlabel('AQI')
ax.set_ylabel('Frequency')
plt.show()
# Scatter plot of PM2.5 (x) against AQI (y); both columns are cast to float
# before plotting.
pm25_values = df['PM2.5'].astype(float)
aqi_values = df['AQI'].astype(float)
plt.scatter(pm25_values, aqi_values)
plt.title('PM2.5 vs AQI')
plt.xlabel('PM2.5')
plt.ylabel('AQI')
plt.show()
# Scatter-matrix (pairwise plots) of AQI and the five pollutant columns.
pairplot_columns = ['AQI', 'PM2.5', 'NO2', 'SO2', 'O3', 'CO']
sns.pairplot(df[pairplot_columns])
plt.show()
# Mean AQI for each value of the '月' (month) column.
aqi_by_month = df.groupby('月')['AQI'].mean()

# Bar chart of those means; the month index is converted to strings before
# being used as the bar positions.
month_labels = aqi_by_month.index.astype(str)
ax = plt.gca()
ax.bar(month_labels, aqi_by_month.values)
ax.set_title('Monthly Average AQI')
ax.set_xlabel('Month')
ax.set_ylabel('AQI')
plt.show()
# Mean AQI and PM2.5 for every ('月', '日') pair, flattened back into columns.
pollutants_by_day = df.groupby(['月', '日'])[['AQI', 'PM2.5']].mean().reset_index()

# Line chart with one line per column. The x-axis is the row position produced
# by reset_index(), not an actual date value.
ax = plt.gca()
for column in ('AQI', 'PM2.5'):
    ax.plot(pollutants_by_day.index, pollutants_by_day[column], label=column)
ax.set_title('Daily Average AQI and PM2.5')
ax.set_xlabel('Date')
ax.set_ylabel('Concentration')
ax.legend()
plt.show()
# Collect the PM2.5 readings of each '质量等级' (air-quality grade) into a list.
pm_by_quality = df.groupby('质量等级')['PM2.5'].apply(list)

# Box plot with one box per grade, labelled with the grade names.
ax = plt.gca()
ax.boxplot(pm_by_quality.values, labels=pm_by_quality.index)
ax.set_title('PM2.5 Distribution by Air Quality')
ax.set_xlabel('Air Quality')
ax.set_ylabel('PM2.5')
plt.show()