# 实验环境
# 各城市最低气温可视化
# Module-level accumulator: parse_page() appends one {'城市': ..., '最低气温': ...}
# dict per city row, and main() sorts this list in place.
All_Data = []
# Source of randomized User-Agent strings (read via ua.random when the request
# headers are built in parse_page()).
# NOTE(review): presumably fake_useragent.UserAgent; the `use_cache_server`
# keyword may not be accepted by newer releases of that package -- confirm
# against the installed version.
ua = UserAgent(use_cache_server=False)
# Page parsing function
def parse_page(url):
    """Scrape one regional forecast page and record each city's minimum temperature.

    Downloads ``url``, locates the first ``div`` with class ``conMidtab`` and
    walks every table inside it. For each data row with at least eight cells,
    one ``{'城市': name, '最低气温': value}`` dict is appended to the
    module-level ``All_Data`` list.

    Args:
        url: Address of the forecast page to download.

    Returns:
        None. Results are accumulated in ``All_Data``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': ua.random
    }
    # A timeout keeps a stalled connection from blocking the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    soup = BeautifulSoup(text, 'lxml')

    conMidtab = soup.find('div', {'class': 'conMidtab'})
    if conMidtab is None:
        # The page does not have the expected container; nothing to collect.
        return

    for table in conMidtab.find_all('table'):
        # The first two rows of every table are skipped (presumably headers).
        for index, tr in enumerate(table.find_all('tr')[2:]):
            tds = tr.find_all('td')
            if len(tds) < 8:
                continue
            # In the first data row the city name is in the second cell
            # (the leading cell presumably holds the province name).
            city_td = tds[1] if index == 0 else tds[0]
            # Strip the whitespace/newlines that surround the cell text.
            city = city_td.get_text().strip()
            min_temp = tds[-2].get_text().strip()
            try:
                min_temp_value = int(min_temp)
            except ValueError:
                # The cell holds no usable number; skip this row rather than
                # aborting the whole scrape.
                continue
            # Add the record to the shared list
            All_Data.append({'城市': city, '最低气温': min_temp_value})
def main():
    """Scrape every regional forecast page and return the ten coldest cities.

    Each regional page is handed to ``parse_page``, which fills the
    module-level ``All_Data`` list. The list is then sorted in place by
    minimum temperature (ascending).

    Returns:
        A list with the first ten entries of the sorted ``All_Data``
        (fewer if less data was collected).
    """
    region_pages = (
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    )
    for page_url in region_pages:
        parse_page(page_url)

    # Analyse the data: order records from lowest to highest minimum temperature
    All_Data.sort(key=lambda record: record['最低气温'])
    # The ten leading records are the coldest cities
    return All_Data[:10]
# Script entry point: fetch the ten coldest cities and draw them as a bar chart.
if __name__ == '__main__':
    datas = main()
    plt.figure(figsize=(15, 9.27))
    plt.rcParams.update({
        'font.sans-serif': ['SimHei'],  # font able to render the Chinese labels
        'axes.unicode_minus': False,    # render the minus sign correctly
    })
    # Split the records into parallel label / value lists for plotting
    city = [record['城市'] for record in datas]
    temp = [record['最低气温'] for record in datas]
    plt.bar(range(len(city)), temp, tick_label=city)
    plt.show()
import pandas as pd
def main(data_path='data/china_city_AQI.csv',
         top10_path='data/top10_aqi.csv',
         bottom10_path='data/bottom10_aqi.csv'):
    """Report basic AQI statistics and export the 10 best and 10 worst cities.

    Reads the AQI table, prints its summary, a preview and the max / min /
    mean of the ``AQI`` column, then writes the ten rows with the lowest AQI
    and the ten rows with the highest AQI to CSV files (without the index).

    Args:
        data_path: CSV file to read; it must contain an ``AQI`` column.
        top10_path: Destination CSV for the ten lowest-AQI rows.
        bottom10_path: Destination CSV for the ten highest-AQI rows.

    Returns:
        None.
    """
    aqi_data = pd.read_csv(data_path)

    print('基本信息:')
    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None".
    aqi_data.info()

    print('数据预览:')
    print(aqi_data.head())

    # Basic statistics
    print('AQI最大值:', aqi_data['AQI'].max())
    print('AQI最小值:', aqi_data['AQI'].min())
    print('AQI均值:', aqi_data['AQI'].mean())

    # Top 10: ascending sort, lowest AQI first
    top10_cities = aqi_data.sort_values(by=['AQI']).head(10)
    print('空气质量最好的10个城市:')
    print(top10_cities)

    # Bottom 10: descending sort, highest AQI first
    bottom10_cities = aqi_data.sort_values(by=['AQI'], ascending=False).head(10)
    print('空气质量最差的10个城市:')
    print(bottom10_cities)

    # Save the results as CSV files
    top10_cities.to_csv(top10_path, index=False)
    bottom10_cities.to_csv(bottom10_path, index=False)


if __name__ == '__main__':
    main()
def main():
    """Clean the AQI table, print statistics and plot the 50 lowest-AQI cities.

    Reads ``data/china_city_AQI.csv``, keeps only rows whose ``AQI`` is
    greater than zero, prints max / min / mean of the cleaned ``AQI`` column,
    then draws a bar chart of the 50 rows with the lowest AQI, saves it to
    ``data/top50_aqi_bar.png`` and shows it.

    Returns:
        None.
    """
    aqi_data = pd.read_csv('data/china_city_AQI.csv')

    print('基本信息:')
    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None".
    aqi_data.info()

    print('数据预览:')
    print(aqi_data.head())

    # Data cleaning: keep only the rows with AQI > 0
    clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]

    # Basic statistics
    print('AQI最大值:', clean_aqi_data['AQI'].max())
    print('AQI最小值:', clean_aqi_data['AQI'].min())
    print('AQI均值:', clean_aqi_data['AQI'].mean())

    # Top 50: ascending sort, lowest AQI first
    top50_cities = clean_aqi_data.sort_values(by=['AQI']).head(50)
    print(top50_cities)

    # The plot call creates the figure (size given via figsize). No further
    # plt.figure() call may follow: it would make a new, empty figure the
    # current one, and savefig()/show() would then output a blank image.
    top50_cities.plot(kind='bar', x='City', y='AQI', title='空气质量最好的50个城市',
                      figsize=(20, 10))
    plt.savefig('data/top50_aqi_bar.png')
    plt.show()


if __name__ == '__main__':
    main()
# pyecharts setup for rendering inside JupyterLab.
# NOTE(review): the notebook type presumably has to be configured before the
# chart classes are imported, which would explain why this assignment sits
# between the imports -- confirm against the pyecharts notebook documentation.
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
from pyecharts.globals import ThemeType
from pyecharts.charts import Bar
import pyecharts.options as opts
# Load the AQI table and print a summary plus a preview.
aqi_data = pd.read_csv('data/china_city_AQI.csv')
print('基本信息: ')
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None".
aqi_data.info()
print('数据预览: ')
print(aqi_data.head())

# Data cleaning: keep only the rows with AQI > 0
clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]

# Basic statistics
print('AQI最大值:{}'.format(clean_aqi_data['AQI'].max()))
print('AQI最小值:{}'.format(clean_aqi_data['AQI'].min()))
print('AQI均值:{}'.format(clean_aqi_data['AQI'].mean()))

# The 50 rows with the lowest AQI, in ascending order
top50_cities = clean_aqi_data.sort_values(by=['AQI']).head(50)

# Dark-themed bar chart: city names on the x axis, AQI values on the y axis.
bar = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add_xaxis(top50_cities['City'].tolist())
    # Unnamed series with value labels hidden
    .add_yaxis('', top50_cities['AQI'].tolist(), label_opts=opts.LabelOpts(is_show=False), category_gap='50%')
    .set_global_opts(title_opts=opts.TitleOpts(title='空气质量指数最优TOP50城市'),
                     # Axis labels rotated by 30; interval=0 presumably forces
                     # every city label to be drawn -- TODO confirm
                     xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30, interval=0)),
                     # Two zoom controls: the default one plus an 'inside' one
                     datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_='inside')]
                     )
)
bar.load_javascript()
bar.render_notebook()