一、指标展示与数据样例
![指定城市|日度空气质量数据爬虫【附代码】_第1张图片](http://img.e-com-net.com/image/info8/42b4caf97cf348b183640b793c740b90.jpg)
二、代码
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import xlwt
# (1) Request headers: replace the placeholder value with your own browser's
#     User-Agent string before running.
headers = {'User-Agent': '浏览器的UA'}

# (2) Sample cities grouped by province (a few cities from Shandong and
#     Jiangsu). Each value is the city slug that appears in the site's URLs.
city = {
    "山东": ['jinan', 'zibo', 'binzhou', 'dongying'],
    "江苏": ['nanjing', 'huaian', 'lianyungang'],
}

# (3) Time window: 2020/12 through 2021/11, as 'YYYYMM' strings.
month2021 = ['202012']
month2021.extend('2021{:02d}'.format(m) for m in range(1, 12))
def getWeather(city, month):
    """Download and parse one city's daily air-quality table for one month.

    Args:
        city: City slug used in the page URL (e.g. 'jinan').
        month: Month as a 'YYYYMM' string (e.g. '202012').

    Returns:
        A list of 10 lists, one per column, in this order: date, quality
        level, AQI, AQI rank, PM2.5, PM10, SO2, NO2, CO, O3. Every value is
        the raw string taken from the page; all 10 lists have equal length.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = 'http://www.tianqihoubao.com/aqi/' + city + '-' + month + '.html'
    response = requests.get(url=url, headers=headers, timeout=32)
    # Force GBK decoding before reading .text (kept from the original code).
    response.encoding = "gbk"
    soup = BeautifulSoup(response.text, 'lxml')

    # The first <tr> is skipped -- presumably the table's header row.
    data_rows = soup.find_all('tr')[1:]
    print(city, month, len(data_rows))

    # (4) All ten indicators are collected by default; trim the columns you
    #     do not need (and adjust the header list used when exporting).
    n_fields = 10
    columns = [[] for _ in range(n_fields)]
    for tr in data_rows:
        fields = tr.text.split()
        if len(fields) < n_fields:
            # Blank or otherwise incomplete rows would raise IndexError when
            # indexed positionally; skip them instead of aborting the crawl.
            continue
        # zip() stops after n_fields values, so only the first ten
        # whitespace-separated tokens of the row are kept.
        for column, value in zip(columns, fields):
            column.append(value)
    return columns
# (5) Output location: change this to a directory on your own machine.
file = r'C:\Users\86173\Desktop\天气案例'
os.makedirs(file, exist_ok=True)

# Header row written at the top of every sheet; the order matches the
# columns returned by getWeather().
head = ['日期','质量等级','AQI指数','当天AQI排名','PM2.5','PM10','So2','No2','Co','O3']

for pro in city:
    # One sub-directory per province.
    city_file = os.path.join(file, pro)
    os.makedirs(city_file, exist_ok=True)

    for val in city[pro]:
        # One workbook per city, one sheet per month.
        book = xlwt.Workbook(encoding='utf-8')
        # Iterate the month list itself so that editing the time window
        # never goes out of step with a hard-coded month count.
        for month in month2021:
            sheet = book.add_sheet(str(month))
            item = getWeather(val, month)
            for col, title in enumerate(head):
                sheet.write(0, col, title)
            # item is column-major: item[col][row]. Data starts on sheet
            # row 1, directly below the header row.
            for col, values in enumerate(item):
                for row, value in enumerate(values, start=1):
                    sheet.write(row, col, value)
        # Save with a full path so the process's working directory does not
        # need to be changed.
        book.save(os.path.join(city_file, '样例-' + val + '.xls'))
PS:代码中标注 (1)~(5) 的位置需要根据自己的情况修改;其中 (4) 为可选项,可按需删减不需要的指标。
三、结果展示
![指定城市|日度空气质量数据爬虫【附代码】_第2张图片](http://img.e-com-net.com/image/info8/ddac06609ee84effae407c23433c327e.jpg)
![指定城市|日度空气质量数据爬虫【附代码】_第3张图片](http://img.e-com-net.com/image/info8/4508082436bc4288adb34858f7107866.jpg)