import requests
from bs4 import BeautifulSoup
from pyecharts import Bar#表格库
def main():
    """Scrape every regional forecast page listed below.

    Each URL is handed to ``parse_data``, which does the downloading and
    parsing; this function only drives the loop and returns nothing.
    """
    # One textFC page per region of the site; plain ASCII quotes are required
    # here, typographic quotes are not valid string delimiters in Python.
    urls = [
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    ]
    for url in urls:
        parse_data(url)
# Module-level accumulator filled by parse_data(); each entry is a dict of the
# form {'city': <str>, 'min_tem': <int>}.
all_data = []
def parse_data(url):
    """Scrape one forecast page and refresh the lowest-temperature chart.

    Downloads ``url``, looks up the first ``div`` with class ``conMidtab`` and
    appends one ``{'city': str, 'min_tem': int}`` record per usable table row
    to the module-level ``all_data`` list. Afterwards ``all_data`` is sorted
    in place by ``min_tem`` (ascending) and its first ten entries are rendered
    as a bar chart to ``tempature.html``.

    Args:
        url: Address of the forecast page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'
    }
    # A timeout keeps a single unresponsive server from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    # Decode the raw bytes explicitly; response.text produced garbled
    # characters for these pages.
    text = response.content.decode('utf-8')
    # html5lib is slow but tolerant of malformed markup; lxml is faster but
    # expects well-formed markup.
    soup = BeautifulSoup(text, 'html5lib')
    container = soup.find('div', attrs={'class': 'conMidtab'})
    if container is not None:
        all_data.extend(_extract_min_temperatures(container))
    _render_lowest_chart()


def _extract_min_temperatures(container):
    """Return city/min_tem records for every usable row in ``container``'s tables."""
    records = []
    for table in container.find_all('table'):
        # The first two rows of every table are skipped (treated as headers).
        rows = table.find_all('tr')[2:]
        for index, row in enumerate(rows):
            cells = row.find_all('td')
            # Two cells are the minimum needed for both cells[1] and cells[-2].
            if len(cells) < 2:
                continue
            # In the first data row the city sits in the second cell --
            # presumably because a row-spanning province cell comes first
            # (TODO confirm against the page markup).
            city_cell = cells[1] if index == 0 else cells[0]
            # stripped_strings yields the text pieces with surrounding
            # whitespace and blank entries removed.
            city_parts = list(city_cell.stripped_strings)
            # The second-to-last cell is read as the minimum temperature.
            temperature_parts = list(cells[-2].stripped_strings)
            if not city_parts or not temperature_parts:
                continue
            try:
                # Stored as int so the records can be sorted numerically.
                min_tem = int(temperature_parts[0])
            except ValueError:
                # Non-numeric placeholder instead of a reading; skip the row.
                continue
            records.append({'city': city_parts[0], 'min_tem': min_tem})
    return records


def _render_lowest_chart():
    """Sort ``all_data`` by ``min_tem`` and render its first ten entries as a bar chart."""
    all_data.sort(key=lambda record: record['min_tem'])
    lowest = all_data[0:10]
    # x axis: city names; y axis: their minimum temperatures.
    cities = [record['city'] for record in lowest]
    min_tems = [record['min_tem'] for record in lowest]
    chart = Bar('中国天气最低排行榜')
    chart.add('', cities, min_tems)
    chart.render('tempature.html')