爬取中国天气网各地区最高气温排行

废话不多说，先上代码：

import requests
from lxml import etree
from pyecharts import Bar
import operator


def parse(url):
    """Scrape one regional forecast page into a list of per-city weather dicts.

    The page is downloaded with ``requests``, decoded as UTF-8 and parsed
    with ``lxml``. Only the first child ``div`` of ``div.hanml`` is read
    (presumably the tab for the current day -- confirm against the page).

    Args:
        url: Address of a regional forecast page.

    Returns:
        A list of dicts, one per table row that was read, with the keys
        ``city``, ``phenomena``, ``diretion``, ``force``, ``tmax``,
        ``night_phenomena``, ``night_diretion``, ``night_force`` and
        ``night_tmax``. Every value is the text scraped from the page;
        nothing is converted to a number.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
        IndexError: If a row lacks one of the expected cells.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    resp = requests.get(url, timeout=10)
    source = resp.content.decode('utf-8')
    html = etree.HTML(source)
    big_div = html.xpath('//div[@class="hanml"]/div[1]/div')
    weather_list = []
    for div in big_div:
        # position()>2 drops the first two rows of each table.
        trs = div.xpath('.//tr[position()>2]')
        # NOTE(review): the first remaining row is skipped as well,
        # presumably because it carries an extra leading cell that shifts
        # the column indexes -- confirm; that row's city is never reported.
        for tr in trs[1:]:
            city = tr.xpath('./td[1]/a/text()')[0]

            # Daytime columns.
            phenomena = tr.xpath('./td[2]/text()')[0]
            wind = tr.xpath('./td[3]/span/text()')
            direction = wind[0]
            force = wind[1]
            tmax = tr.xpath('./td[4]/text()')[0]

            # Night columns: read from the night wind cell, not the
            # daytime one.
            night_phenomena = tr.xpath('./td[5]/text()')[0]
            night_wind = tr.xpath('./td[6]/span/text()')
            night_direction = night_wind[0]
            night_force = night_wind[1]
            # NOTE(review): presumably the overnight low despite the key
            # name -- confirm against the page.
            night_tmax = tr.xpath('./td[7]/text()')[0]

            # The 'diretion' spelling in the keys is kept on purpose so
            # existing consumers of these dicts keep working.
            weather_dict = dict(city=city,
                                phenomena=phenomena,
                                diretion=direction,
                                force=force,
                                tmax=tmax,
                                night_phenomena=night_phenomena,
                                night_diretion=night_direction,
                                night_force=night_force,
                                night_tmax=night_tmax)
            weather_list.append(weather_dict)
    return weather_list


# print(len(weather_list))
# print(weather_list)


def gene_charts(weather_list, name):
    """Render a bar chart of cities ranked by maximum temperature.

    The chart is written to ``<name>.html`` in the current directory.

    Args:
        weather_list: List of dicts that each provide ``city`` and ``tmax``.
            ``tmax`` may be text, as produced by scraping.
        name: Used as chart title, subtitle, series name and as the stem
            of the output file name.
    """
    def tmax_value(item):
        # tmax arrives as text; compare it numerically so that '9' does
        # not outrank '10' and negative values order correctly.
        try:
            return float(item['tmax'])
        except (TypeError, ValueError):
            # Entries without a usable number sort to the end of the
            # descending ranking instead of aborting the chart.
            return float('-inf')

    ranked = sorted(weather_list, key=tmax_value, reverse=True)

    bar = Bar(name, name)
    bar.add(
        name=name,
        x_axis=[item['city'] for item in ranked],
        y_axis=[item['tmax'] for item in ranked],
    )
    bar.render(name + '.html')


def main():
    """Scrape every listed region and render one chart per region."""
    # (region name, forecast page) pairs, processed in this order.
    # The Hong Kong/Macau/Taiwan page
    # (http://www.weather.com.cn/textFC/gat.shtml) is not scraped.
    regions = (
        ('华北', 'http://www.weather.com.cn/textFC/hb.shtml'),
        ('东北', 'http://www.weather.com.cn/textFC/db.shtml'),
        ('华东', 'http://www.weather.com.cn/textFC/hd.shtml'),
        ('华中', 'http://www.weather.com.cn/textFC/hz.shtml'),
        ('华南', 'http://www.weather.com.cn/textFC/hn.shtml'),
        ('西北', 'http://www.weather.com.cn/textFC/xb.shtml'),
        ('西南', 'http://www.weather.com.cn/textFC/xn.shtml'),
    )
    for region, page in regions:
        gene_charts(parse(page), name=region)


# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

效果图（由 pyecharts 生成）

东北.PNG
西南.PNG

你可能感兴趣的:(爬取中国天气网各地区最高气温排行)