python学习(目前的问题)11.14

这个是学习网站的代码参考(爬取的网站已经不能用了):

import requests
from bs4 import BeautifulSoup


def get_city_aqi(city_pinyin):
    """Fetch the first eight AQI metrics for one city from pm25.in.

    Args:
        city_pinyin: city identifier appended to the site root URL.

    Returns:
        A list of eight (caption, value) string tuples.
    """
    page = requests.get('http://pm25.in/' + city_pinyin, timeout=30)
    soup = BeautifulSoup(page.text, 'lxml')
    span_divs = soup.find_all('div', {'class': 'span1'})

    # Indexing (rather than slicing) keeps the original IndexError
    # if the page yields fewer than eight metric divs.
    return [
        (span_divs[idx].find('div', {'class': 'caption'}).text.strip(),
         span_divs[idx].find('div', {'class': 'value'}).text.strip())
        for idx in range(8)
    ]


def get_all_cities():
    """Scrape the pm25.in front page for every listed city.

    Returns:
        A list of (city_name, city_pinyin) tuples; the pinyin is the
        link href with its leading character stripped.
    """
    page = requests.get('http://pm25.in/', timeout=30)
    soup = BeautifulSoup(page.text, 'lxml')

    # The second 'bottom' div on the page holds the full city link list.
    link_container = soup.find_all('div', {'class': 'bottom'})[1]
    return [
        (anchor.text, anchor['href'][1:])
        for anchor in link_container.find_all('a')
    ]


def main():
    """Crawl the AQI metrics for every city and print them."""
    for city_name, city_pinyin in get_all_cities():
        print(city_name, get_city_aqi(city_pinyin))


if __name__ == '__main__':
    main()

这个是我的代码:

"""
    作者:sirosilk
    日期:2019.11.14
    功能:空气质量爬虫
    版本:6.0
"""
import requests
from bs4 import BeautifulSoup
from pprint import pprint


def get_city_aqi(city_pinyin):
    """Fetch the headline AQI value and level text for one city.

    Args:
        city_pinyin: city identifier used in the site URL (e.g.
            'beijing'), WITHOUT the '.htm' suffix — this function
            appends it.

    Returns:
        A single-element list containing one (aqi_value, aqi_level_text)
        tuple, matching the original return shape.

    Raises:
        IndexError: if the page contains no 'report r1' div (the
        original range(1) loop indexed div_list[0] and raised the same).
    """
    url = 'http://www.pm25x.com/city/' + city_pinyin + '.htm'
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')

    # The first 'report r1' div holds the headline AQI reading; the
    # original `for i in range(1)` loop ran exactly once, so a direct
    # [0] index is equivalent and clearer.
    report = soup.find_all('div', {'class': 'report r1'})[0]

    aqi_value = report.find('div', {'class': 'aqivalue'}).text.strip()
    aqi_level = report.find('div', {'class': 'aqileveltext'}).text.strip()
    return [(aqi_value, aqi_level)]


def get_all_cities():
    """Scrape the pm25x.com city index for every listed city.

    Returns:
        A list of (city_name, city_pinyin) tuples. The second element is
        the bare identifier that get_city_aqi() expects — that function
        builds 'http://www.pm25x.com/city/<pinyin>.htm' itself, so
        storing a full URL here (as the original did) would produce a
        doubled, broken URL downstream.
    """
    url = 'http://www.pm25x.com/'
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')

    city_list = []
    for city_block in soup.find_all('dl', {'class': 'citylist'}):
        for city_link in city_block.find_all('a'):
            href = city_link['href']
            # NOTE(review): assumes href looks like '.../<pinyin>.htm';
            # strip any directory prefix and the '.htm' extension so the
            # value matches get_city_aqi()'s URL template — confirm
            # against the live page markup.
            pinyin = href.rsplit('/', 1)[-1]
            if pinyin.endswith('.htm'):
                pinyin = pinyin[:-len('.htm')]
            # Bug fix: the original called city_list.append(a, b) with two
            # positional arguments — list.append takes exactly one, so the
            # script crashed with TypeError. Append a single tuple instead.
            city_list.append((city_link.string, pinyin))
    return city_list


def main():
    """Crawl the AQI reading for every city and print it."""
    for city_name, city_pinyin in get_all_cities():
        print(city_name, get_city_aqi(city_pinyin))


if __name__ == '__main__':
    main()

你可能感兴趣的:(python学习)