# 网络爬虫之在线获取城市空气质量指数(AQI)源代码
# (Web crawler: fetch city Air Quality Index (AQI) data online)

#!/usr/bin/env python
#-*- coding:utf-8 -*-
'''
@author  : FIGTHING
@file    : Air_quality_computing.py
@function: Get city air quality information
@software: Pycharm
@time    : 2019/04/03/17:37
'''


import csv
import os
import requests
from bs4 import BeautifulSoup

def get_city_aqi(city_pinyin):
    """Fetch the current AQI readings for one city from pm25.in.

    Args:
        city_pinyin: the city's pinyin slug, appended to the site URL
            (e.g. "beijing" -> http://www.pm25.in/beijing).

    Returns:
        A list of 8 strings in page order: AQI, PM2.5/1h, PM10/1h,
        CO/1h, NO2/1h, O3/1h, O3/8h, SO2/1h.
    """
    url = "http://www.pm25.in/" + city_pinyin
    r = requests.get(url, timeout=30)

    # Parse the page; each metric is displayed inside a div.span1 box.
    soup = BeautifulSoup(r.text, "lxml")
    div_list = soup.find_all("div", {"class": "span1"})

    # Only the first 8 span1 boxes carry the numeric readings we want.
    # .text.strip() drops the whitespace surrounding each value.
    city_aqi = []
    for div_content in div_list[:8]:
        value = div_content.find("div", {"class": "value"}).text.strip()
        city_aqi.append(value)
    return city_aqi

def get_all_cities():
    """Scrape the pm25.in home page and list every monitored city.

    Returns:
        A list of (city_name, city_pinyin) tuples; city_pinyin is the
        URL path segment (href minus the leading "/") accepted by
        get_city_aqi().
    """
    home_url = "http://www.pm25.in/"
    response = requests.get(home_url, timeout=30)

    soup = BeautifulSoup(response.text, "lxml")

    # The second div.bottom on the page is the box holding the city links.
    bottom_div = soup.find_all("div", {"class": "bottom"})[1]

    # Each <a> gives the display name; its href yields the pinyin slug.
    return [
        (link.text, link["href"][1:])
        for link in bottom_div.find_all("a")
    ]
def main():
    """Fetch AQI data for every city and write it to a CSV file.

    Produces city_air_quality_aqi.csv with one header row followed by
    one row per city; prints a progress line every 10 cities.
    """
    city_list = get_all_cities()
    header = ["city","AQI","PM2.5/1h","PM10/1h","CO/1h","NO2/1h","O3/1h","O3/8h","SO2/1h"]
    with open("city_air_quality_aqi.csv","w",encoding="utf-8",newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for count, (city_name, city_pinyin) in enumerate(city_list, start=1):
            # Progress report every 10 cities.
            if count % 10 == 0:
                print("Saving {} Data (Total {} Data)".format(count,len(city_list)))
            writer.writerow([city_name] + get_city_aqi(city_pinyin))


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__" :
    main()

# "Dreams save us. Dreams lift us up and transform us into something better." --《超人大战猎杀团》

# 你可能感兴趣的:(Python3.7,网络爬虫)