Python爬取PM2.5数据并存入MySQL数据库

爬取网站:http://www.pm25.in/jiangyin,http://www.pm25.in/suzhou

完整代码:

# 导入模块
import datetime

from bs4 import BeautifulSoup
import requests
import pymysql

# Open the database connection and create one cursor that the rest of the
# script shares. `password` / `database` are PyMySQL's current keyword names;
# the `passwd` / `db` spellings are deprecated compatibility aliases.
# NOTE(review): credentials are hard-coded and the target schema is the
# `mysql` system database -- confirm both are intended outside a local demo.
conn = pymysql.connect(host='localhost', user='root', password='root', database='mysql', port=3306, charset='utf8')

cursor = conn.cursor()

# Fetch one page with a browser-like User-Agent and store its table rows.
def get_temperature(url):
    """Scrape the data table at *url* and insert one ``aqidata`` row per table row.

    The page is downloaded with ``requests``, parsed with BeautifulSoup
    (``lxml`` parser), and every ``<tr>`` found inside a ``<tbody>`` of the
    ``div.table`` element is written through the module-level ``cursor``.
    Nothing is committed here; the caller is responsible for
    ``conn.commit()``.

    Args:
        url: Address of the page to scrape.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: The page lacks one of the expected elements
            (``div.table``, ``div.live_data_time > p``, ``div.city_name``).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }

    # Without a timeout a stalled server would block the script indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")

    table = soup.find('div', class_='table')
    time_box = soup.find('div', class_='live_data_time')
    time_tag = time_box.find('p') if time_box is not None else None
    city = soup.find('div', class_='city_name')
    if table is None or time_tag is None or city is None:
        raise ValueError('unexpected page layout at {}'.format(url))

    # These values are identical for every row, so compute them once.
    # NOTE(review): the [7:26] slice presumably drops a fixed-length label and
    # keeps a 19-character timestamp -- confirm against the live page.
    date = time_tag.text[7:26]
    city_name = city.text

    # Placeholders let the driver quote and escape the scraped text; building
    # the statement with string formatting broke on quotes and allowed injection.
    sql = ("INSERT INTO aqidata(POSITION, DATE, AQI, GRADE, PM25,PM10,SO2,CO,NO2,O3_8h,CITY) "
           "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")

    # <td> indexes, in order, for POSITION, AQI, GRADE, PM25, PM10, SO2, CO, NO2, O3_8h.
    cell_indexes = (0, 1, 2, 4, 5, 10, 6, 7, 9)
    min_cells = max(cell_indexes) + 1

    for tbody in table.find_all('tbody'):
        for tr in tbody.find_all('tr'):
            td_list = tr.find_all('td')
            if len(td_list) < min_cells:
                # Rows without the full set of cells cannot be mapped; skip them.
                continue

            print(date)
            cells = [td_list[i].text.replace('\n', '') for i in cell_indexes]
            # Column order: POSITION, DATE, <remaining measurements>, CITY.
            params = [cells[0], date] + cells[1:] + [city_name]
            cursor.execute(sql, params)


if __name__ == '__main__':
    # Script entry point: scrape each city page, then commit everything at once.
    urls = ['http://www.pm25.in/jiangyin', 'http://www.pm25.in/suzhou']

    try:
        for url in urls:
            get_temperature(url)
        # Commit only after every page succeeded, so a failure part-way
        # through leaves no partially-written batch behind.
        conn.commit()
    finally:
        # Release the cursor and the connection whether or not scraping worked.
        cursor.close()
        conn.close()

你可能感兴趣的:(Python爬取PM2.5数据并入mysql库)