python爬取动态网站

# coding=utf-8    
from selenium import webdriver    
import re
import time
import os
import pymysql

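# Installation
# pip install selenium
# pip install pymysql
# (time, re and os ship with the Python standard library -- no pip install needed)
# http://chromedriver.storage.googleapis.com/index.html?path=2.9/  download the chromedriver build matching your Chrome version and place it in the Python root directory (also usable on Linux)
# https://blog.csdn.net/Fiverya/article/details/98869750  configuring selenium webdriver on Linux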

# print("start")
# Poll the guahao.com epidemic page with a Chrome browser and mirror its
# headline counters into row id=1 of the MySQL table `md_condition`.

# Page whose counters are scraped.
url = 'https://promo.guahao.com/topic/pneumonia?cs=share&from=groupmessage&isappinstalled=0'

# Column names for the first four matched elements of each kind, in page order.
TOTAL_FIELDS = ('confirmation', 'suspected', 'deathtoll', 'cure')
YESTERDAY_FIELDS = ('yesterday1', 'yesterday2', 'yesterday3', 'yesterday4')

PAGE_LOAD_WAIT = 5   # seconds to let the page render before scraping
REFRESH_WAIT = 10    # seconds to pause before refreshing the page


def _scrape_record(browser):
    """Read the counters currently shown in *browser* into a dict.

    Returns a dict with 'id', the four TOTAL_FIELDS, the four
    YESTERDAY_FIELDS and 'datatime', or None when the page has not yet
    rendered every element that is needed (so the caller can retry instead
    of crashing on a missing index/key).
    """
    # find_elements("xpath", ...) is accepted by both Selenium 3 and 4;
    # the find_elements_by_xpath shortcut was removed in Selenium 4.
    totals = browser.find_elements("xpath", "//div[@class='z-fc-color din-font']")
    deltas = browser.find_elements("xpath", "//span[@class='z-fc-color day']")
    labels = browser.find_elements("xpath", "//span[@class='label']")

    if (len(totals) < len(TOTAL_FIELDS)
            or len(deltas) < len(YESTERDAY_FIELDS)
            or not labels):
        return None

    record = {'id': '1'}
    # zip() stops at the four field names, so extra matches are ignored.
    for field, element in zip(TOTAL_FIELDS, totals):
        record[field] = element.text
    for field, element in zip(YESTERDAY_FIELDS, deltas):
        record[field] = element.text
    record['datatime'] = labels[0].text
    return record


def _store_record(record):
    """Write *record* to md_condition unless its timestamp is already stored.

    The connection and cursor are always closed, even if a query raises.
    """
    db = pymysql.connect(host='127.0.0.1', user='root', password='', port=3306, db='mbhlw')
    try:
        cursor = db.cursor()
        try:
            cursor.execute('select * from md_condition;')
            row = cursor.fetchone()
            if row is None:
                # Nothing to compare against and nothing for UPDATE to hit.
                print('md_condition is empty; nothing to update')
                return

            # NOTE(review): index 9 is presumably the `datatime` column
            # (10th column of the table) -- confirm against the schema.
            stored_time = row[9]
            print(stored_time)
            if str(record['datatime']) == str(stored_time):
                return

            # Placeholders let the driver escape the scraped text; building
            # the statement by concatenation allowed SQL injection and broke
            # on any value containing a quote.
            columns = TOTAL_FIELDS + YESTERDAY_FIELDS + ('datatime',)
            assignments = ', '.join(column + ' = %s' for column in columns)
            sql = 'UPDATE md_condition SET ' + assignments + ' WHERE id = %s'
            params = [record[column] for column in columns]
            params.append(int(record['id']))
            print(sql, params)
            try:
                cursor.execute(sql, params)
                db.commit()
                print('Successful')
            except pymysql.MySQLError as exc:
                print('Failed')
                print(exc)
                db.rollback()
        finally:
            cursor.close()
    finally:
        db.close()


# Open a Chrome browser session (chromedriver must be discoverable).
driver = webdriver.Chrome()
try:
    driver.get(url)
    while True:
        time.sleep(PAGE_LOAD_WAIT)
        data = _scrape_record(driver)
        if data is None:
            print('Page not fully rendered yet; retrying after refresh')
        else:
            print(data)
            _store_record(data)

        time.sleep(REFRESH_WAIT)
        driver.refresh()  # reload the page to pick up new figures
finally:
    # Release the browser process when the loop is interrupted or fails.
    driver.quit()

 

你可能感兴趣的:(python)