从新浪微博上获取疫情最新信息

# -*- coding:utf-8 -*-
"""
Author: Edgar
Created time:2/1/2020 12:06 PM
爬取新浪微博中的相关信息
"""
import os
import json
import requests
import pymysql


class Virus(object):
    """Download epidemic statistics from Sina's JSON interfaces into MySQL.

    The statistics feed (``self.url``) provides per-province, per-city and
    per-country figures; a second, paginated feed provides news timeline
    entries.  Rows are written to four tables: ``virus_province``,
    ``virus_city``, ``virus_world`` and ``virus_timeline``.
    """

    # Seconds to wait for an HTTP response before treating the request as failed.
    _TIMEOUT = 10

    # Connection settings used by every database helper.  Keyword form is
    # required because PyMySQL 1.0+ no longer accepts positional arguments.
    # NOTE(review): credentials are hard-coded as in the original script;
    # consider loading them from configuration instead.
    _DB_CONFIG = {
        "host": "localhost",
        "user": "root",
        "password": "Edgar",
        "database": "virus",
        # The feeds contain Chinese text, so ask for a full-Unicode charset.
        "charset": "utf8mb4",
    }

    def __init__(self):
        super(Virus, self).__init__()
        self.url = "https://interface.sina.cn/news/wap/fymap2020_data.d.json"
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"}

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _dig(payload, *keys):
        """Follow ``keys`` through nested dicts; return None if a level is missing."""
        for key in keys:
            if not isinstance(payload, dict):
                return None
            payload = payload.get(key)
        return payload

    @staticmethod
    def _row(record, keys):
        """Build INSERT parameters: text for present values, None (NULL) otherwise."""
        values = []
        for key in keys:
            value = record.get(key)
            values.append(None if value is None else str(value))
        return tuple(values)

    @staticmethod
    def _execute(statements):
        """Run ``(sql, params)`` pairs on one connection, then commit.

        The connection is always closed, even when a statement raises.
        """
        connect = pymysql.connect(**Virus._DB_CONFIG)
        try:
            with connect.cursor() as cursor:
                for sql, params in statements:
                    cursor.execute(sql, params)
            connect.commit()
        finally:
            connect.close()

    # ------------------------------------------------------------------
    # download
    # ------------------------------------------------------------------
    def get_json(self):
        """Fetch and decode the statistics feed.

        Returns:
            The decoded JSON document, or None (after printing a message)
            when the request fails or the body is not valid JSON.
        """
        try:
            # ``headers=`` must be passed by keyword: the second positional
            # argument of requests.get is ``params`` (the query string).
            response = requests.get(
                self.url, headers=self.header, timeout=self._TIMEOUT)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError):
            print("获取json文件失败")
            return None

    def download_json(self, filename='data.json'):
        """Save the statistics feed to ``filename`` as JSON.

        If the file already exists the user is asked on stdin whether to
        overwrite it.  Nothing is written when the download fails, so an
        existing file is never replaced by an empty document.
        """
        json_ = self.get_json()
        if json_ is None:
            return
        print(json_)

        if os.path.exists(filename):
            # Keep asking until the answer is a clear yes or no.
            while True:
                answer = input("该目录已经存在文件 %s,是否删除该文件(y/n):  " % filename)
                if answer in ('y', 'Y'):
                    break
                if answer in ('n', 'N'):
                    return
                print("输入错误,请重新输入: ")

        with open(filename, "w", encoding="utf-8") as file:
            json.dump(json_, file)

    # ------------------------------------------------------------------
    # database
    # ------------------------------------------------------------------
    @staticmethod
    def create_table():
        """Create the four tables if they do not exist yet.

        Table names are lower-case here so they match the names used by the
        INSERT and TRUNCATE statements on servers with case-sensitive
        table names.
        """
        Virus._execute([
            # One row per province.
            ("CREATE TABLE IF NOT EXISTS virus_province(name VARCHAR(60) NOT NULL, ename varchar(20), value varchar(20), susNum varchar(20), deathNum varchar(20), cureNum varchar(20), city TEXT)", None),
            # One row per city, tagged with its province.
            ("CREATE TABLE IF NOT EXISTS virus_city(province VARCHAR(20),name VARCHAR(20) NOT NULL ,conNum VARCHAR(20), susNum VARCHAR(20), cureNum VARCHAR(20), deathNum VARCHAR(20))", None),
            # One row per country.
            ("CREATE TABLE IF NOT EXISTS virus_world(name VARCHAR(20), value VARCHAR(20), susNum VARCHAR(20), deathNum VARCHAR(20), cureNum VARCHAR(20))", None),
            # One row per timeline news entry.
            ("CREATE TABLE IF NOT EXISTS virus_timeline(url varchar(100), title varchar(200), media varchar(40), date varchar(30));", None),
        ])

    @staticmethod
    def insert_city(city):
        """Insert one city record (a dict that also carries a ``province`` key)."""
        sql = ("INSERT INTO virus_city(province, name, conNum, susNum, cureNum, deathNum) "
               "VALUES (%s, %s, %s, %s, %s, %s)")
        params = Virus._row(
            city, ("province", "name", "conNum", "susNum", "cureNum", "deathNum"))
        Virus._execute([(sql, params)])

    @staticmethod
    def insert_province(province):
        """Insert one province record; its city names are stored comma-joined."""
        sql = ("INSERT INTO virus_province(name, ename, value, susNum, deathNum, cureNum, city) "
               "VALUES (%s, %s, %s, %s, %s, %s, %s)")
        # A province may come without a city list; treat that as no cities.
        city_names = [str(entry.get("name"))
                      for entry in province.get("city") or []
                      if entry.get("name") is not None]
        params = Virus._row(
            province, ("name", "ename", "value", "susNum", "deathNum", "cureNum"))
        Virus._execute([(sql, params + (",".join(city_names),))])

    @staticmethod
    def insert_world(world):
        """Insert one country record."""
        sql = ("INSERT INTO virus_world(name, value, susNum, deathNum, cureNum) "
               "VALUES (%s, %s, %s, %s, %s)")
        params = Virus._row(
            world, ("name", "value", "susNum", "deathNum", "cureNum"))
        Virus._execute([(sql, params)])

    @staticmethod
    def insert_timeline(data):
        """Insert one timeline entry.

        Values are passed as query parameters, so quotes inside a title
        cannot break or alter the statement.
        """
        sql = ("INSERT INTO virus_timeline(url, title, media, date) "
               "VALUES (%s, %s, %s, %s)")
        params = Virus._row(data, ("url", "title", "media", "date"))
        Virus._execute([(sql, params)])

    def upload_data(self, data_json=None):
        """Insert provinces, cities, countries and timeline entries.

        Args:
            data_json: an already downloaded statistics document.  When
                omitted, the document is fetched with ``get_json``.

        Returns without inserting anything when the download fails.
        """
        if data_json is None:
            data_json = self.get_json()
        if data_json is None:
            return

        for province in self._dig(data_json, "data", "list") or []:
            self.insert_province(province)
            for city in province.get("city") or []:
                # Tag the city with its province without mutating the feed data.
                self.insert_city(dict(city, province=province.get("name")))

        for world in self._dig(data_json, "data", "worldlist") or []:
            self.insert_world(world)
        self.get_timeline()

    def refresh_data(self):
        """Replace the contents of all four tables with freshly fetched data.

        The statistics are downloaded before anything is deleted, so a
        failed download leaves the existing rows untouched.
        """
        data_json = self.get_json()
        if data_json is None:
            return

        self._execute([
            ('TRUNCATE TABLE virus_province;', None),
            ("TRUNCATE TABLE virus_city;", None),
            ("TRUNCATE TABLE virus_world;", None),
            ("TRUNCATE TABLE virus_timeline;", None),
        ])
        self.upload_data(data_json)

    def get_timeline(self):
        """Fetch timeline pages one by one and insert every entry.

        Paging starts at 0 and stops at the first page that is empty,
        cannot be fetched or cannot be decoded.
        """
        url = "https://interface.sina.cn/wap_api/wap_std_subject_feed_list.d.json?component_id=_conf_13|wap_zt_std_theme_timeline|http://news.sina.cn/zt_d/yiqing0121&page={}"
        page = 0
        while True:
            try:
                response = requests.get(
                    url.format(page), headers=self.header, timeout=self._TIMEOUT)
                response.raise_for_status()
                payload = response.json()
            except (requests.RequestException, ValueError):
                return

            entries = self._dig(payload, "result", "data", "data")
            if not entries:
                return
            for entry in entries:
                self.insert_timeline(entry)
            page += 1


if __name__ == '__main__':
    # Script entry point: empty the four tables and reload them from the
    # live feeds.  Other actions available on the same object:
    #   create_table()  - create the tables if they are missing
    #   download_json() - save the raw statistics JSON (data.json by default)
    #   upload_data()   - insert rows without truncating first
    #   get_timeline()  - load only the news timeline entries
    crawler = Virus()
    crawler.refresh_data()

爬取部分内容如下:
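virus_world:

| name | value | susNum | deathNum | cureNum |
| --- | --- | --- | --- | --- |
| 中国 | 14411 | 19544 | 304 | 328 |
| 德国 | 8 | 3 | 0 | 0 |
| 西班牙 | 1 | 0 | 0 | 0 |
| 俄罗斯 | 2 | 0 | 0 | 0 |
| 柬埔寨 | 1 | 0 | 0 | 0 |
| 印度 | 2 | 0 | 0 | 0 |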

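virus_city:

| province | name | conNum | susNum | cureNum | deathNum |
| --- | --- | --- | --- | --- | --- |
| 北京 | 海淀区 | 41 | 0 | 0 | 0 |
| 北京 | 怀柔区 | 1 | 0 | 0 | 0 |
| 北京 | 丰台区 | 16 | 0 | 0 | 0 |
| 北京 | 大兴区 | 22 | 0 | 0 | 0 |
| 北京 | 东城区 | 3 | 0 | 0 | 0 |
| 北京 | 昌平区 | 12 | 0 | 0 | 0 |
| 北京 | 西城区 | 22 | 0 | 0 | 0 |
| 北京 | 朝阳区 | 35 | 0 | 0 | 0 |
| 北京 | 石景山区 | 5 | 0 | 0 | 0 |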

最新代码见 GitHub

你可能感兴趣的:(python,#,爬虫)