Python 爬取世界各国以及中国各省份疫情历史数据

Python 爬取世界各国以及中国各省份疫情历史数据

# title       :  data_sync
#description  :  获取世界各国以及中国各省份疫情历史数据
#author       :  qianyulin
#email        :  [email protected]
#date         :  2021-10-30 12:00:00
#version      :  1.0
#usage        :  python3 yq_data_pro.py
#python_version: 3.7.2
#======================================================================================================================================================================================


import requests
import pandas as pd
from tqdm import tqdm
import time

class WorldVirusSpider:
    """Download historical epidemic statistics from the c.m.163.com API.

    The spider first fetches the area tree (countries, Chinese provinces and
    their cities) to resolve area ids, then requests the per-area history for
    a fixed set of target areas and writes the merged table to an Excel file.
    """

    # Seconds to wait for the remote API before a request is abandoned.
    request_timeout = 10

    def __init__(self):
        self.area_url = 'https://c.m.163.com/ug/api/wuhan/app/data/list-total'
        self.home_url = 'https://c.m.163.com/ug/api/wuhan/app/data/list-by-area-code?areaCode='

    def get_json_from_url(self, url):
        """
        Request ``url`` and return the decoded JSON body.

        :param url: URL to request
        :return: the parsed JSON response
        :raises requests.RequestException: on network failure, timeout or a
            non-2xx HTTP status
        """
        headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"}
        # A timeout keeps the script from hanging forever on a stalled
        # connection; raise_for_status surfaces HTTP errors instead of trying
        # to parse an error page as JSON.
        response = requests.get(url=url, headers=headers, timeout=self.request_timeout)
        response.raise_for_status()
        return response.json()

    def data_analyz(self, area_name, data):
        """
        Flatten one history entry into a single record.

        :param area_name: label stored in the ``area_name`` field of the record
        :param data: one history entry; must contain ``date``,
            ``today['confirm']`` and ``total['confirm'|'heal'|'dead']``
        :return: dict with the keys
            area_name      # label passed in by the caller
            date           # date of the entry
            now_confirm    # total_confirm - heal - dead, or None if any of
                           # the three counters is None
            add_confirm    # newly confirmed cases (today.confirm)
            total_confirm  # cumulative confirmed cases
            heal           # cumulative recoveries
            dead           # cumulative deaths
        """
        totals = data['total']
        total_confirm = totals['confirm']
        heal = totals['heal']
        dead = totals['dead']
        # A missing counter would make the subtraction raise TypeError and
        # abort the whole run; record the derived value as unknown instead.
        if any(value is None for value in (total_confirm, heal, dead)):
            now_confirm = None
        else:
            now_confirm = total_confirm - heal - dead
        return {
            'area_name': area_name,
            'date': data['date'],
            'now_confirm': now_confirm,
            'add_confirm': data['today']['confirm'],
            'total_confirm': total_confirm,
            'heal': heal,
            'dead': dead,
        }

    def get_countryAndcity_id(self):
        """
        Build a lookup table of country / province / city names and ids.

        China ('中国') is expanded to one row per city. Every other country
        gets a single row whose province and city fields hold the string
        'null'. A Chinese province without any city children still gets one
        row (city fields 'null') so that its province id stays discoverable.

        :return: pandas.DataFrame with the columns country_name, country_id,
            province_name, province_id, city_name, city_id
        """
        area_code_list = []
        data_json = self.get_json_from_url(self.area_url)
        for country in data_json['data']['areaTree']:
            country_part = {'country_name': country['name'], 'country_id': country['id']}
            if country['name'] != '中国':
                area_code_list.append(dict(
                    country_part,
                    province_name='null', province_id='null',
                    city_name='null', city_id='null',
                ))
                continue
            for province in country.get('children') or []:
                province_part = dict(
                    country_part,
                    province_name=province['name'], province_id=province['id'],
                )
                cities = province.get('children') or []
                if not cities:
                    area_code_list.append(dict(province_part, city_name='null', city_id='null'))
                    continue
                for city in cities:
                    area_code_list.append(dict(province_part, city_name=city['name'], city_id=city['id']))
        return pd.DataFrame(area_code_list)

    def get_area_yq_data_his(self, area_code, area_name):
        """
        Fetch the full history of one area.

        :param area_code: area id appended to ``self.home_url``
        :param area_name: label stored in every returned row
        :return: pandas.DataFrame with one row per entry of ``data.list`` in
            the response, shaped by :meth:`data_analyz`
        """
        data_json = self.get_json_from_url(self.home_url + area_code)
        records = [self.data_analyz(area_name, entry) for entry in data_json['data']['list']]
        return pd.DataFrame(records)

    def list_with(self, Series):
        """
        Collapse a column of (normally identical) ids into a single string.

        Duplicates are removed and the remaining values are concatenated in
        first-seen order, so the result is deterministic. Values are
        converted with ``str`` so non-string ids do not break the join.

        :param Series: pandas.Series (or any iterable) of ids
        :return: str; empty string when ``Series`` is empty
        """
        return ''.join(dict.fromkeys(str(value) for value in Series))

    def save_to_excel(self, df, sheetName,
                      file_path='/Users/qianyulin/Desktop/dongzhang/yq_data/yq_data_all.xlsx'):
        """
        Write ``df`` to a sheet of an Excel workbook.

        :param df: pandas.DataFrame to write
        :param sheetName: name of the target sheet
        :param file_path: workbook path; defaults to the original hard-coded
            location
        """
        # The context manager saves and closes the workbook on exit. An
        # explicit writer.save() is redundant and fails on pandas >= 2.0,
        # where ExcelWriter.save was removed.
        with pd.ExcelWriter(file_path) as writer:
            df.to_excel(excel_writer=writer, sheet_name=sheetName)

    def get_res_code(self):
        """
        Resolve the ids of the target areas; edit ``targets`` to taste.

        :return: list of ``{'area_code': str, 'area_name': str}`` dicts, one
            per target, in the order listed. ``area_code`` is an empty string
            when the area is not present in the lookup table.
        """
        # (level, name): level selects the '<level>_name' / '<level>_id'
        # column pair of the lookup table.
        targets = [
            ('country', '中国'),
            ('province', '上海'),
            ('province', '浙江'),
            ('province', '安徽'),
            ('province', '江苏'),
            ('province', '福建'),
            ('province', '江西'),
        ]
        area_df = self.get_countryAndcity_id()
        res_area_code_list = []
        for level, area_name in targets:
            matched = area_df[area_df[level + '_name'] == area_name]
            res_area_code_list.append({
                'area_code': self.list_with(matched[level + '_id']),
                'area_name': area_name,
            })
        return res_area_code_list

    def run(self):
        """
        Download the history of every target area and save it to Excel.

        Failures are reported on stdout rather than raised, so the script
        always terminates normally.
        """
        try:
            frames = []
            for area in tqdm(self.get_res_code(), desc='Processing'):
                if not area['area_code']:
                    # No id was resolved for this area; requesting an empty
                    # area code cannot return its history.
                    print('Skip {}: area code not found'.format(area['area_name']))
                    continue
                frames.append(self.get_area_yq_data_his(area['area_code'], area['area_name']))
                time.sleep(0.5)  # pause between requests to the API
            if not frames:
                raise ValueError('no area data was downloaded')
            # axis=0 stacks the per-area frames row-wise.
            _con_df = pd.concat(frames, axis=0)
            self.save_to_excel(_con_df, 'confirm')
            print('=========Writer data Successful!=======')
        except Exception as exc:
            print('==================Error!===============')
            # Show what actually went wrong instead of hiding it.
            print(repr(exc))

if __name__ == '__main__':
    # Script entry point: build the spider and run the full
    # download-and-export pipeline once.
    WorldVirusSpider().run()

你可能感兴趣的:(python,python,开发语言,后端)