新冠肺炎/NCP/COVID-19/SARS-CoV-2 数据查询/可视化项目(一)

一个简单项目

涉及内容:数据调入,过滤,展示

数据来源于 https://github.com/839Studio/Novel-Coronavirus-Updates/blob/master/README.md ,经石墨文档下载。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
# Register pandas' formatters/converters with matplotlib; presumably needed
# for the date-valued x axis of the plots below (confirm against the data).
register_matplotlib_converters()
# Use SimHei as the sans-serif font because the plot titles and legend labels
# in this file are Chinese. NOTE(review): assumes SimHei is installed locally.
plt.rcParams['font.sans-serif']=['SimHei'] 

class NCP(object):
    '''
    Query and plot daily report counts for areas listed in ``provlist``.

    The constructor loads the report workbook, keeps the rows whose '省份'
    value is in ``provlist`` and caches the nationwide daily increments in
    ``new_dom``; the nationwide date index is the reference calendar that
    every per-province result is aligned to.

    Attributes:
    --------------------
    provlist: list of str
        areas that can be queried: the aggregates '全国' (nationwide) and
        '湖北外' (nationwide minus Hubei) followed by the individual regions
    total_dom: pd.DataFrame
        raw report rows restricted to ``provlist``
    new_dom: pd.DataFrame
        nationwide daily increments, indexed by '报道时间'
    '''

    # The three count columns that are aggregated and plotted. Selecting them
    # explicitly keeps text columns out of the group sums on every pandas
    # version (recent versions no longer drop them silently).
    _VALUE_COLS = ['新增确诊', '新增出院', '新增死亡']
    # Workbooks written by the query methods and by __prsv__.
    _NEW_PATH = './Data/NCP_Daily_New_dom.xlsx'
    _SUM_PATH = './Data/NCP_Daily_Sum_dom.xlsx'

    def __init__(self):
        data = pd.read_excel('./Data/NCP_20200226_morn.xlsx')
        # The source/link/remark columns are not used by any query or plot.
        data = data.drop(['消息来源', '来源链接1', '来源链接2', '来源链接3', '备注'], axis=1)
        self.provlist = [
            '全国', '湖北外', '北京', '天津', '上海', '重庆', '河北', '山西', '辽宁',
            '吉林', '黑龙江', '江苏', '浙江', '安徽', '福建', '江西', '山东', '河南',
            '湖北', '湖南', '广东', '海南', '四川', '贵州', '云南', '陕西', '甘肃',
            '青海', '台湾', '内蒙古', '广西', '西藏', '宁夏', '新疆', '香港', '澳门',
        ]
        # Keep only the rows that belong to a listed area.
        self.total_dom = data[data['省份'].isin(self.provlist)]
        # Must be computed before any per-province query: those queries align
        # their result to this index. As before, this also writes the
        # increment workbook (default save=True).
        self.new_dom = self.query_new_dom('全国')

    def __prsv__(self):
        '''
        Save the increment and cumulative tables of every area in
        ``provlist``, one sheet per area.

        Each workbook is opened once so that all sheets end up in the same
        file; calling ``to_excel`` with a path for every area would rewrite
        the file each time and keep only the last sheet.
        '''
        with pd.ExcelWriter(self._NEW_PATH) as new_writer, \
                pd.ExcelWriter(self._SUM_PATH) as sum_writer:
            for prov in self.provlist:
                daily_new = self.query_new_dom(prov, save=False)
                daily_new.to_excel(new_writer, sheet_name=prov)
                daily_new.cumsum().to_excel(sum_writer, sheet_name=prov)
        return None

    def query_new_dom(self, prov: str, save: bool = True) -> pd.DataFrame:
        '''
        Return the increment data for a certain area, summed per report date
        and sorted by date.

        Parameters:
        --------------------
        prov: str
            name of a certain area; must be '全国' or an entry of ``provlist``
        save: bool
            when True (default) the result is also written to the increment
            workbook as a single sheet named after ``prov``

        Returns:
        --------------------
        pd.DataFrame indexed by '报道时间' with the three count columns, or
        None when ``prov`` is not a known area.
        '''
        if prov == '全国':
            daily_new = self.total_dom.groupby('报道时间')[self._VALUE_COLS].sum()
        elif prov not in self.provlist:
            return None
        elif prov == '湖北外':
            # Checked before the generic province branch: '湖北外' is itself
            # listed in provlist and has no rows of its own, so the generic
            # branch would return all zeros.
            daily_new = (self.query_new_dom('全国', save=False)
                         - self.query_new_dom('湖北', save=False))
        else:
            rows = self.total_dom[self.total_dom['省份'] == prov]
            daily_new = rows.groupby('报道时间')[self._VALUE_COLS].sum()
            # Dates on which this area reported nothing become zero rows, so
            # every area shares the nationwide calendar.
            daily_new = daily_new.reindex(self.new_dom.index, fill_value=0)

        daily_new = daily_new.sort_index()
        if save:
            daily_new.to_excel(self._NEW_PATH, sheet_name=prov)
        return daily_new

    def query_total_dom(self, prov: str, save: bool = True) -> pd.DataFrame:
        '''
        Return the cumulative data for a certain area.

        The result keeps the increment column names ('新增...'); only the
        values are running totals.

        Parameters:
        -------------------
        prov: str
            name of a certain area
        save: bool
            when True (default) the increment table and the cumulative table
            are also written to their workbooks

        Returns:
        -------------------
        pd.DataFrame of running totals, or None when ``prov`` is not a known
        area.
        '''
        daily_new = self.query_new_dom(prov, save=save)
        if daily_new is None:
            return None
        daily_total = daily_new.cumsum()
        if save:
            daily_total.to_excel(self._SUM_PATH, sheet_name=prov)
        return daily_total

    def depict_new(self, prov: str) -> 'plt.Figure':
        '''
        Depict the increment data

        Parameters:
        -------------------------
        prov: str
            name of a certain area

        Raises:
        -------------------------
        ValueError when ``prov`` is not a known area.
        '''
        return self._depict(self.query_new_dom(prov), prov, '肺炎新增数据--',
                            ('新增确诊', '新增出院', '新增死亡'))

    def depict_total(self, prov: str) -> 'plt.Figure':
        '''
        Depict the cumulative data

        Parameters:
        ---------------------------
        prov: str
            name of a certain area

        Raises:
        ---------------------------
        ValueError when ``prov`` is not a known area.
        '''
        return self._depict(self.query_total_dom(prov), prov, '肺炎累计数据--',
                            ('累计确诊', '累计出院', '累计死亡'))

    def _depict(self, data, prov, title_prefix, labels):
        '''
        Draw the two-panel figure shared by depict_new and depict_total:
        confirmed cases on top, discharged and deaths below.
        '''
        if data is None:
            raise ValueError('unknown area: %r' % (prov,))

        confirmed_label, cured_label, dead_label = labels
        dates = data.index
        confirmed = data['新增确诊']
        cured = data['新增出院']
        dead = data['新增死亡']

        fig = plt.figure(figsize=[15, 12])

        plt.subplot(2, 1, 1)
        plt.plot(dates, confirmed, marker='o', label=confirmed_label)
        self._pad_ylim(confirmed.max(), confirmed.min())
        self._annotate(dates, confirmed, 'right', 'bottom', 10)
        plt.legend()
        plt.title(title_prefix + prov)

        plt.subplot(2, 1, 2)
        plt.plot(dates, cured, marker='o', label=cured_label)
        plt.plot(dates, dead, marker='o', label=dead_label)
        self._pad_ylim(max(cured.max(), dead.max()), min(cured.min(), dead.min()))
        # Opposite anchors keep the two value labels of one date apart.
        self._annotate(dates, cured, 'right', 'bottom', 12)
        self._annotate(dates, dead, 'left', 'top', 12)
        plt.legend()
        plt.title(title_prefix + prov)

        return fig

    @staticmethod
    def _pad_ylim(top, bottom):
        '''
        Set the y limits of the current axes with a margin of 10% of the
        value range; the lower limit sits just below zero, not at ``bottom``.
        '''
        gap = top - bottom
        plt.ylim(-0.1 * gap, top + 0.1 * gap)

    @staticmethod
    def _annotate(dates, values, ha, va, fontsize):
        '''Write each value next to its data point on the current axes.'''
        for when, value in zip(dates, values):
            plt.text(when, value, '%.0f' % value, ha=ha, va=va, fontsize=fontsize)

暂时只有这几个功能,其他的想到再做吧,好像没什么需求。做城市做外国都一样。倒是想看看省份-城市那种直接调整的索引列表怎么做的。总而言之是无聊找点事做。


Some Translation:
What’s warmer than the wild sea is the stomach of a bear;
the helpless shrimp soothes its anxiety against the gastric acid.

你可能感兴趣的:(新冠肺炎/NCP/COVID-2019/SARS2 数据查询/可视化项目(一))