【数据可视化】pyecharts实现全国和全球疫情可视化

实验目的和要求

在本次实验中基于当前的疫情数据完成各种可视化的构建:

(1)对全国总数据绘制饼图;

(2)对全国各地数据进行地图的绘制

(3)对全国的确诊人数做词云图

(4)对山东(湖北)的确诊人数做词云图

(5)进行更复杂的全国(全球)疫情地图的绘制

博客链接:https://www.iamzlt.com/?p=251

前导内容

安装pyecharts

1、进入官网或点击这里下载安装包Pyecharts 下载最新版本

2、将Pyecharts 文件放到Anaconda3 的pkgs 目录下

【数据可视化】pyecharts实现全国和全球疫情可视化_第1张图片

3、打开Anaconda Prompt,输入cd anaconda3/pkgs 进入文件夹

【数据可视化】pyecharts实现全国和全球疫情可视化_第2张图片

5 继续安装插件

$ pip install echarts-countries-pypkg        # 全球国家地图
$ pip install echarts-china-provinces-pypkg  # 中国省份地图
$ pip install echarts-china-cities-pypkg     # 中国城市地图
$ pip install wordcloud                      # 词云图包

实验内容一

全国数据绘制饼图,keshihua.py文件

1.获取数据

import json
import requests

def catch_data():
    """Download the Tencent disease_h5 feed and return its decoded payload.

    The HTTP response body is parsed as JSON; its 'data' field is then
    passed through ``json.loads`` a second time and the result is returned.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    envelope = requests.get(url=url).json()
    return json.loads(envelope['data'])

if __name__ == '__main__':
    # Fetch the feed once, list its top-level keys, then print the
    # cumulative and the newly-added nationwide figures.
    data = catch_data()
    print(data.keys())
    lastUpdateTime = data['lastUpdateTime']
    chinaTotal, chinaAdd = data['chinaTotal'], data['chinaAdd']
    for summary in (chinaTotal, chinaAdd):
        print(summary)

运行结果:

dict_keys(['lastUpdateTime', 'chinaTotal', 'chinaAdd', 'isShowAdd', 'showAddSwitch', 'areaTree'])
{'confirm': 84407, 'heal': 79209, 'dead': 4643, 'nowConfirm': 555, 'suspect': 5, 'nowSevere': 26, 'importedCase': 1678, 'noInfect': 903}
{'confirm': 3, 'heal': 83, 'dead': 0, 'nowConfirm': -80, 'suspect': 3, 'nowSevere': -3, 'importedCase': 2, 'noInfect': 20}

2.对总数据进行饼图绘制

import json
import requests

def catch_data():
    """Download the Tencent disease_h5 feed and return its decoded payload.

    The HTTP response body is parsed as JSON; its 'data' field is then
    passed through ``json.loads`` a second time and the result is returned.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    envelope = requests.get(url=url).json()
    return json.loads(envelope['data'])
    
from pyecharts.charts import Pie
from pyecharts import options as opts

def create_pie():
    """Render the nationwide totals as a pie chart in ``Bing1.html``.

    Every entry of the feed's 'chinaTotal' mapping becomes one slice,
    labelled with its original key.
    """
    data = catch_data()
    chinaTotal = data['chinaTotal']

    # Create the chart instance.
    pie = Pie(init_opts=opts.InitOpts(width='1000px', height='600px'))

    # Add the data as [label, value] pairs.
    pie.add(series_name='腾讯数据', data_pair=[list(z) for z in chinaTotal.items()])

    # Global options: centred title.
    pie.set_global_opts(title_opts=opts.TitleOpts(title='全国总数据', pos_left='center', pos_top=20))
    # Tooltip template: series name, a line break, then "label:value".
    # The template has to stay on one source line; <br/> is the line break.
    pie.set_series_opts(tooltip_opts=opts.TooltipOpts(trigger='item', formatter="{a} <br/>{b}:{c}"))
    pie.render(path="Bing1.html")


if __name__ == '__main__':
    create_pie()

运行结果:Bing1

     

Todo 工作:

(1)把上面饼图的 confirm 等字样 改成中文

import json
import requests

def catch_data():
    """Download the Tencent disease_h5 feed and return its decoded payload.

    The HTTP response body is parsed as JSON; its 'data' field is then
    passed through ``json.loads`` a second time and the result is returned.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    envelope = requests.get(url=url).json()
    return json.loads(envelope['data'])
    
from pyecharts.charts import Pie
from pyecharts import options as opts

def create_pie():
    """Render the nationwide totals, with Chinese labels, in ``Bing1.html``.

    Each field of the feed's 'chinaTotal' mapping becomes one slice. Labels
    are looked up by field name, so they stay correct if the feed reorders
    its keys; a field without a translation keeps its original name.
    """
    # Chinese display label for each known 'chinaTotal' field.
    labels = {
        'confirm': '确诊',
        'heal': '治愈',
        'dead': '死亡',
        'nowConfirm': '现有确诊',
        'suspect': '疑似感染',
        'nowSevere': '现有重症',
        'importedCase': '境外输入',
        'noInfect': '无症状感染者',
    }

    data = catch_data()
    chinaTotal = data['chinaTotal']
    print(chinaTotal.keys())
    chinaTotal_cn = {labels.get(key, key): value for key, value in chinaTotal.items()}

    # Create the chart instance.
    pie = Pie(init_opts=opts.InitOpts(width='1000px', height='600px'))

    # Add the data as [label, value] pairs.
    pie.add(series_name='腾讯数据', data_pair=[list(z) for z in chinaTotal_cn.items()])

    # Global options: centred title.
    pie.set_global_opts(title_opts=opts.TitleOpts(title='全国总数据', pos_left='center', pos_top=20))
    # Tooltip template: series name, a line break, then "label:value".
    # The template has to stay on one source line; <br/> is the line break.
    pie.set_series_opts(tooltip_opts=opts.TooltipOpts(trigger='item', formatter="{a} <br/>{b}:{c}"))
    pie.render(path="Bing1.html")


if __name__ == '__main__':
    create_pie()

运行结果:Bing1

     

(2)再编写函数对chinaAdd(新增数据)进行绘制,并去掉其中的某个项目,比如heal

import json
import requests

def catch_data():
    """Download the Tencent disease_h5 feed and return its decoded payload.

    The HTTP response body is parsed as JSON; its 'data' field is then
    passed through ``json.loads`` a second time and the result is returned.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    envelope = requests.get(url=url).json()
    return json.loads(envelope['data'])
    
from pyecharts.charts import Pie
from pyecharts import options as opts

def create_pie():
    """Render the newly-added nationwide figures as a pie in ``Bing2.html``.

    The 'suspect' entry is dropped from the feed's 'chinaAdd' mapping before
    plotting; every remaining entry becomes one slice.
    """
    data = catch_data()
    chinaAdd = data['chinaAdd']
    # Drop the excluded item; tolerate a feed that no longer reports it.
    chinaAdd.pop('suspect', None)
    # NOTE(review): the sample output in this article shows negative deltas
    # (e.g. nowConfirm: -80); how a pie chart draws those is not handled here.

    # Create the chart instance.
    pie = Pie(init_opts=opts.InitOpts(width='1000px', height='600px'))

    # Add the data as [label, value] pairs.
    pie.add(series_name='腾讯数据', data_pair=[list(z) for z in chinaAdd.items()])

    # Global options: centred title naming the data actually plotted.
    pie.set_global_opts(title_opts=opts.TitleOpts(title='全国新增数据', pos_left='center', pos_top=20))
    # Tooltip template: series name, a line break, then "label:value".
    # The template has to stay on one source line; <br/> is the line break.
    pie.set_series_opts(tooltip_opts=opts.TooltipOpts(trigger='item', formatter="{a} <br/>{b}:{c}"))
    pie.render(path="Bing2.html")


if __name__ == '__main__':
    create_pie()

运行结果:Bing2

      

(3)从其他网站下载全球疫情数据,并将其绘制成饼图

# -*- coding: utf-8 -*-
"""
Created on Wed May  6 16:55:30 2020

@author: ZLT
"""

import json
import requests
import pandas as pd
import time

def time_c(timeNum):
    """Format a millisecond timestamp as a local 'YYYY-MM-DD HH:MM:SS' string."""
    seconds = float(timeNum / 1000)
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))

def catch_data():
    """Download the area history feed and total the rows tagged '05-14'.

    Returns:
        dict: one summed value for each count field listed in ``keys``
        below, taken over the rows that survive the filtering.

    NOTE(review): two columns are written by position (15 and 16) and later
    read back by the names 'cities' and 'updateTime', so they presumably are
    those columns -- confirm against the live feed's column order.
    """
    url = 'https://lab.isaaclin.cn/nCoV/api/area?latest=0'
    data = requests.get(url=url).json()
    res = data['results']
    df = pd.DataFrame(res)
    # Column 16: millisecond timestamp -> formatted local-time string.
    for i in range(len(df)):
        df.iloc[i,16] = time_c(df.iloc[i,16])
    # Column 15: overwritten with the "MM-DD" slice of that string.
    for i in range(len(df)):
        df.iloc[i,15] = df.iloc[i,16][5:10]
    df1 = df
    # One row per distinct update time; these values drive the loop below.
    df2 = df1.drop_duplicates(['updateTime'],keep='last')
    date = []
    for i in range(len(df2)):
        date.append(df2.iloc[i,16])
    #print(date)
    tem = df1[df1['updateTime'] == '05-14']
    tem = tem.drop_duplicates(['provinceShortName'], keep='last')
    for i in date[1:41]:
        tem1 = df1[df1['updateTime'] == i]
        tem1= tem1.drop_duplicates(['provinceShortName'],keep='last')
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent row-wise concatenation.
        tem = pd.concat([tem, tem1])
    tem = tem.reset_index(drop=True)
    #print(tem)
    # Keep only the rows whose overwritten column carries the 05-14 tag.
    tem = tem[tem['cities'] == '05-14']
    print(tem)
    world_dict={}
    keys = ['currentConfirmedCount','confirmedCount','suspectedCount','curedCount','deadCount']
    for i in range(len(keys)):
        world_dict[keys[i]]=0
    print(world_dict)

    for i in range(len(keys)):
        #print(tem[keys[i]])
        world_dict[keys[i]] = sum(tem[keys[i]])
    print(world_dict)
    return world_dict
    
from pyecharts.charts import Pie
from pyecharts import options as opts

def create_pie():
    """Render the summed world figures as a pie chart in ``Bing3.html``.

    Each key/value pair of the dict returned by ``catch_data`` becomes one
    slice.
    """
    data = catch_data()

    # Create the chart instance.
    pie = Pie(init_opts=opts.InitOpts(width='1000px', height='600px'))

    # Add the data as [label, value] pairs.
    pie.add(series_name='数据', data_pair=[list(z) for z in data.items()])

    # Global options: centred title.
    pie.set_global_opts(title_opts=opts.TitleOpts(title='全球总数据', pos_left='center', pos_top=20))
    # Tooltip template: series name, a line break, then "label:value".
    # The template has to stay on one source line; <br/> is the line break.
    pie.set_series_opts(tooltip_opts=opts.TooltipOpts(trigger='item', formatter="{a} <br/>{b}:{c}"))
    pie.render(path="Bing3.html")


if __name__ == '__main__':
    create_pie()

运行结果:Bing3

       

实验内容二

全国各地数据进行地图绘制 keshihua2.py
 
  
import json
import requests
import pandas as pd
import time
from pyecharts.charts import Map
import pyecharts.options as opts

def catch_data():
    """Download the Tencent disease_h5 feed and return its decoded payload.

    The HTTP response body is parsed as JSON; its 'data' field is then
    passed through ``json.loads`` a second time and the result is returned.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    envelope = requests.get(url=url).json()
    return json.loads(envelope['data'])

data = catch_data()
print(data.keys())

lastUpdateTime = data['lastUpdateTime']

areaTree = data['areaTree']

# The first areaTree node's children are walked as provinces, and each
# province's children as cities.
china_data = areaTree[0]['children']
china_list = []

for province_node in china_data:
    province = province_node['name']
    for city_node in province_node['children']:
        # One flat record per city, keeping the nested total/today dicts.
        china_list.append({
            'province': province,
            'city': city_node['name'],
            'total': city_node['total'],
            'today': city_node['today'],
        })

china_data = pd.DataFrame(china_list)

def confirm(x):
    """Return the 'confirm' count from a per-city totals record.

    Args:
        x: the record, either as a dict or as the string form of a dict.

    The record originates from a remote API, so it is never passed to
    ``eval``: dicts are used as they are and strings are parsed with
    ``ast.literal_eval``, which accepts literals only.
    """
    import ast

    record = x if isinstance(x, dict) else ast.literal_eval(str(x))
    return record['confirm']

def dead(x):
    """Return the 'dead' count from a per-city totals record.

    Args:
        x: the record, either as a dict or as the string form of a dict.

    The record originates from a remote API, so it is never passed to
    ``eval``: dicts are used as they are and strings are parsed with
    ``ast.literal_eval``, which accepts literals only.
    """
    import ast

    record = x if isinstance(x, dict) else ast.literal_eval(str(x))
    return record['dead']

def heal(x):
    """Return the 'heal' count from a per-city totals record.

    Args:
        x: the record, either as a dict or as the string form of a dict.

    The record originates from a remote API, so it is never passed to
    ``eval``: dicts are used as they are and strings are parsed with
    ``ast.literal_eval``, which accepts literals only.
    """
    import ast

    record = x if isinstance(x, dict) else ast.literal_eval(str(x))
    return record['heal']

# Pull the three counters out of each row's nested 'total' record.
for column, extractor in (('confirm', confirm), ('dead', dead), ('heal', heal)):
    china_data[column] = china_data['total'].map(extractor)
china_data = china_data[["province","city","confirm","dead","heal"]]

# Sum the city-level confirmed counts up to one row per province.
province_totals = china_data.groupby("province")["confirm"].sum()
area_data = province_totals.reset_index()
area_data.columns = ["province", "confirm"]
print(area_data)

# Province-level map of cumulative confirmed cases, written to Bing4.html.
(
    Map()
    .add(
        "",
        [list(z) for z in zip(list(area_data["province"]), list(area_data["confirm"]))],
        "china",
        is_map_symbol_show=False,
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="2019_nCov中国各地总确诊人数地图"),
        visualmap_opts=opts.VisualMapOpts(
            is_piecewise=True,
            # Buckets are contiguous (previously the value 100 matched no
            # bucket) and every label states the bucket's real bounds.
            pieces=[
                {"min": 5000, "label": '>=5000', "color": "#893448"},
                {"min": 1000, "max": 4999, "label": '1000~4999', "color": "#ff585e"},
                {"min": 500, "max": 999, "label": '500~999', "color": "#fb8146"},
                {"min": 100, "max": 499, "label": '100~499', "color": "#ffA500"},
                {"min": 10, "max": 99, "label": '10~99', "color": "#ffb248"},
                {"min": 0, "max": 9, "label": '0~9', "color": "#fff2d1"},
            ],
        ),
    )
).render(path="Bing4.html")


运行结果:Bing4

       

实验内容三

对全国确诊人数做词云图

1 保存csv 数据(还是keshihua2.py, 在中间 加入一行代码 再运行生成csv)

# Save the per-province confirmed totals without the index column; no
# encoding argument is passed here.
area_data.to_csv('china.csv', index=False)

2 制作词云图(新建keshihua3.py)

import pandas as pd
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt

def draw_cloud(read_name):
    """Draw a word cloud of provinces weighted by confirmed count.

    Args:
        read_name: path of a CSV file with 'province' and 'confirm' columns.

    The cloud is drawn on the current matplotlib axes with the axis hidden
    and is also saved as 'china.png'.
    """
    wc = WordCloud(font_path='simkai.ttf', background_color="white")
    # 'utf-8-sig' decodes plain UTF-8 as well as files that start with a
    # BOM, so the CSV can be read whichever way it was written.
    fp = pd.read_csv(read_name, encoding='utf-8-sig')
    name = [str(province) for province in fp.province]
    dic = dict(zip(name, fp.confirm))
    wc.generate_from_frequencies(dic)

    plt.imshow(wc)
    # Was misspelled as plt.axos, which raises AttributeError.
    plt.axis("off")
    wc.to_file('china.png')
    
if __name__ == '__main__':
    # Build the word cloud from the CSV of per-province confirmed totals.
    draw_cloud('china.csv')
    

运行结果:

【数据可视化】pyecharts实现全国和全球疫情可视化_第3张图片

Todo 工作:生成的 csv 文件虽然在词云图中没有问题,但是用 Excel 等程序打开后,省名却是乱码,请优化使之用 Excel 打开后仍然可读。

# Same export, but encoded as 'utf-8-sig' -- the variant the exercise
# above asks for so that the file stays readable when opened in Excel.
area_data.to_csv('china.csv', index=False, encoding='utf-8-sig')

实验内容四

对山东(湖北)省确诊人数做词云图

1 保存csv 数据(还是keshihua2.py)

# Keep only the Shandong rows, with just the city name and confirmed count,
# and export them as a utf-8-sig encoded CSV.
is_shandong = china_data.province == '山东'
shandong_data = china_data.loc[is_shandong, ['city', 'confirm']]
shandong_data.to_csv('shandong.csv', encoding='utf-8-sig', index=False)

2 绘制词云图(todo 在keshihua3.py 基础上改动实现)

import pandas as pd
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt

def draw_cloud(read_name, save_name=None):
    """Draw a word cloud of cities weighted by confirmed count.

    Args:
        read_name: path of a CSV file with 'city' and 'confirm' columns.
        save_name: path of the image to write. When omitted it is derived
            from ``read_name`` by swapping the extension for '.png', so
            'shandong.csv' produces 'shandong.png' rather than overwriting
            the nationwide 'china.png'.

    The cloud is drawn on the current matplotlib axes with the axis hidden
    and is also saved to ``save_name``.
    """
    import os

    if save_name is None:
        save_name = os.path.splitext(read_name)[0] + '.png'

    wc = WordCloud(font_path='simkai.ttf', background_color="white")
    # 'utf-8-sig' decodes plain UTF-8 as well as files that start with a
    # BOM, which is how the CSV for this step is written.
    fp = pd.read_csv(read_name, encoding='utf-8-sig')
    name = [str(city) for city in fp.city]
    dic = dict(zip(name, fp.confirm))
    wc.generate_from_frequencies(dic)

    plt.imshow(wc)
    plt.axis("off")
    wc.to_file(save_name)
    
if __name__ == '__main__':
    # Build the word cloud from the CSV of Shandong city-level counts.
    draw_cloud('shandong.csv')

运行结果:

【数据可视化】pyecharts实现全国和全球疫情可视化_第4张图片

实验内容五

在实验三的基础上生成一个动态的,显示所有信息的地图(keshihua4.py)

1 选定json 网址

https://ncportal.esrichina.com.cn/JKZX/yq_20200131.json 为了简单,直接使用20200131这一天的数据,该json 的结构请打开网页查看

import time
import json
import requests
import datetime
import pandas as pd
import numpy as np
from pyecharts.charts import Map
from pyecharts import options as opts

# Field names read from each province record; also the default list of map
# series drawn by mapVisualmap().
infoType = ['新增疑似', '累计疑似', '新增确诊', '累计确诊', '新增死亡', '累计死亡']
# Placeholder for the dataset date; getResult() rebinds this global.
date = 0
def getInfo(types, results):
    """Map each record's 'name' to its value for the field ``types``.

    Args:
        types: key to read from every record (one of the infoType entries).
        results: iterable of dict records, each carrying 'name' and ``types``.

    Returns:
        dict: ``{record['name']: record[types]}``; a later record with the
        same name replaces an earlier one.
    """
    return {record['name']: record[types] for record in results}

def formatProvince(proName):
    """Shorten an official region name by removing its administrative suffix.

    Args:
        proName: full region name, e.g. '山东省' or '新疆维吾尔自治区'.

    Returns:
        str: the name with a trailing '省' or '市' removed; '内蒙古' for
        '内蒙古自治区'; the first two characters for any other name ending
        in '自治区' or '行政区'; otherwise the name unchanged (previously
        such names fell through and produced None).
    """
    if proName.endswith(('省', '市')):
        return proName[:-1]
    if proName == '内蒙古自治区':
        return '内蒙古'
    if proName.endswith(('自治区', '行政区')):
        return proName[:2]
    # No recognised suffix: the name is returned as it is.
    return proName

def mapVisualmap(iType=infoType):
    """Build a China map with one selectable series per entry of ``iType``.

    Args:
        iType: field names to plot; each one becomes a map series.

    Returns:
        The configured ``Map`` chart (not yet rendered).
    """
    chart = Map()

    # One visual-map piece is collected per series.
    legend_pieces = []

    # First element of getResult() is the list of province records.
    provinces = getResult()[0]

    for info_name in iType:
        per_province = getInfo(info_name, provinces)
        region_names = [formatProvince(raw_name) for raw_name in per_province.keys()]
        counts = list(per_province.values())
        chart.add('{}'.format(info_name), [list(pair) for pair in zip(region_names, counts)], 'china')

        # The smallest and largest value of the series bound its piece.
        legend_pieces.append({'min': min(counts), 'max': max(counts)})

    # Title, piecewise visual map and a single-select vertical legend.
    title = opts.TitleOpts(title="全国各省份疫情感染情况(截止:{})".format(date), pos_top=15, pos_right='center')
    visual_map = opts.VisualMapOpts(is_piecewise=True, pieces=legend_pieces, type_= 'color')
    legend = opts.LegendOpts(selected_mode='single', orient='vertical', pos_right=40, pos_top=40)
    chart.set_global_opts(title_opts=title, visualmap_opts=visual_map, legend_opts=legend)
    return chart


def formatProvince(proName):
    """Shorten an official region name by removing its administrative suffix.

    NOTE: a function of the same name is already defined earlier in this
    script; being later, this definition is the one in effect at run time.

    Args:
        proName: full region name, e.g. '山东省' or '新疆维吾尔自治区'.

    Returns:
        str: the name with a trailing '省' or '市' removed; '内蒙古' for
        '内蒙古自治区'; the first two characters for any other name ending
        in '自治区' or '行政区'; otherwise the name unchanged (previously
        such names fell through and produced None).
    """
    if proName.endswith(('省', '市')):
        return proName[:-1]
    if proName == '内蒙古自治区':
        return '内蒙古'
    if proName.endswith(('自治区', '行政区')):
        return proName[:2]
    # No recognised suffix: the name is returned as it is.
    return proName

def getResult():
    """Fetch the 2020-01-31 snapshot and return its per-region records.

    Side effect: rebinds the module-level ``date`` to 20200131 and prints
    the URL being requested.

    Returns:
        tuple: ``(provinceInfo, date)`` where ``provinceInfo`` is the list of
        'properties' dicts of at most the first 34 features. Per the
        original author's note these are all provinces, autonomous regions,
        municipalities plus Hong Kong, Macau and Taiwan.
    """
    global date
    url = "https://ncportal.esrichina.com.cn/JKZX/yq_{}.json"
    headers = {'User-Agent': 'Mozilla/5.0 (windows NT 10.0; Win64; x64)'}
    date = 20200131
    print(url.format(date))

    req = requests.get(url.format(date), headers=headers)

    # Decode the body once instead of once per region; slicing rather than
    # indexing also tolerates a feed that carries fewer than 34 features.
    features = req.json()['features']
    provinceInfo = [feature['properties'] for feature in features[:34]]

    return provinceInfo, date

if __name__ == '__main__':
    # Build the multi-series province map and write it out as an HTML page.
    c = mapVisualmap()
    c.render(path="全国各省份疫情感染情况.html")

运行结果:全国各省份疫情感染情况

        

项目实战:绘制世界动态疫情地图:生成如下所示的html 文件

import json
import requests
import pandas as pd
import time
from pyecharts.charts import Map
from pyecharts import options as opts

def time_c(timeNum):
    """Format a millisecond timestamp as a local 'YYYY-MM-DD HH:MM:SS' string."""
    seconds = float(timeNum / 1000)
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))

def catch_data():
    """Download the area history feed and keep the rows tagged '05-14'.

    The surviving rows are printed, written to 'world_5_14.csv' (utf-8-sig
    encoded, without the index) and returned as a DataFrame.

    NOTE(review): two columns are written by position (15 and 16) and later
    read back by the names 'cities' and 'updateTime', so they presumably are
    those columns -- confirm against the live feed's column order.
    """
    url = 'https://lab.isaaclin.cn/nCoV/api/area?latest=0'
    data = requests.get(url=url).json()
    res = data['results']
    df = pd.DataFrame(res)
    # Column 16: millisecond timestamp -> formatted local-time string.
    for i in range(len(df)):
        df.iloc[i,16] = time_c(df.iloc[i,16])
    # Column 15: overwritten with the "MM-DD" slice of that string.
    for i in range(len(df)):
        df.iloc[i,15] = df.iloc[i,16][5:10]
    df1 = df
    # One row per distinct update time; these values drive the loop below.
    df2 = df1.drop_duplicates(['updateTime'],keep='last')
    date = []
    for i in range(len(df2)):
        date.append(df2.iloc[i,16])
    #print(date)
    tem = df1[df1['updateTime'] == '05-14']
    tem = tem.drop_duplicates(['provinceShortName'], keep='last')
    for i in date[1:41]:
        tem1 = df1[df1['updateTime'] == i]
        tem1= tem1.drop_duplicates(['provinceShortName'],keep='last')
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent row-wise concatenation.
        tem = pd.concat([tem, tem1])
    tem = tem.reset_index(drop=True)
    #print(tem)
    # Keep only the rows whose overwritten column carries the 05-14 tag.
    tem = tem[tem['cities'] == '05-14']
    print(tem)
    tem.to_csv('world_5_14.csv',index=False,encoding='utf-8-sig')
    return tem


if __name__ == '__main__':
    # Fetch the filtered world rows, then plot the current confirmed count
    # per country (matched by English country name) to an HTML page.
    area_data = catch_data()
    (
        Map()
        .add(
            "",
            [list(z) for z in zip(list(area_data["countryEnglishName"]), list(area_data["currentConfirmedCount"]))],
            "world",
            is_map_symbol_show=False,
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="2019_nCov世界各地现确诊人数地图"),
            visualmap_opts=opts.VisualMapOpts(
                is_piecewise=True,
                # Buckets are contiguous (previously the value 100 matched
                # no bucket) and every label states the bucket's real bounds.
                pieces=[
                    {"min": 5000, "label": '>=5000', "color": "#893448"},
                    {"min": 1000, "max": 4999, "label": '1000~4999', "color": "#ff585e"},
                    {"min": 500, "max": 999, "label": '500~999', "color": "#fb8146"},
                    {"min": 100, "max": 499, "label": '100~499', "color": "#ffA500"},
                    {"min": 10, "max": 99, "label": '10~99', "color": "#ffb248"},
                    {"min": 0, "max": 9, "label": '0~9', "color": "#fff2d1"},
                ],
            ),
        )
        # Country name labels are hidden on the map itself.
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    ).render(path="全球疫情.html")

运行结果:全球疫情

         

你可能感兴趣的:(数据处理)