Python疫情数据爬取与可视化

使用Python爬取腾讯新闻疫情数据,并使用pyecharts进行可视化,绘制国内各地区现有确诊人数与累计确诊人数的地图、柱状图和折线图。

文章目录

    • 1.分析网页
    • 2.导入模块
    • 3.抓取数据
    • 4.提取数据并写入Excel
    • 5.国内各地区现有确诊人数地图
    • 6.国内各地区现有确诊人数柱状图
    • 7.国内各地区现有确诊人数折线图
    • 8.国内各地区累计确诊人数地图
    • 9.国内各地区累计确诊人数柱状图
    • 10.国内各地区累计确诊人数折线图
    • 11.完整代码

1.分析网页

  • 通过腾讯新闻公布的数据进行爬取
  • 网址:https://news.qq.com/zt2020/page/feiyan.htm#/
  • 对于静态网页,我们只需要把网页地址栏中的url传到get请求中就可以轻松地获取到网页的数据。本文第3节实际请求的是返回JSON数据的接口 https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5 ,而不是上面的页面地址。
    Python疫情数据爬取与可视化_第1张图片

2.导入模块

import time
import json
import requests
from datetime import datetime
import pandas as pd
import numpy as np

3.抓取数据

def Domestic(timeout=10):
    """Fetch the epidemic dataset from the Tencent News JSON interface.

    Args:
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded content of the response's ``data`` field.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    response = requests.get(url=url, timeout=timeout)
    # Fail with a clear HTTP error instead of an obscure JSON decoding error.
    response.raise_for_status()
    # The 'data' field of the envelope is itself JSON-encoded text, so it
    # needs a second decoding pass.
    return json.loads(response.json()['data'])
domestic = Domestic()

4.提取数据并写入Excel

# Per-province records are the children of the first area-tree entry.
areaTree = domestic['areaTree']
china_data = areaTree[0]['children']
china_list = []
list_province = []      # province names (x axis of the bar/line charts)
list_nowConfirm = []    # currently confirmed count per province
list_conrirm = []       # cumulative confirmed count per province
for region in china_data:
    totals = region['total']
    province = region['name']
    confirm = totals['confirm']
    # Currently confirmed = cumulative confirmed - 'heal' count - 'dead' count.
    nowConfirm = confirm - totals['heal'] - totals['dead']
    list_province.append(province)
    list_nowConfirm.append(nowConfirm)
    list_conrirm.append(confirm)
    china_list.append({
        'province': province,
        'nowConfirm': nowConfirm,
        'confirm': confirm,
    })
# From here on china_data is the tabular form of the records.
china_data = pd.DataFrame(china_list)
china_data.to_excel('疫情数据.xlsx', index=False)  # save as an Excel file
china_data.head()

5.国内各地区现有确诊人数地图

import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
# Set pyecharts' notebook type to JupyterLab before the chart is created.
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB

# Map of China coloured by the currently confirmed count of each province.
m = Map()
m.add(
    "",
    # One [province, value] pair per row; tolist() yields plain Python scalars.
    [
        list(pair)
        for pair in zip(china_data["province"].tolist(),
                        china_data["nowConfirm"].tolist())
    ],
    maptype="china",
    is_map_symbol_show=False,
)
m.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap, and each label must match its bounds.
        pieces=[
            # No "max": the top bucket is open-ended.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            # Upper bound is 0 (not 1) so that a value of 1 belongs only to
            # the '1-9' bucket.
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
m.render("地图.html")

Python疫情数据爬取与可视化_第2张图片

6.国内各地区现有确诊人数柱状图

from pyecharts import options as opts
from pyecharts.charts import Bar
# Bar chart of the currently confirmed count, one bar per province.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("柱状图", list_nowConfirm)
    .set_global_opts(
        # The title names the chart type actually drawn (bar chart).
        title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        # Slider-type data zoom, since there are many provinces on the x axis.
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("柱状图.html")

Python疫情数据爬取与可视化_第3张图片

7.国内各地区现有确诊人数折线图

import pyecharts.options as opts
from pyecharts.charts import Line
# Line chart of the currently confirmed count across provinces.
line = (
    Line()
    .add_xaxis(xaxis_data=list_province)
    .add_yaxis(
        series_name="折线图",
        y_axis=list_nowConfirm,
        is_connect_nones=True,
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数折线图"),
        # Axis name spelled "确诊" to agree with the rest of the article.
        yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
line.render("折线图.html")

Python疫情数据爬取与可视化_第4张图片

8.国内各地区累计确诊人数地图


代码如下:

import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
# Set pyecharts' notebook type to JupyterLab before the chart is created.
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB

# Map of China coloured by the CUMULATIVE confirmed count of each province.
m1 = Map()
m1.add(
    "",
    # This section is about cumulative cases, so plot the "confirm" column
    # rather than "nowConfirm".
    [
        list(pair)
        for pair in zip(china_data["province"].tolist(),
                        china_data["confirm"].tolist())
    ],
    maptype="china",
    is_map_symbol_show=False,
)
m1.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap, and each label must match its bounds.
        pieces=[
            # No "max": the top bucket is open-ended.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            # Upper bound is 0 (not 1) so that a value of 1 belongs only to
            # the '1-9' bucket.
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
# Separate file name so the current-cases map ("地图.html") is not overwritten.
m1.render("累计确诊地图.html")

Python疫情数据爬取与可视化_第5张图片

9.国内各地区累计确诊人数柱状图

from pyecharts import options as opts
from pyecharts.charts import Bar
# Bar chart of the cumulative confirmed count, one bar per province.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("累计确诊柱状图", list_conrirm)
    .set_global_opts(
        # The title names the chart type actually drawn (bar chart) and the
        # metric actually plotted (cumulative confirmed).
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("累计确诊柱状图.html")

Python疫情数据爬取与可视化_第6张图片

10.国内各地区累计确诊人数折线图

import pyecharts.options as opts
from pyecharts.charts import Line
# Line chart of the cumulative confirmed count across provinces.
line = (
    Line()
    .add_xaxis(xaxis_data=list_province)
    .add_yaxis(
        series_name="累计确诊折线图",
        y_axis=list_conrirm,
        is_connect_nones=True,
    )
    .set_global_opts(
        # The series is cumulative only, so the title no longer mixes
        # "cumulative" with "current".
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数折线图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
line.render("累计确诊折线图.html")

Python疫情数据爬取与可视化_第7张图片

11.完整代码

import time
import json
import requests
from datetime import datetime
import pandas as pd
import numpy as np

import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
# Set pyecharts' global notebook type to JupyterLab.
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB

from pyecharts import options as opts
from pyecharts.charts import Bar

import pyecharts.options as opts
from pyecharts.charts import Line


def Domestic(timeout=10):
    """Fetch the epidemic dataset from the Tencent News JSON interface.

    Args:
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded content of the response's ``data`` field.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    response = requests.get(url=url, timeout=timeout)
    # Fail with a clear HTTP error instead of an obscure JSON decoding error.
    response.raise_for_status()
    # The 'data' field of the envelope is itself JSON-encoded text, so it
    # needs a second decoding pass.
    return json.loads(response.json()['data'])
domestic = Domestic()
# Per-province records are the children of the first area-tree entry.
areaTree = domestic['areaTree']
china_data = areaTree[0]['children']
china_list = []
list_province = []      # province names (x axis of the bar/line charts)
list_nowConfirm = []    # currently confirmed count per province
list_conrirm = []       # cumulative confirmed count per province
for region in china_data:
    totals = region['total']
    province = region['name']
    confirm = totals['confirm']
    # Currently confirmed = cumulative confirmed - 'heal' count - 'dead' count.
    nowConfirm = confirm - totals['heal'] - totals['dead']
    list_province.append(province)
    list_nowConfirm.append(nowConfirm)
    list_conrirm.append(confirm)
    china_list.append({
        'province': province,
        'nowConfirm': nowConfirm,
        'confirm': confirm,
    })
# From here on china_data is the tabular form of the records.
china_data = pd.DataFrame(china_list)
china_data.to_excel('疫情数据.xlsx', index=False)  # save as an Excel file
china_data.head()

# Map of China coloured by the currently confirmed count of each province.
m = Map()
m.add(
    "",
    # One [province, value] pair per row; tolist() yields plain Python scalars.
    [
        list(pair)
        for pair in zip(china_data["province"].tolist(),
                        china_data["nowConfirm"].tolist())
    ],
    maptype="china",
    is_map_symbol_show=False,
)
m.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap, and each label must match its bounds.
        pieces=[
            # No "max": the top bucket is open-ended.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            # Upper bound is 0 (not 1) so that a value of 1 belongs only to
            # the '1-9' bucket.
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
m.render("地图.html")



# Bar chart of the currently confirmed count, one bar per province.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("柱状图", list_nowConfirm)
    .set_global_opts(
        # The title names the chart type actually drawn (bar chart).
        title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        # Slider-type data zoom, since there are many provinces on the x axis.
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("柱状图.html")

# Line chart of the currently confirmed count, configured step by step
# instead of through one fluent call chain.
line = Line()
line.add_xaxis(xaxis_data=list_province)
line.add_yaxis(
    series_name="折线图",
    y_axis=list_nowConfirm,
    is_connect_nones=True,
)
line.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国现有地区现有确诊人数折线图"),
    yaxis_opts=opts.AxisOpts(name="现有确诊病例"),
    xaxis_opts=opts.AxisOpts(name="地区"),
    datazoom_opts=opts.DataZoomOpts(type_="slider"),
)
line.render("折线图.html")

# Map of China coloured by the cumulative confirmed count of each province.
m1 = Map()
m1.add(
    "",
    # One [province, value] pair per row; tolist() yields plain Python scalars.
    [
        list(pair)
        for pair in zip(china_data["province"].tolist(),
                        china_data["confirm"].tolist())
    ],
    maptype="china",
    is_map_symbol_show=False,
)
m1.set_global_opts(
    title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数地图"),
    visualmap_opts=opts.VisualMapOpts(
        is_piecewise=True,
        # Buckets must not overlap, and each label must match its bounds.
        pieces=[
            # No "max": the top bucket is open-ended.
            {"min": 5000, "label": '>5000', "color": "#893448"},
            {"min": 1000, "max": 4999, "label": '1000-4999', "color": "#ff585e"},
            {"min": 500, "max": 999, "label": '500-999', "color": "#fb8146"},
            {"min": 101, "max": 499, "label": '101-499', "color": "#ffA500"},
            {"min": 10, "max": 100, "label": '10-100', "color": "#ffb248"},
            {"min": 1, "max": 9, "label": '1-9', "color": "#fff2d1"},
            # Upper bound is 0 (not 1) so that a value of 1 belongs only to
            # the '1-9' bucket.
            {"max": 0, "label": '0', "color": "#ffffff"},
        ],
    ),
)
m1.render("累计确诊地图.html")

# Bar chart of the cumulative confirmed count, one bar per province.
bar = (
    Bar()
    .add_xaxis(list_province)
    .add_yaxis("累计确诊柱状图", list_conrirm)
    .set_global_opts(
        # The title names the chart type actually drawn (bar chart).
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数柱状图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
bar.render("累计确诊柱状图.html")

# Line chart of the cumulative confirmed count across provinces.
line = (
    Line()
    .add_xaxis(xaxis_data=list_province)
    .add_yaxis(
        series_name="累计确诊折线图",
        y_axis=list_conrirm,
        is_connect_nones=True,
    )
    .set_global_opts(
        # The series is cumulative only, so the title no longer mixes
        # "cumulative" with "current".
        title_opts=opts.TitleOpts(title="COVID-19中国地区累计确诊人数折线图"),
        yaxis_opts=opts.AxisOpts(name="累计确诊病例"),
        xaxis_opts=opts.AxisOpts(name="地区"),
        datazoom_opts=opts.DataZoomOpts(type_="slider"),
    )
)
line.render("累计确诊折线图.html")

你可能感兴趣的:(Python,python,爬虫,echarts,数据可视化)