python爬取丁香园的疫情数据绘制Echarts地图

2020.2.4更新

完整的项目工程地址https://github.com/Arithmeticjia/wuhan

2020.1.26更新

几乎每天页面都在变,放一下最新的爬虫代码,由于页面需要滚动,加入了selenium

def china_wuhan(request):
    """Render the epidemic map page from figures scraped off the DXY page.

    The page is loaded in headless Chrome, scrolled down, and its rendered
    HTML is parsed for one name/count pair per region block.

    Args:
        request: The incoming Django request, passed through to ``render``.

    Returns:
        The response produced by rendering ``china-wuhan.html`` with a
        context of ``{'data': {region_name: count}}``. ``count`` is the text
        scraped from the page, or ``0`` when that text is empty. If scraping
        fails as a whole, ``data`` is an empty dict so the page still renders.
    """
    import logging

    from bs4 import BeautifulSoup
    from selenium import webdriver

    target = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579579384&enterid=1579579384&from=groupmessage&isappinstalled=0'
    data = {}

    try:
        option = webdriver.ChromeOptions()
        option.add_argument('headless')  # run Chrome without a visible window
        # `options=` replaces the deprecated `chrome_options=` keyword.
        driver = webdriver.Chrome(options=option)
        try:
            driver.get(target)
            # Scroll the document down 1500px before reading the DOM.
            driver.execute_script("var q=document.documentElement.scrollTop=1500")
            selenium_page = driver.page_source
        finally:
            # Always shut the browser down, even when loading the page fails;
            # otherwise every failed request leaves a Chrome process behind.
            driver.quit()

        soup = BeautifulSoup(selenium_page, 'html.parser')
        cities = soup.find('div', {'class': 'areaBox___3jZkr'})
        # One candidate <div> per region; empty when the container is missing.
        protocols = cities.find_all('div') if cities is not None else []

        for i in protocols:
            first = i.find('div', {'class': 'areaBlock1___3V3UU'})
            if first is None:
                # Not a region block (nested helper div); skip it.
                continue
            content = first.find_all('p')
            if len(content) < 2:
                # A malformed row must not discard the rows already collected.
                continue
            name = content[0].get_text()
            num = content[1].get_text()
            # An empty count cell is reported as zero.
            data[name] = num if num != "" else 0
    except Exception:
        # Best effort: log the failure and render the page with no data
        # instead of answering with a server error.
        logging.getLogger(__name__).exception('Failed to scrape %s', target)
        data = {}

    context = {
        'data': data
    }
    return render(request, 'china-wuhan.html', context=context)

 

python爬取丁香园的疫情数据绘制Echarts地图_第1张图片

1、软件准备

1、Django2.1
2、requests
3、bs4

 

import requests
from bs4 import BeautifulSoup

# Fetch the DXY epidemic page and collect one confirmed-case count per region.
target = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579579384&enterid=1579579384&from=groupmessage&isappinstalled=0'
# A timeout keeps the script from hanging forever on an unresponsive server.
req = requests.get(url=target, timeout=10)
# Decode the body as UTF-8 (the codec name was previously misspelled 'urf-8').
req.encoding = 'utf-8'
html = req.text
soup = BeautifulSoup(html, 'html.parser')
cities = soup.find('div', {'class': 'areaBox___3jZkr'})
# One candidate <div> per region; empty when the container is missing.
protocols = cities.find_all('div') if cities is not None else []
data = {}

for i in protocols:
    first = i.find('div', {'class': 'areaBlock1___3V3UU'})
    if first is None:
        # Not a region block (nested helper div); skip it.
        continue
    content = first.find_all('p')
    if len(content) < 2:
        # Malformed row without both a name and a count; skip it.
        continue
    name = content[0].get_text()
    num = content[1].get_text()
    print(num)
    # An empty count cell is reported as zero.
    if num == "":
        num = 0
    data[name] = num
    print('疫情:', name, '确诊', num, '例')
print(data)

拿到的数据大概是这样

{'湖北': '549', '广东': '53', '浙江': '43', '北京': '26', '上海': '20', '湖南': '9', '安徽': '15', '重庆': '27', '四川': '15', '山东': '9', '广西': '13', '福建': '5', '江苏': '9', '河南': '9', '海南': '8', '天津': '5', '江西': '7', '陕西': '3', '贵州': '3', '辽宁': '3', '香港': '2', '黑龙江': '4', '澳门': '2', '新疆': '2', '甘肃': '2', '云南': '2', '台湾': '1', '山西': '1', '吉林': '3', '河北': '2', '宁夏': '1', '内蒙古': 0}

这是一个字典

然后交给Django返回

def china_wuhan(request):
    """Render the epidemic map page from figures scraped off the DXY page.

    The page is fetched with ``requests`` and parsed for one name/count pair
    per region block.

    Args:
        request: The incoming Django request, passed through to ``render``.

    Returns:
        The response produced by rendering ``china-wuhan.html`` with a
        context of ``{'data': {region_name: count}}``. ``count`` is the text
        scraped from the page, or ``0`` when that text is empty. If the page
        cannot be fetched, ``data`` is an empty dict so the page still renders.
    """
    import logging

    import requests
    from bs4 import BeautifulSoup

    target = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579579384&enterid=1579579384&from=groupmessage&isappinstalled=0'
    data = {}

    try:
        # A timeout keeps the view from blocking forever on a dead server.
        req = requests.get(url=target, timeout=10)
        req.raise_for_status()
    except requests.RequestException:
        # Best effort: log the failure and render the page with no data
        # instead of answering with a server error.
        logging.getLogger(__name__).exception('Failed to fetch %s', target)
        return render(request, 'china-wuhan.html', context={'data': data})

    # Decode the body as UTF-8 (the codec name was previously misspelled 'urf-8').
    req.encoding = 'utf-8'
    html = req.text
    soup = BeautifulSoup(html, 'html.parser')
    cities = soup.find('div', {'class': 'areaBox___3jZkr'})
    # One candidate <div> per region; empty when the container is missing.
    protocols = cities.find_all('div') if cities is not None else []

    for i in protocols:
        first = i.find('div', {'class': 'areaBlock1___3V3UU'})
        if first is None:
            # Not a region block (nested helper div); skip it.
            continue
        content = first.find_all('p')
        if len(content) < 2:
            # Malformed row without both a name and a count; skip it.
            continue
        name = content[0].get_text()
        num = content[1].get_text()
        print(num)
        # An empty count cell is reported as zero.
        if num == "":
            num = 0
        data[name] = num

    context = {
        'data': data
    }
    return render(request, 'china-wuhan.html', context=context)

然后就来到了 HTML 模板,你可以理解为 Django 把一个字典传给了模板

对于echarts来说,它需要的数据格式是

// Sample of the data shape used for the ECharts map: an array of
// {name, value} objects, one per region.
// The "..." lines stand for the remaining entries and are not valid JavaScript.
var datavirus=[
            {name:"南海诸岛",value: 0},
            {name: '北京', value: 14},
            {name: '天津', value: 4},
            {name: '上海', value: 16},
            {name: '重庆', value: 9},
            {name: '河北', value: 1},
            ...
            ...
]
            

那么从django返回数据就是这样用

data: [{% for key,value in data.items%}
          {value:{{ value }}, name:'{{ key }}'},
       {% endfor %}
      ]

展示链接https://www.guanacossj.com/china-wuhan/

下面放一下完整的html代码,自行修改




    
    
    武汉加油
    
    
    
    
    


    

 

你可能感兴趣的:(Django,python,爬虫)