爬虫数据保存到EXCEL并绘图

import requests
import json


# Fetch the JSON payload from the news API.
china_url = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5"
header = {"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66",
          "referer":"https://news.qq.com/"}
# The second positional argument of requests.get() is `params`, so the header
# dict has to be passed by keyword; otherwise it ends up in the query string
# and the user-agent/referer are never sent.  The timeout keeps the script
# from blocking forever on an unresponsive server.
http_response = requests.get(china_url, headers=header, timeout=10)
# Surface HTTP errors here rather than as a confusing JSON/key error below.
http_response.raise_for_status()
reponse = http_response.json()
# print(reponse)

# The 'data' field is decoded with json.loads, i.e. it is expected to hold a
# JSON document serialised as a string.
data_dict = json.loads(reponse['data'])
# Serialise it again in a readable form; non-ASCII text is written as-is.
data_json = json.dumps(data_dict,ensure_ascii=False,indent=2)
# Save the payload.  The encoding is explicit because the text is not
# ASCII-only and the platform default encoding may be unable to represent it.
with open('C:/Users/wensong/Desktop/学习测试/china.json','w',encoding='utf-8') as file:
    file.write(data_json)




import pandas as pd
# Read the saved payload back, using the same encoding it was written with.
with open('C:/Users/wensong/Desktop/学习测试/china.json','r',encoding='utf-8') as file:
    data_py = file.read()

data_js = json.loads(data_py)
# print(data_js)

# The first entry of the area tree is used as the nationwide node.
data_china = data_js['areaTree'][0]
# Its children are the province-level nodes.
data_province = data_china['children']

# One record per city, tagged with the name of the province it belongs to.
# 'today' and 'total' are carried over unchanged from the city node.
chinaTotal = [
    {
        "province": province_node['name'],
        "city": city_node['name'],
        "today": city_node['today'],
        "total": city_node['total'],
    }
    for province_node in data_province
    for city_node in province_node['children']
]
chinaTotalData = pd.DataFrame(chinaTotal)
# print(chinaTotalData)

# Expand the per-row 'total' object into one flat column per field.
total_records = chinaTotalData['total'].values.tolist()
confirmlist = [record['confirm'] for record in total_records]
suspectlist = [record['suspect'] for record in total_records]
deadlist = [record['dead'] for record in total_records]
heallist = [record['heal'] for record in total_records]
deadratelist = [record['deadRate'] for record in total_records]
healratelist = [record['healRate'] for record in total_records]

chinaTotalData['confirm'] = confirmlist
chinaTotalData['suspect'] = suspectlist
chinaTotalData['dead'] = deadlist
chinaTotalData['heal'] = heallist
chinaTotalData['deadRate'] = deadratelist
chinaTotalData['healRate'] = healratelist

# Same for the per-row 'today' object; its 'confirm' field is stored as
# 'confirmtoday' so it does not clash with the cumulative 'confirm' column.
today_records = chinaTotalData['today'].values.tolist()
confirmtodaylist = [record['confirm'] for record in today_records]
confirmcutslist = [record['confirmCuts'] for record in today_records]

chinaTotalData['confirmtoday'] = confirmtodaylist
chinaTotalData['confirmCuts'] = confirmcutslist

# The nested source columns are no longer needed once flattened; remove them
# from the frame in place.
chinaTotalData.drop(columns=['total', 'today'], inplace=True)
# print(chinaTotalData)

import os

import openpyxl
# Write the cleaned table to the Excel workbook.
excel_path = 'C:/Users/wensong/Desktop/学习测试/国内疫情数据.xlsx'
if os.path.exists(excel_path):
    # Workbook already exists: open it in append mode so its other sheets are
    # kept, and replace the target sheet wholesale so that rows from an
    # earlier, longer export cannot linger below the new data.
    writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
else:
    # First run: there is nothing to preserve, create the workbook.
    writer_options = {'mode': 'w'}
# The context manager saves and closes the workbook, also on error.  (Manually
# assigning writer.book and calling writer.save() is not supported by
# pandas 2.x.)
with pd.ExcelWriter(excel_path, engine='openpyxl', **writer_options) as writer:
    chinaTotalData.to_excel(writer, index=False)




from pyecharts import options as opts
from pyecharts.charts import Map

df = pd.read_excel('C:/Users/wensong/Desktop/学习测试/国内疫情数据.xlsx')
# Total confirmed cases per province.  Only 'confirm' is aggregated: summing
# the whole frame would also "add up" text columns such as the city names,
# which is meaningless at best and raises on mixed types in recent pandas.
data_sum = df.groupby(by='province',as_index=False)['confirm'].sum()
# Convert to the (name, value) pairs the map expects,
# e.g. [("湖北", 1), ("江西", 2), ...]
data_list = list(zip(data_sum['province'].tolist(), data_sum['confirm'].tolist()))

def map_china():
    """Build the China map of confirmed cases.

    The series data is taken from the module-level ``data_list``
    (``(province, confirmed)`` pairs).

    Returns:
        The configured ``Map`` chart; rendering is left to the caller.
    """
    # Colour buckets for the piecewise visual map.  They have to cover the
    # value range without gaps: the top bucket starts right after 9999 and
    # has no upper bound, so values of 10000 and above are always coloured.
    pieces = [
        {"max": 9, "min": 0, "label": "0-9", "color": "#FFE4E1"},
        {"max": 99, "min": 10, "label": "10-99", "color": "#FF7F50"},
        {"max": 499, "min": 100, "label": "100-499", "color": "#F08080"},
        {"max": 999, "min": 500, "label": "500-999", "color": "#CD5C5C"},
        {"max": 9999, "min": 1000, "label": "1000-9999", "color": "#990000"},
        {"min": 10000, "label": ">=10000", "color": "#660000"},
    ]
    C = (
        Map()
        .add(series_name="确诊病例", data_pair=data_list, maptype="china")  # draw on the China map
        .set_global_opts(
            title_opts=opts.TitleOpts(title="疫情地图"),  # chart title
            visualmap_opts=opts.VisualMapOpts(
                is_piecewise=True,  # discrete colour per bucket
                pieces=pieces,
            ),
        )
    )
    return C
# Build the chart and write it out as an HTML page.
d_map = map_china()
d_map.render(path='C:/Users/wensong/Desktop/学习测试/国内疫情图.html')

















你可能感兴趣的:(爬虫数据保存到EXCEL并绘图)