Python 爬取国家统计局相关数据

一、参考 https://www.jianshu.com/p/9827a052da91,对年末总人口数据进行爬取,并用 matplotlib 绘制柱状图

代码:

# -*- coding: utf-8 -*-
"""
Created on Fri Mar  1 09:19:33 2019

@author: Lenovo
"""

import requests
import time
import json
import matplotlib.pyplot as plt

def gettime():
    """Return the current Unix time in milliseconds, rounded to an int."""
    millis = time.time() * 1000
    return int(round(millis))

def parse_series(nodes, marker, convert=int):
    """Extract (labels, values) for one indicator from a list of data nodes.

    Args:
        nodes: iterable of dicts, each with a ``'code'`` string and a
            ``'data'`` dict containing a ``'strdata'`` string.
        marker: substring that a node's ``'code'`` must contain to be kept.
        convert: callable applied to each non-empty ``'strdata'`` string.

    Returns:
        A ``(labels, values)`` tuple of two equally long lists. Each label is
        the last four characters of the node's code (used as the year).
    """
    labels = []
    values = []
    for node in nodes:
        code = node['code']
        if marker not in code:
            continue
        raw = node['data']['strdata']
        if not raw:
            # An empty string cannot be converted (int('') raises ValueError),
            # so nodes without a value are skipped instead of crashing.
            continue
        labels.append(code[-4:])
        values.append(convert(raw))
    return labels, values


if __name__ == '__main__':
    # 1. Request the data
    url = 'http://data.stats.gov.cn/easyquery.htm'

    # Each fragment ends with a space so the concatenated User-Agent keeps
    # its product tokens separated ("... like Gecko) Chrome/...").
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/70.0.3538.102 Safari/537.36',
    }

    # Query-string parameters sent with the request.
    keyvalue = {
        'm': 'QueryData',
        'dbcode': 'hgnd',
        'rowcode': 'zb',
        'colcode': 'sj',
        'wds': '[]',
        'dfwds': '[{"wdcode":"zb","valuecode":"A0301"}]',
        'k1': str(gettime()),  # current timestamp in milliseconds
    }

    # GET request with the custom header and parameters. The timeout keeps
    # the script from hanging forever; raise_for_status() surfaces HTTP
    # errors here instead of as a confusing JSON decode failure below.
    r = requests.get(url, headers=headers, params=keyvalue, timeout=30)
    r.raise_for_status()

    # 2. Parse the data
    data = json.loads(r.text)
    data_one = data['returndata']['datanodes']
    year, population = parse_series(data_one, 'A030101_sj', int)

    print(year)
    print(population)

    # 3. Plot the data
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False
    plt.bar(year, population)
    plt.xlabel('年份')
    plt.ylabel('万人')
    plt.title('年末总人口')
    plt.show()

效果:

Python 爬取国家统计局相关数据_第1张图片

二、同理,经过简单的修改,即可对其它数据(如福利彩票销售总额)进行爬取

不同数据之间,只是请求参数(dfwds 中的 valuecode)和 JSON 解析时的匹配条件略有不同

代码:

# -*- coding: utf-8 -*-
"""
Created on Fri Mar  1 13:56:43 2019

@author: Lenovo
"""

# -*- coding: utf-8 -*-
"""
Created on Fri Mar  1 09:19:33 2019

@author: Lenovo
"""

import requests
import time
import json
import matplotlib.pyplot as plt

def gettime():
    """Return the current Unix time in milliseconds, rounded to an int."""
    now_ms = time.time() * 1000
    return int(round(now_ms))

def parse_series(nodes, marker, convert=float):
    """Extract (labels, values) for one indicator from a list of data nodes.

    Args:
        nodes: iterable of dicts, each with a ``'code'`` string and a
            ``'data'`` dict containing a ``'strdata'`` string.
        marker: substring that a node's ``'code'`` must contain to be kept.
        convert: callable applied to each non-empty ``'strdata'`` string.

    Returns:
        A ``(labels, values)`` tuple of two equally long lists. Each label is
        the last four characters of the node's code (used as the year).
    """
    labels = []
    values = []
    for node in nodes:
        code = node['code']
        if marker not in code:
            continue
        raw = node['data']['strdata']
        if not raw:
            # An empty string cannot be converted (float('') raises
            # ValueError), so nodes without a value are skipped.
            continue
        labels.append(code[-4:])
        values.append(convert(raw))
    return labels, values


if __name__ == '__main__':
    # 1. Request the data
    url = 'http://data.stats.gov.cn/easyquery.htm'

    # Each fragment ends with a space so the concatenated User-Agent keeps
    # its product tokens separated ("... like Gecko) Chrome/...").
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/70.0.3538.102 Safari/537.36',
    }

    # Query-string parameters sent with the request.
    keyvalue = {
        'm': 'QueryData',
        'dbcode': 'hgnd',
        'rowcode': 'zb',
        'colcode': 'sj',
        'wds': '[]',
        'dfwds': '[{"wdcode":"zb","valuecode":"A0P09"}]',
        'k1': str(gettime()),  # current timestamp in milliseconds
    }

    # GET request with the custom header and parameters. The timeout keeps
    # the script from hanging forever; raise_for_status() surfaces HTTP
    # errors here instead of as a confusing JSON decode failure below.
    r = requests.get(url, headers=headers, params=keyvalue, timeout=30)
    r.raise_for_status()

    # 2. Parse the data
    data = json.loads(r.text)
    data_one = data['returndata']['datanodes']
    year, sales = parse_series(data_one, 'A0P0902_sj', float)

    print(year)
    print(sales)

    # 3. Plot the data
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False
    plt.bar(year, sales)
    plt.xlabel('年份')
    plt.ylabel('亿元')
    plt.title('福利彩票销售额')
    plt.show()

效果:

Python 爬取国家统计局相关数据_第2张图片

你可能感兴趣的:(其它,python,爬虫,国家统计局)