全国区市县 字典下载


def dicurlopen(dicurl):
    """Fetch *dicurl* and return the raw response body.

    The request is sent with a fixed set of browser-like headers, including a
    hard-coded session cookie and a ``Host`` value of ``business.hcp66.com``.

    NOTE(review): the cookie is a captured session credential embedded in
    source; it is presumably stale and should be confirmed or moved to
    configuration.

    Args:
        dicurl: URL to request.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for a failed request
        (e.g. ``urllib.error.URLError``); nothing is caught here.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present elsewhere in the file.
    import urllib.request

    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540682204',
        'Host': 'business.hcp66.com',
        'Pragma': 'no-cache',
        'Referer': 'http://business.hcp66.com/member/index/shop.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }

    # Previously the header dict was built but the bare URL was passed to
    # urlopen, so none of the headers were ever sent. Wrapping the URL in a
    # Request attaches them.
    request = urllib.request.Request(dicurl, headers=head)

    # The context manager closes the response even if read() raises.
    with urllib.request.urlopen(request) as response:
        return response.read()
    




# Script section: request the sub-city list for city ids 0..492, collect a
# {decoded city name: id string} mapping, and pickle it to 'name.pkl'.

# These modules are used below but no import for them is visible earlier in
# the file; importing here is harmless if they are already imported.
import pickle
import re

# Decoded city name -> id string, accumulated over every response.
dict_data = {}

# The same two patterns are applied to every response, so compile them once.
_ID_PATTERN = re.compile(r'"id":"(\d*?)"')
_CITY_PATTERN = re.compile(r'"city":"(.*?)"')

# `con` is the 1-based progress counter shown in the status line.
for con, city_id in enumerate(range(0, 493), start=1):
    dicurl = 'http://business.hcp66.com/api/web/getsubcity/cityid/{}'.format(city_id)
    data = dicurlopen(dicurl).decode('utf-8')
    dict_id = _ID_PATTERN.findall(data)
    dict_name = _CITY_PATTERN.findall(data)

    # Pair ids with names positionally. zip() stops at the shorter list, so a
    # response with unequal match counts no longer raises IndexError.
    for sub_id, raw_name in zip(dict_id, dict_name):
        # Names arrive with \uXXXX escapes. Encoding as latin-1 with
        # backslashreplace keeps those escapes intact and turns any literal
        # non-latin-1 character into an escape as well, so unicode_escape
        # decodes both forms correctly (encoding as UTF-8 first would garble
        # literal non-ASCII characters).
        name = raw_name.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        dict_data[name] = sub_id

    print('第{}个省份OK'.format(con))

# The context manager closes the file even if pickling fails.
with open('name.pkl', 'wb') as file:
    pickle.dump(dict_data, file)

你可能感兴趣的:(爬虫)