Python 递归解析 JSON

知识点:
1、编码设置、特殊字符处理
2、正则表达式提取JSON字符串
3、递归打印JSON属性值

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os, sys, time
import urllib.request, requests, bs4
import re, json, demjson
import importlib

# Original note: "set utf-8 encoding".
# NOTE(review): only the reload of `sys` happens here; no encoding is set
# anywhere in the visible code. This looks like a leftover of the Python 2
# `reload(sys); sys.setdefaultencoding(...)` idiom -- confirm before removing.
importlib.reload(sys)

# Special-character handling: a str.translate() table that maps every code
# point outside the Basic Multilingual Plane (U+10000 .. sys.maxunicode) to
# U+FFFD (the replacement character). It is applied to the JSON text before
# it is decoded and printed by the script at the bottom of the file.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

def dump(msg):
    """Print ``msg`` and terminate the process at once (debug "breakpoint").

    Args:
        msg: Any object; it is written to standard output via ``print``.

    This function never returns: the process ends with exit status 0 through
    ``os._exit``, so ``finally`` blocks and ``atexit`` handlers do not run.
    """
    # os._exit() bypasses normal interpreter shutdown, which includes flushing
    # buffered stdio. Without an explicit flush the message is silently lost
    # whenever stdout is block-buffered (redirected to a pipe or a file).
    print(msg, flush=True)
    os._exit(0)


def loads_jsonp(jsonp):
    """Decode the JSON object embedded in a JSONP response.

    Example input: ``jsonp1({"code": 0, "msg": "", "times": 1570073177610})``.

    Args:
        jsonp: The JSONP payload. It is converted with ``str()`` first, so any
            object whose string form contains the payload is accepted.

    Returns:
        The decoded JSON value (the text from the first ``{`` to the last
        ``}`` of the input, parsed with ``json.loads``).

    Raises:
        ValueError: If the input contains no ``{...}`` span, or if that span
            is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    text = str(jsonp)
    # Greedy body: leftmost "{" through the last "}". DOTALL lets the payload
    # span several lines.
    found = re.search(r"\{.*\}", text, re.S)
    if found is None:
        # Previously this surfaced as "'NoneType' object has no attribute
        # 'group'"; report the real problem instead.
        raise ValueError(
            "no JSON object found in JSONP payload: {!r}".format(text[:80])
        )
    return json.loads(found.group(0))


def downfiles(imglist, dest_dir="D:\\360Downloads", timeout=30):
    """Download every URL in ``imglist`` into ``dest_dir``.

    Each file is named after the last ``/``-separated segment of its URL with
    ``.jpg`` appended. Progress is printed after every file and a completion
    message at the end.

    Args:
        imglist: Iterable of image URLs (strings).
        dest_dir: Directory the files are written to; created if missing.
            Defaults to the location that used to be hard-coded.
        timeout: Seconds passed to ``requests.get`` so a stalled server cannot
            hang the whole batch.

    Raises:
        requests.HTTPError: If a server answers with a 4xx/5xx status. This
            is checked so an error page is never saved as an image.
        requests.RequestException: On connection problems or timeouts.
        OSError: If ``dest_dir`` or a file in it cannot be created/written.
    """
    os.makedirs(dest_dir, exist_ok=True)

    for count, imgurl in enumerate(imglist, start=1):
        imgres = requests.get(imgurl, timeout=timeout)
        # Same status handling as the page-fetching helpers in this file.
        imgres.raise_for_status()

        fname = imgurl.split('/')[-1]
        target_path = os.path.join(dest_dir, "{}.jpg".format(fname))
        with open(target_path, "wb") as f:
            f.write(imgres.content)

        print("第", count, "张")
    print("下载完毕")


def getdetails(url):
    """Read a detail page and download the images it references.

    The page is fetched and parsed with ``geturl``; every ``<img>`` that is a
    direct child of an element with class ``reveal-work-wrap`` contributes its
    ``src`` attribute. Each resulting URL is printed and the whole list is
    handed to ``downfiles``.

    Args:
        url: Address of the detail page.

    Raises:
        requests.HTTPError: If the page request returns a 4xx/5xx status.
    """
    html = geturl(url)  # shared fetch + raise_for_status + html5lib parse
    downloadedList = []  # URLs collected from this page

    for img in html.select('.reveal-work-wrap > img'):
        src = img.get('src')
        if not src:
            # An <img> without src used to crash on None.split(); skip it.
            continue
        # Keep only the part before the first '@' (presumably a CDN
        # size/format suffix -- confirm against the target site).
        target = src.split('@')[0]
        downloadedList.append(target)
        print(target)

    downfiles(downloadedList)

def geturl(url):
    """Fetch ``url`` and return its body parsed into a BeautifulSoup tree.

    Despite the name, the return value is the parsed document (built with the
    ``html5lib`` parser), not a URL.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
    """
    response = requests.get(url)
    response.raise_for_status()
    return bs4.BeautifulSoup(response.text, 'html5lib')

def printjson(json):
    """Decode a JSON string and print its top-level entries only.

    Args:
        json: JSON text, decoded with ``demjson.decode``. (The parameter name
            shadows the ``json`` module inside this function.)

    Each entry is printed as ``name : value``; nested values are printed
    as-is, without recursion.
    """
    decoded = demjson.decode(json)

    for key in decoded:
        value = decoded[key]
        print(key, ':', value)

def dict_generator(indict, pre=None):
    """Recursively flatten a decoded JSON value into key paths.

    Every leaf is yielded as a list ``[key1, key2, ..., value]``: the keys
    leading to it followed by the leaf itself. Lists and tuples do not add a
    path component; their items are reported under the key of the sequence.

    Args:
        indict: The value to walk (normally the dict from ``json.loads``).
        pre: Key path accumulated so far; callers normally omit it. It is
            copied, never mutated.

    Yields:
        list: ``pre + [keys..., leaf]``. An empty dict, list or tuple stored
        under a key is reported with the placeholder string ``'{}'``, ``'[]'``
        or ``'()'`` as its leaf.

    Scalars found inside a sequence (or passed in directly) are yielded with
    their path too. Earlier they were yielded bare, which lost the key and
    broke callers that slice each result.
    """
    path = list(pre) if pre else []

    if isinstance(indict, dict):
        for key, value in indict.items():
            key_path = path + [key]
            if isinstance(value, dict):
                if value:
                    yield from dict_generator(value, key_path)
                else:
                    yield key_path + ['{}']
            elif isinstance(value, (list, tuple)):
                if value:
                    yield from dict_generator(value, key_path)
                else:
                    yield key_path + ['[]' if isinstance(value, list) else '()']
            else:
                yield key_path + [value]
    elif isinstance(indict, (list, tuple)):
        # Walk each item under the same path, so nested sequences and
        # sequences of dicts are flattened as well.
        for item in indict:
            yield from dict_generator(item, path)
    else:
        yield path + [indict]


def typeof(target):
    """Print the type of ``target`` (for example ``<class 'int'>``)."""
    kind = type(target)
    print(kind)

# Shape of the expected payload, for reference:
#   jsonp1({"code": 0,"msg": "","times": 1570073177610})
jsonp = geturl('http://acsing.kugou.com/sing7/web/jsonp/cdn/opus/listenGetData?callback=jsonp1&data=OTkwMDY4MDk0&sign=84d875624381eda2b448b082e22d2eb7&channelId=0&_=1570073173877:formatted')

# geturl() returns a parsed document, so work on its string form and cut out
# everything from the first "{" to the last "}".
matched = re.match(r'.*?({.*}).*', str(jsonp), re.S)
if matched is None:
    # Fail with a clear message rather than an AttributeError on None.group.
    raise ValueError("response does not contain a JSON object")
result = matched.group(1)

# Flatten the JSON recursively and print one "key.path : value" line per leaf.
result = result.translate(non_bmp_map)  # replace non-BMP characters first
sValue = json.loads(result)
for i in dict_generator(sValue):
    print('.'.join(i[0:-1]), ':', i[-1])

# os._exit() skips the normal shutdown that flushes stdio; flush explicitly so
# the lines printed above are not lost when stdout is a pipe or a file.
sys.stdout.flush()
os._exit(0)



你可能感兴趣的:(python,python,json,爬虫)