jsonpath的安装及使用方式:
pip安装:
pip install jsonpath
jsonpath的使用:
obj = json.load(open('json文件', 'r', encoding='utf-8'))
ret = jsonpath.jsonpath(obj, 'jsonpath语法')
XPath | JSONPath | Description |
---|---|---|
/ | $ | 表示根元素 |
. | @ | 当前元素 |
/ | . or [] | 子元素 |
.. | n/a | 父元素 |
// | .. | 递归下降,JSONPath是从E4X借鉴的。 |
* | * | 通配符,表示所有的元素 |
@ | n/a | 属性访问字符 |
[] | [] | 子元素操作符 |
\| | [,] | 连接操作符:在XPath中用于合并多个结点集合;JSONPath允许使用多个name或者数组索引作为一个集合。 |
n/a | [start:end:step] | 数组分割(切片)操作,从ES4借鉴。 |
[] | ?() | 应用过滤表达式 |
n/a | () | 脚本表达式,使用在脚本引擎下面。 |
() | n/a | Xpath分组 |
jsonpath的基本使用
{ "store": {
"book": [
{ "category": "reference",
"author": "Nigel Rees",
"title": "Sayings of the Century",
"price": 8.95
},
{ "category": "fiction",
"author": "Evelyn Waugh",
"title": "Sword of Honour",
"price": 12.99
},
{ "category": "fiction",
"author": "Herman Melville",
"title": "Moby Dick",
"isbn": "0-553-21311-3",
"price": 8.99
},
{ "category": "fiction",
"author": "J. R. R. Tolkien",
"title": "The Lord of the Rings",
"isbn": "0-395-19395-8",
"price": 22.99
}
],
"bicycle": {
"author": "J. Tolkien",
"color": "red",
"price": 19.95
}
}
}
import json
import jsonpath
# Load the JSON document to query. The context manager closes the file as
# soon as parsing finishes instead of leaving the handle open.
with open('store.json', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)

# Each commented-out pair below is an alternative query against `obj`;
# uncomment one pair at a time to try it.

# Authors of all books in the store
# author_list = jsonpath.jsonpath(obj, '$.store.book[*].author')
# print(author_list)

# All authors
# author_list = jsonpath.jsonpath(obj, '$.store..author')
# print(author_list)

# Everything directly under store: the books and the bicycle
# author_list = jsonpath.jsonpath(obj, '$.store.*')
# print(author_list)

# The price of everything in the store
# author_list = jsonpath.jsonpath(obj, '$.store..price')
# print(author_list)

# The third book
# author_list = jsonpath.jsonpath(obj, '$..book[2]')
# print(author_list)

# The last book
# author_list = jsonpath.jsonpath(obj, '$..book[(@.length-1)]')
# print(author_list)

# The first two books (slice form, then explicit-index form)
# author_list = jsonpath.jsonpath(obj, '$..book[:2]')
# print(author_list)
# author_list = jsonpath.jsonpath(obj, '$..book[0,1]')
# print(author_list)

# Only the books that have an isbn
# author_list = jsonpath.jsonpath(obj, '$..book[?(@.isbn)]')
# print(author_list)

# Only the books priced below 10
# author_list = jsonpath.jsonpath(obj, '$..book[?(@.price<10)]')
# print(author_list)

# Every element in the document
author_list = jsonpath.jsonpath(obj, '$..*')
print(author_list)
JsonPath解析淘票票网页城市的json案例
import urllib.request
# Endpoint that returns the city list wrapped in a JSONP callback
# (the callback name is set by the `jsoncallback=jsonp109` query parameter).
url = 'https://dianying.taobao.com/cityAction.json?activityId&_ksTS=1695050371169_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true'

# Request headers copied from a browser session.
# NOTE(review): the Cookie value is a hard-coded browser session; it will
# expire and should not be committed to shared source control.
headers = {
    'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    # Presumably left disabled so the server does not send a compressed body;
    # urllib does not decompress responses automatically.
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Bx-V': '2.5.3',
    'Cookie': 't=5735d5d603eb306365806719d8cbc71d; cookie2=1f8f07fa7d0587561e7b7c595f0847a7; v=0; _tb_token_=5e585349e0ebb; cna=4IuNHejGryoCAXM8lO26wJQN; xlly_s=1; tb_city=110100; tb_cityName="sbG+qQ=="; tfstk=dKKMFUvbcF71geyV-cIsjflCpC3df5sf8IEAMiCq865BDd36HmoDtQ_4DVsOoiAF9hC9BGy6RQpzXh31Doj1cil-w0n85dsf0-c5ZCVGCAFBRci-2d9_BOkJL0H_CqmVfOUTZiX3xTbZ9ryAnFHP4NfyL4Z4mV6_w_paCoE2xB7GQq4Uzg1_c9kvYraf796hw_f_iM5..; isg=BIWF8gjdTAyZJWhQk_C23KYRlMG_QjnUG_gTg4frF7zLHqeQT5fdpnXwKULob1GM; l=fBSEFtZVPjM7SEAvBO5aourza77tUIObzAVzaNbMiIEGa6KftFMzjNCt6oleSdtxgT5ApetyVhmX9dEMzjU_WdsWHpfuKtyuJF9wReM3N7AN.',
    'Referer': 'https://dianying.taobao.com/?spm=a1z21.3046609.city.1.16d6112a6Omrhj&city=110100',
    'Sec-Ch-Ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

request = urllib.request.Request(url=url, headers=headers)

# The with-block closes the HTTP response once the body has been read.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')

# The body is JSONP: the JSON is wrapped as `jsonp109( ... )`. Keep only the
# text between the first '(' and the LAST ')', so that parentheses occurring
# inside JSON string values do not truncate the payload.
payload_start = content.index('(') + 1
payload_end = content.rindex(')')
content = content[payload_start:payload_end]

# Save the bare JSON text to disk.
with open('taopp.json', 'w', encoding='utf-8') as fp:
    fp.write(content)
import json
import jsonpath
# Parse the JSON stored in taopp.json; the with-block closes the file handle
# instead of leaving it open.
with open('taopp.json', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)

# '$..regionName' uses recursive descent to collect every `regionName`
# value found anywhere in the document.
city_list = jsonpath.jsonpath(obj, '$..regionName')
print(city_list)