解析-JsonPath

解析-JsonPath

jsonpath的安装及使用方式:
	pip安装:
		pip install jsonpath
	jsonpath的使用:
		obj = json.load(open('json文件', 'r', encoding='utf-8'))
		ret = jsonpath.jsonpath(obj, 'jsonpath语法')
XPath JSONPath Description
/ $ 表示根元素
. @ 当前元素
/ . or [] 子元素
.. n/a 父元素
// .. 递归下降,JSONPath是从E4X借鉴的。
* * 通配符,表示所有的元素
@ n/a 属性访问字符
[] [] 子元素操作符
| [,] 连接操作符在XPath 结果合并其它结点集合。JSONPath允许name或者数组索引。
n/a [start:end:step] 数组分割操作从ES4借鉴。
[] ?() 应用过滤表示式
n/a () 脚本表达式,使用在脚本引擎下面。
() n/a Xpath分组

jsonpath的基本使用

{ "store": {
    "book": [
      { "category": "reference",
        "author": "Nigel Rees",
        "title": "Sayings of the Century",
        "price": 8.95
      },
      { "category": "fiction",
        "author": "Evelyn Waugh",
        "title": "Sword of Honour",
        "price": 12.99
      },
      { "category": "fiction",
        "author": "Herman Melville",
        "title": "Moby Dick",
        "isbn": "0-553-21311-3",
        "price": 8.99
      },
      { "category": "fiction",
        "author": "J. R. R. Tolkien",
        "title": "The Lord of the Rings",
        "isbn": "0-395-19395-8",
        "price": 22.99
      }
    ],
    "bicycle": {
      "author": "J. Tolkien",
      "color": "red",
      "price": 19.95
    }
  }
}
import json
import jsonpath

# Load the sample bookstore document. The context manager closes the file
# handle as soon as parsing is done instead of leaving it open.
with open('store.json', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)

# The commented-out queries below are alternative JSONPath examples;
# uncomment one at a time to try it against the same document.

# Authors of every book in the store
# author_list = jsonpath.jsonpath(obj, '$.store.book[*].author')
# print(author_list)

# Every author anywhere under store (books and bicycle alike)
# author_list = jsonpath.jsonpath(obj, '$.store..author')
# print(author_list)

# Everything directly under store: the book list and the bicycle
# author_list = jsonpath.jsonpath(obj, '$.store.*')
# print(author_list)

# The price of everything in the store
# author_list = jsonpath.jsonpath(obj, '$.store..price')
# print(author_list)

# The third book (indexes are zero-based)
# author_list = jsonpath.jsonpath(obj, '$..book[2]')
# print(author_list)

# The last book, via a script expression on the array length
# author_list = jsonpath.jsonpath(obj, '$..book[(@.length-1)]')
# print(author_list)

# The first two books, using a slice
# author_list = jsonpath.jsonpath(obj, '$..book[:2]')
# print(author_list)

# The first two books again, using an index union
# author_list = jsonpath.jsonpath(obj, '$..book[0,1]')
# print(author_list)

# Only the books that have an isbn field
# author_list = jsonpath.jsonpath(obj, '$..book[?(@.isbn)]')
# print(author_list)

# Only the books priced below 10
# author_list = jsonpath.jsonpath(obj, '$..book[?(@.price<10)]')
# print(author_list)

# Every element in the document (recursive descent plus wildcard).
# The variable keeps the name used by the examples above even though the
# result here is not limited to authors.
author_list = jsonpath.jsonpath(obj, '$..*')
print(author_list)

JsonPath解析淘票票网页城市的json案例

import urllib.request

# JSONP endpoint queried for the city/region list; the `jsoncallback`
# parameter makes the server wrap the JSON payload as `jsonp109(...)`.
url = 'https://dianying.taobao.com/cityAction.json?activityId&_ksTS=1695050371169_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true'

# Browser-like request headers sent with the call.
# NOTE(review): the Cookie value is a captured session credential hard-coded
# in source. It will expire and should not be published; consider loading it
# from configuration or the environment instead.
headers = {
    'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    # Left disabled on purpose: urllib does not decompress responses, so
    # asking for gzip/br would break the utf-8 decode of the body below.
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Bx-V': '2.5.3',
    'Cookie': 't=5735d5d603eb306365806719d8cbc71d; cookie2=1f8f07fa7d0587561e7b7c595f0847a7; v=0; _tb_token_=5e585349e0ebb; cna=4IuNHejGryoCAXM8lO26wJQN; xlly_s=1; tb_city=110100; tb_cityName="sbG+qQ=="; tfstk=dKKMFUvbcF71geyV-cIsjflCpC3df5sf8IEAMiCq865BDd36HmoDtQ_4DVsOoiAF9hC9BGy6RQpzXh31Doj1cil-w0n85dsf0-c5ZCVGCAFBRci-2d9_BOkJL0H_CqmVfOUTZiX3xTbZ9ryAnFHP4NfyL4Z4mV6_w_paCoE2xB7GQq4Uzg1_c9kvYraf796hw_f_iM5..; isg=BIWF8gjdTAyZJWhQk_C23KYRlMG_QjnUG_gTg4frF7zLHqeQT5fdpnXwKULob1GM; l=fBSEFtZVPjM7SEAvBO5aourza77tUIObzAVzaNbMiIEGa6KftFMzjNCt6oleSdtxgT5ApetyVhmX9dEMzjU_WdsWHpfuKtyuJF9wReM3N7AN.',
    'Referer': 'https://dianying.taobao.com/?spm=a1z21.3046609.city.1.16d6112a6Omrhj&city=110100',
    'Sec-Ch-Ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    }

request = urllib.request.Request(url=url, headers=headers)

# Use the response as a context manager so the connection is closed as soon
# as the body has been read, even if decoding raises.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')

# Strip the JSONP wrapper `jsonp109( ... )` and keep only the JSON payload.
# Slice from the first '(' to the LAST ')' so that parentheses occurring
# inside JSON string values cannot truncate the payload.
content = content[content.index('(') + 1:content.rindex(')')]

# Persist the bare JSON so it can be parsed from disk afterwards.
with open('taopp.json', 'w', encoding='utf-8') as fp:
    fp.write(content)

import json
import jsonpath

# Parse the city payload stored in taopp.json; the context manager closes
# the file handle once parsing finishes.
with open('taopp.json', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)

# Recursive descent ('..') collects every regionName value at any depth.
# NOTE(review): the jsonpath package is documented to return False rather
# than an empty list when nothing matches - confirm before iterating.
city_list = jsonpath.jsonpath(obj, '$..regionName')

print(city_list)

你可能感兴趣的:(Python爬虫,python)