Python爬虫---解析---JSONPath

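XPath 可以解析本地文件和服务器响应的文件;jsonpath 解析的是已经反序列化的 Python 对象(dict / list),因此无论是本地 JSON 文件(json.load)还是服务器响应的 JSON 字符串(json.loads),都可以用 JSONPath 解析,并非只能解析本地文件。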

1. 安装jsonpath:pip install jsonpath

注意:jsonpath 需要安装到运行脚本所用的那个 python 解释器的环境中。例如在该解释器的 Scripts 目录(如 D:\Program Files\Python3.11.4\Scripts)下执行 pip,或者直接使用 python -m pip install jsonpath,以确保安装到对应的解释器。

2. 使用步骤

2.1 导入:import jsonpath

2.2 使用:

示例1:

# 导包
import jsonpath
import json

# Demo: query the sample bookstore document with JSONPath expressions.
#
# Load the JSON file into a Python object. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it open.
with open("1224-解析-jsonpath.json", "r", encoding="utf-8") as fp:
    obj = json.load(fp)
# print(obj)

# NOTE: per the jsonpath package documentation, jsonpath.jsonpath() returns a
# list of matches, or False when nothing matches.

# Authors of every book in the store
author_list = jsonpath.jsonpath(obj, "$.store.book[*].author")
print(author_list)

# Every "author" value anywhere in the document
all_author_list = jsonpath.jsonpath(obj, "$..author")
print(all_author_list)

# All direct children of "store"
tag_list = jsonpath.jsonpath(obj, "$.store.*")
print(tag_list)

# The price of everything under "store"
price_list = jsonpath.jsonpath(obj, "$.store..price")
print(price_list)

# The third book (indexes are zero-based)
book = jsonpath.jsonpath(obj, "$..book[2]")
print(book)

# The last book
end_book = jsonpath.jsonpath(obj, "$..book[(@.length-1)]")
print(end_book)

# The first two books; the union form "$..book[0,1]" is an alternative spelling
before_two_book = jsonpath.jsonpath(obj, "$..book[:2]")
print(before_two_book)

# Books that have an "isbn" key -- a filter needs "?" in front of the "()"
contain_isbn_book = jsonpath.jsonpath(obj, "$..book[?(@.isbn)]")
print(contain_isbn_book)

# Books whose price is greater than 10
over_ten_book = jsonpath.jsonpath(obj, "$..book[?(@.price>10)]")
print(over_ten_book)

json文件:

{
  "store": {
    "book": [
      {
        "category": "修真",
        "author": "六道",
        "title": "坏蛋是怎样练成的",
        "price": 8.95
      },
      {
        "category": "修真",
        "author": "天蚕土豆",
        "title": "斗破苍穹",
        "price": 12.99
      },
      {
        "category": "修真",
        "author": "唐家三少",
        "title": "斗罗大陆",
        "isbn": "0-553-21311-3",
        "price": 8.99
      },
      {
        "category": "修真",
        "author": "南派三叔",
        "title": "星辰变",
        "isbn": "0-395-19395-8",
        "price": 22.99
      }
    ],
    "bicycle": {
      "color": "黑色",
      "price": 19.95
    }
  }
}

示例2:获取淘票票城市名称

import json
import jsonpath
import urllib.request

# Demo: download the Taopiaopiao city list (a JSONP response), unwrap it to
# plain JSON, save it to disk and extract every regionName with JSONPath.
url = "https://dianying.taobao.com/cityAction.json?activityId&_ksTS=1703418735341_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true"

headers = {
    # Entries whose name starts with a colon cannot be used as request headers.
    # ':authority': 'dianying.taobao.com',
    # ':method': 'GET',
    # ':path': '/cityAction.json?activityId&_ksTS=1703418735341_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true',
    # ':scheme': 'https',
    'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    # Accept-Encoding must stay disabled too: urllib does not decompress the
    # body automatically, so a gzip/br response would break the utf-8 decode.
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Bx-V': '2.5.6',
    # NOTE(review): session cookie captured from a browser; presumably it
    # expires and has to be refreshed -- confirm before relying on it.
    'Cookie': 'cna=lZ14HbG2yWQCAQ6bDW7ouWON; t=017bbe5210a298f352a8db14812a276e; cookie2=1441fe3ded83f315b1b19dfc2b9a9e05; v=0; _tb_token_=eb85057357ee1; xlly_s=1; tb_city=110100; tb_cityName="sbG+qQ=="; l=fBxZpp6rNqEgeUwFBO5Clurza77TFIOb4sPzaNbMiIEGa1RP9F_IcNCO6tx2WdtjgTCYLetPx-igEdLHR3VMCc0c07kqm05KFxvtaQtJe; tfstk=eM463YjsPdv12gxMdO1UFp6SAG3fTR_yhIGYZSLwMV3tGZw4LCPagPrIM-yhi10aMxwbLv2jm1DqHxw4Ml5FzaPgsq0xhT7PzJCuyP6y925dL50mk9Rekg77s87O5ASeMhJeUjOtOEaB4ilGt9lBkEUOocMW0vcntysmjYFT5ELYsfise5HC4i8rFAgWcBiklXMPO6tDmF8jN-kGwsXtXXc1U61BInmttXMAO6tkIchn1WfCO3xl.; isg=BKWlkniMLCwIKUkEEWtg7T3NtGHf4ll0cWg1jKeLS1zrvsUwbzLcRHbQSCLIvnEs',
    'Referer': 'https://dianying.taobao.com/?spm=a1z21.3046609.city.1.32c0112aGBdQw9&city=110100',
    # These two headers were fused into a single value by a copy-paste slip;
    # they are separate request headers.
    'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

# Build a customised request object carrying the headers above
request = urllib.request.Request(url=url, headers=headers)

# Send the request as if it came from a browser; the context manager closes
# the connection once the body has been read.
with urllib.request.urlopen(request) as response:
    content = response.read().decode("utf-8")
# print(content)   # the body is JSONP: jsonp109({...}); the wrapper must be removed

# Keep only the text between the first "(" and the last ")". Slicing on the
# outermost pair keeps parentheses that appear inside JSON string values,
# which a plain split on "(" / ")" would cut off.
payload_start = content.index("(") + 1
payload_end = content.rindex(")")
content = content[payload_start:payload_end]
# print(content)

# Save the unwrapped JSON to a file
with open("1224-解析-jsonpath解析淘票票.json", "w", encoding="utf-8") as fp:
    fp.write(content)

# Read it back into a Python object, closing the file when done
# (json.loads(content) would produce the same object without the round trip).
with open("1224-解析-jsonpath解析淘票票.json", "r", encoding="utf-8") as fp:
    obj = json.load(fp)

# Collect every regionName in the document
city_list = jsonpath.jsonpath(obj, "$..regionName")
print(city_list)

你可能感兴趣的:(python,开发语言)