Python爬虫学习之解析_jsonpath

 一、jsonpath的基本使用

import json

import jsonpath

# Parse the JSON file into Python objects. The with-block closes the file
# handle as soon as parsing is done instead of leaving it open.
with open('json文件', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)

# Evaluate a jsonpath expression against the parsed document.
# NOTE(review): the jsonpath package is understood to return False (not an
# empty list) when nothing matches -- confirm before iterating the result.
ret = jsonpath.jsonpath(obj, 'jsonpath语法')

二、jsonpath语法

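Python爬虫学习之解析_jsonpath_第1张图片

（原图为 jsonpath 语法对照表，常用语法要点如下）

$ ：根节点
@ ：当前节点
. 或 [] ：取子节点
.. ：递归下降，不管位置，匹配所有符合条件的节点
* ：通配符，匹配所有元素
[n]、[a,b] ：按下标取元素（下标从 0 开始）
[start:end] ：切片
?() ：过滤表达式
() ：脚本表达式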

e.g.

import json
import jsonpath

# Load the sample document. The with-block closes the file handle as soon as
# parsing finishes instead of leaving it open.
with open('jsonpath.json', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)

# Authors of every book in the store
author_list = jsonpath.jsonpath(obj, '$.store.book[*].author')
print(author_list)

# Every author anywhere in the document
author_list = jsonpath.jsonpath(obj, "$..author")
print(author_list)

# Every element directly under store
tag_list = jsonpath.jsonpath(obj, '$.store.*')
print(tag_list)

# The price of everything inside store
price_list = jsonpath.jsonpath(obj, '$.store..price')
print(price_list)

# The second book (indexes start at 0)
book = jsonpath.jsonpath(obj, '$..book[1]')
print(book)

# The last book
book = jsonpath.jsonpath(obj, '$..book[(@.length-1)]')
print(book)

# The first two books
# Alternative using explicit indexes: jsonpath.jsonpath(obj, '$..book[0,1]')
book_list = jsonpath.jsonpath(obj, '$..book[:2]')
print(book_list)

# Filter expressions need a ? in front of the ()
# Keep only the books that have an isbn field
book_list = jsonpath.jsonpath(obj, '$..book[?(@.isbn)]')
print(book_list)

# Keep only the books whose price is greater than 10
book_list = jsonpath.jsonpath(obj, '$..book[?(@.price>10)]')
print(book_list)

 三、jsonpath的应用

e.g. 用jsonpath获取淘票票里所有已开通服务的城市

import json
import urllib.request

import jsonpath

# Fetch the city list from the Taopiaopiao endpoint, save the JSON payload to
# a local file, then pull every regionName out of it with jsonpath.

url = "https://dianying.taobao.com/cityAction.json?activityId&_ksTS=1706267255171_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true"

# Request headers copied from a browser session.
# NOTE(review): the Cookie value below looks like a personal session cookie;
# avoid publishing or committing a live one.
headers = {
    # The ':'-prefixed entries are HTTP/2 pseudo-headers and are left disabled.
    # ':authority':'dianying.taobao.com',
    # ':method':'GET',
    # ':path':'/cityAction.json?activityId&_ksTS=1706267255171_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true',
    # ':scheme':'https',
    'Accept':'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    # Accept-Encoding stays disabled: the body is decoded directly as UTF-8
    # below, which would fail on a compressed response.
    # 'Accept-Encoding':'gzip, deflate, br',
    'Accept-Language':'zh-CN,zh;q=0.9',
    'Bx-V':'2.5.10',
    'Cookie':'t=64523a4057c364f895e0fde2532013c6; cookie2=15b03a835ef4f0d11750ddce172c7df9; v=0; _tb_token_=e59eb1338be3b; cna=sn06HiZsnGECAQAAAABYf/O6; xlly_s=1; tb_city=350500; tb_cityName="yKrW3Q=="; tfstk=eUGp3os5FhxH1fs4IBpia1u5vX8MnX3e-DufZuqhFcnt0DLUE9DnegEtlerHUpo-B0iaKkG-z_USP08UqeJi82PzNnx0wI0E8DFT6QAcC2Cqd7tDmCXG5J-3Nvf_H6jjL1pJq1fZ_bUBh0FFGQl0WyeL2_qOw8GNiREXul5fGkULpa0QX_1S6g8mijBWniqYr9T9WTWzdPSNdEQ-WK5x2PEDSOXPUR4aWoY9WTWzdPzTmFAGUTyg7; isg=BAIC-i520256MM-y8z-Fp5vTUwhk0wbtainyVUwaf3Ugn6YZNGN-_b6RT5vjz36F',
    'Referer':'https://dianying.taobao.com/',
    'Sec-Ch-Ua':'"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'Sec-Ch-Ua-Mobile':'?0',
    'Sec-Ch-Ua-Platform':'"Windows"',
    'Sec-Fetch-Dest':'empty',
    'Sec-Fetch-Mode':'cors',
    'Sec-Fetch-Site':'same-origin',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'X-Requested-With':'XMLHttpRequest'
}

request = urllib.request.Request(url=url, headers=headers)

# The with-block closes the HTTP response once the body has been read.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf-8')

# The body is wrapped in a callback call (presumably jsonp109(...), per the
# jsoncallback query parameter). Keep everything between the FIRST '(' and the
# LAST ')' so that parentheses inside JSON string values do not truncate the
# payload. Raises ValueError if the body has no such wrapper.
content = content[content.index('(') + 1:content.rindex(')')]

# Write the payload and close the file before it is read back; without the
# close, buffered data may not have reached disk when the file is reopened.
with open('jsonpath应用之淘票票.json', 'w', encoding='utf-8') as fp:
    fp.write(content)

with open('jsonpath应用之淘票票.json', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)

# Collect every regionName value anywhere in the document.
city_list = jsonpath.jsonpath(obj, '$..regionName')

print(city_list)

你可能感兴趣的:(python,爬虫,笔记)