解析_Jsonpath基本使用及实战

解析_Jsonpath

  • 前言
  • 一、Jsonpath安装
  • 二、使用步骤
    • 1、基本使用
    • 2、实战


前言

JsonPath 是一种用简洁的路径表达式从给定 JSON 文档中提取部分内容的方法。做接口测试时,目前最常见的数据格式是 JSON;遇到结构复杂的 JSON 时,可以使用 jsonpath 来提取所需数据。

一、Jsonpath安装

bogon:bin yingyan$ pip3 install jsonpath -i https://pypi.douban.com/simple/

pycharm添加jsonpath库

二、使用步骤

1、基本使用

数据:douban1.json

{
  "subjects": [
    {
      "episodes_info": "",
      "rate": "8.0",
      "cover_x": 5000,
      "title": "巴比伦",
      "url": "https:\/\/movie.douban.com\/subject\/34467461\/",
      "playable": false,
      "cover": "https://img1.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2884457470.jpg",
      "id": "34467461",
      "cover_y": 7407,
      "is_new": false
    },
    {
      "episodes_info": "",
      "rate": "7.4",
      "cover_x": 1800,
      "title": "塔尔",
      "url": "https:\/\/movie.douban.com\/subject\/35430833\/",
      "playable": false,
      "cover": "https://img9.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2883951104.jpg",
      "id": "35430833",
      "cover_y": 2570,
      "is_new": false
    },
    {
      "episodes_info": "",
      "rate": "8.5",
      "cover_x": 1000,
      "title": "西线无战事",
      "url": "https:\/\/movie.douban.com\/subject\/3042261\/",
      "playable": false,
      "cover": "https://img9.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2879787106.jpg",
      "id": "3042261",
      "cover_y": 1500,
      "is_new": false,
      "version": 1
    },
    {
      "episodes_info": "",
      "rate": "7.6",
      "cover_x": 2362,
      "title": "晨光正好",
      "url": "https:\/\/movie.douban.com\/subject\/35211730\/",
      "playable": false,
      "cover": "https://img1.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2880259607.jpg",
      "id": "35211730",
      "cover_y": 3209,
      "is_new": false,
      "version": 2
    },
    {
      "episodes_info": "",
      "rate": "7.5",
      "cover_x": 1895,
      "title": "造梦之家",
      "url": "https:\/\/movie.douban.com\/subject\/35390098\/",
      "playable": false,
      "cover": "https://img9.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2885995265.jpg",
      "id": "35390098",
      "cover_y": 3000,
      "is_new": false
    },
    {
      "episodes_info": "",
      "rate": "8.6",
      "cover_x": 1080,
      "title": "乐土",
      "url": "https:\/\/movie.douban.com\/subject\/35870056\/",
      "playable": false,
      "cover": "https://img9.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2881033774.jpg",
      "id": "35870056",
      "cover_y": 1600,
      "is_new": false
    },
    {
      "episodes_info": "",
      "rate": "6.4",
      "cover_x": 1334,
      "title": "菜单",
      "url": "https:\/\/movie.douban.com\/subject\/30455615\/",
      "playable": false,
      "cover": "https://img9.doubanio.com\/view\/photo\/s_ratio_poster\/public\/p2880339524.jpg",
      "id": "30455615",
      "cover_y": 2000,
      "is_new": false
    }
  ],
  "bicyle": {
    "title": "哈哈"
  }
}

解析_jsonpath.py对本地数据进行解析

# _*_ coding : utf-8 _*_
# @Time : 2023/2/17 7:11 PM
# @Author : yanhh
# @File : 解析_jsonpath
# @Project : pythonProject

import jsonpath
import json

# Load the sample document. The context manager closes the file handle as
# soon as parsing is finished (the bare open() call used before leaked it).
with open('douban1.json', 'r', encoding='utf8') as fp:
    obj = json.load(fp)

# 1. Titles of every element of the root-level "subjects" array.
#    `*` selects all elements; a concrete number selects by index.
# Result: ['巴比伦', '塔尔', '西线无战事', '晨光正好', '造梦之家', '乐土', '菜单']
# movie_list = jsonpath.jsonpath(obj, '$.subjects[*].title')
# print(movie_list)

# 2. Every "title" anywhere below the root (`..` is recursive descent), so
#    the title nested under "bicyle" is included as well.
# Result: ['巴比伦', '塔尔', '西线无战事', '晨光正好', '造梦之家', '乐土', '菜单', '哈哈']
# movie_list = jsonpath.jsonpath(obj, '$..title')
# print(movie_list)

# 3. `.` plays the role of `/` in XPath: `$.subjects.*` selects the direct
#    children of "subjects".
# Result: a list holding all seven movie dicts, from '巴比伦' to '菜单'.
# movie_list = jsonpath.jsonpath(obj, '$.subjects.*')
# print(movie_list)

# 4. The third movie (indices are zero-based).
# movie_list = jsonpath.jsonpath(obj, '$.subjects[2]')
# print(movie_list)

# 5. The last movie, addressed with a script expression on the array length.
# movie_list = jsonpath.jsonpath(obj, '$.subjects[(@.length-1)]')
# print(movie_list)

# 6. The first two movies, using an index list: [0,1]
# movie_list = jsonpath.jsonpath(obj, '$.subjects[0,1]')
# print(movie_list)

# 7. The first two movies, using a slice: [:2]
# movie_list = jsonpath.jsonpath(obj, '$.subjects[:2]')
# print(movie_list)

# 8. Only the entries that contain a "version" key; note the `?` that
#    introduces a filter expression.
# movie_list = jsonpath.jsonpath(obj, '$.subjects[?(@.version)]')
# print(movie_list)

# 9. Only the entries whose cover_x is greater than 3000; note the `?`.
movie_list = jsonpath.jsonpath(obj, '$.subjects[?(@.cover_x>3000)]')
print(movie_list)

2、实战

代码如下(示例):爬取拉勾网的职位数据

# _*_ coding : utf-8 _*_
# @Time : 2023/2/17 9:21 PM
# @Author : yanhh
# @File : 解析_jsonpath解析boss
# @Project : pythonProject
import json
import urllib.request
import ssl

import jsonpath

# NOTE(review): this swaps the ssl module's default-HTTPS-context factory for
# the unverified one, so HTTPS requests that rely on the default context in
# this process skip certificate verification. Tolerable for a local demo;
# do not carry it into production code.
ssl._create_default_https_context = ssl._create_unverified_context

# Alternative target (BOSS Zhipin job list), kept for reference only.
# url = 'https://www.zhipin.com/wapi/zpgeek/search/joblist.json?scene=1&query=&city=101010100&experience=&degree=&industry=&scale=&stage=&position=100301&jobType=&salary=&multiBusinessDistrict=&multiSubway=&page=1&pageSize=30'
# Lagou position search: keyword is the URL-encoded word "测试" (testing),
# page 1 with 6 results per page.
url = 'https://gate.lagou.com/v1/neirong/positions/sem/searchPosition?keyword=%E6%B5%8B%E8%AF%95&pageNo=1&pageSize=6&_t=1676642229300'

# Request headers sent with the Lagou search request.
# They appear to be copied from a Chrome 109 / macOS browser session
# (see the User-Agent and sec-ch-ua values below).
headers = {
    'Accept': '*/*',
    # Left disabled; presumably so the server does not send a compressed
    # body, since the script decodes the raw bytes directly -- TODO confirm.
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    # NOTE(review): hard-coded cookie captured from one browser session.
    # It is likely expired by now and should not be committed or published;
    # replace it with a fresh value from your own session before running.
    'Cookie': 'user_trace_token=20230217215637-f6f3960e-cc36-42e3-b812-6162a6c577f3; X_HTTP_TOKEN=fd3c39a4571f3ae48912466761551eed12bb676ef7; _ga=GA1.2.221373297.1676642198; _gat=1; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1676642198; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1676642198; LGSID=20230217215638-7e4ee3cc-6303-4949-b783-501325cfe32a; PRE_UTM=m_cf_cpt_baidu_pcbt; PRE_HOST=www.baidu.com; PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Fother.php%3Fsc.Ks0000aqqdzCLIcx91eL5m5mP15lm-Oekb6BUzdhGtNuXMo5W3qAhXLWM56Q9%5FomP9VF2UPHLCpCadJSlqvnA7G80jd%5FliyMyStdj1LauDCJstXEH8zAyfxclJ76tSKPVpK%5Fdtdg2b0kjglwEo94iUDN8UEcQc0AK50UM2aJgsb%5Fozjj0EcSIMwOFkvWfI6WYksDtLf-hb4GRHpcUUyMaGkL4D6a.7Y%5FNR2Ar5Od663rj6tJQrGvKD77h24SU5WudF6ksswGuh9J4qt7jHzk8sHfGmYt%5FrE-9kYryqM764TTPqKi%5FnYQZHuukL0.TLFWgv-b5HDkrfK1ThPGujYknHb0THY0IAYqs2v4VnL30ZN1ugFxIZ-suHYs0A7bgLw4TARqnsKLULFb5TaV8UHPS0KzmLmqn0KdThkxpyfqnHRYPHD4rHRdPsKVINqGujYkPHcdnH6kPfKVgv-b5HDznHnLPWcL0AdYTAkxpyfqnHc3nWm0TZuxpyfqn0KGuAnqiDF70ZKGujY10APGujYYP1R0mLFW5Hc3nHnd%26dt%3D1676642192%26wd%3D%25E6%258B%2589%25E5%258B%25BE%25E7%25BD%2591%26tpl%3Dtpl%5F12826%5F31784%5F0%26l%3D1545199557%26us%3DlinkVersion%253D1%2526compPath%253D10036.0-10032.0%2526label%253D%2525E4%2525B8%2525BB%2525E6%2525A0%252587%2525E9%2525A2%252598%2526linkType%253D%2526linkText%253D%2525E3%252580%252590%2525E6%25258B%252589%2525E5%25258B%2525BE%2525E6%25258B%25259B%2525E8%252581%252598%2525E3%252580%252591%2525E5%2525AE%252598%2525E6%252596%2525B9%2525E7%2525BD%252591%2525E7%2525AB%252599%252520-%252520%2525E4%2525BA%252592%2525E8%252581%252594%2525E7%2525BD%252591%2525E9%2525AB%252598%2525E8%252596%2525AA%2525E5%2525A5%2525BD%2525E5%2525B7%2525A5; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Flanding-page%2Fpc%2Fsearch.html%3Futm%5Fsource%3Dm%5Fcf%5Fcpt%5Fbaidu%5Fpcbt; LGUID=20230217215638-b3f2398f-da8f-4a31-8512-eb7977969dd5; LGRID=20230217215657-d736d822-05df-4091-82e1-435e4a5cc3f3',
    'Host': 'gate.lagou.com',
    'Origin': 'https://www.lagou.com',
    'Referer': 'https://www.lagou.com/',
    'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    'X-L-REQ-HEADER': '{deviceType:1}',
}

# Send the request and decode the body. The context manager closes the HTTP
# response once the payload has been read (it was left open before).
request = urllib.request.Request(url=url, headers=headers)
with urllib.request.urlopen(request) as response:
    content = response.read().decode('utf8')
# Uncomment to dump the raw JSON text.
# print(content)

# Keep a local copy of the raw response in boss.json.
with open('boss.json', 'w', encoding='utf8') as fp:
    fp.write(content)

# jsonpath works on already-parsed Python objects, not on files, so parse the
# text we already hold. Re-opening boss.json is unnecessary, and the earlier
# json.load(open(...)) call never closed that handle.
obj = json.loads(content)
job_list = jsonpath.jsonpath(obj, '$.content.positions[*]')
# Uncomment to print the extracted position entries.
# print(job_list)

保存本地数据为boss.json

{
  "state": 1,
  "message": "操作成功",
  "content": {
    "positions": [
      {
        "positionId": 9388750,
        "positionName": "Software QA Engineer/软件测试工程师",
        "companyId": 29303,
        "companyShortName": "活跃网络",
        "companyName": null,
        "companyLogo": "https://s0.lgstatic.com/thumbnail_360x360/i/image2/M01/A3/14/CgotOVvYI_CAVrCFAAAaUSx2tYg486.png",
        "companySize": "500-2000人",
        "industryField": "移动互联网",
        "financeStage": "上市公司",
        "hiTagList": null,
        "positionLables": [
          "IT技术服务|咨询",
          "软件服务|咨询"
        ],
        "createTime": "2023-02-17 21:01:07.0",
        "city": "成都",
        "district": "锦江区",
        "businessZone": null,
        "salary": "16k-20k",
        "salaryMonth": null,
        "workYear": "3-5年",
        "education": "本科",
        "deliverTime": null,
        "lastLogin": 1676624143000,
        "resumeProcessRate": 91,
        "hrVo": null,
        "jobNature": null,
        "activityLabel": null,
        "hasDelivered": null,
        "showId": null
      },
      {
        "positionId": 10010338,
        "positionName": "测试工程师",
        "companyId": 69471,
        "companyShortName": "NCS",
        "companyName": null,
        "companyLogo": "https://s0.lgstatic.com/thumbnail_360x360/i/image6/M00/4B/8B/CioPOWDlZMuAN-YVAAAl_4gAA94643.png",
        "companySize": "500-2000人",
        "industryField": "IT技术服务|咨询,数据服务|咨询",
        "financeStage": "不需要融资",
        "hiTagList": null,
        "positionLables": [
          "IT技术服务|咨询"
        ],
        "createTime": "2023-02-17 21:01:34.0",
        "city": "成都",
        "district": "高新区",
        "businessZone": null,
        "salary": "8k-15k",
        "salaryMonth": null,
        "workYear": "3-5年",
        "education": "本科",
        "deliverTime": null,
        "lastLogin": 1676627709000,
        "resumeProcessRate": 100,
        "hrVo": null,
        "jobNature": null,
        "activityLabel": null,
        "hasDelivered": null,
        "showId": null
      },
      {
        "positionId": 10494297,
        "positionName": "游戏测试",
        "companyId": 116037,
        "companyShortName": "百鲤游戏",
        "companyName": null,
        "companyLogo": "https://s0.lgstatic.com/thumbnail_360x360/i/image2/M00/03/EA/CgoB5lnDLCyAGXkCAAAx1rlei4U341.jpg",
        "companySize": "50-150人",
        "industryField": "游戏",
        "financeStage": "A轮",
        "hiTagList": null,
        "positionLables": [
          "游戏"
        ],
        "createTime": "2023-02-17 21:02:08.0",
        "city": "武汉",
        "district": "硚口区",
        "businessZone": null,
        "salary": "8k-15k",
        "salaryMonth": null,
        "workYear": "3-5年",
        "education": "本科",
        "deliverTime": null,
        "lastLogin": 1676536742000,
        "resumeProcessRate": 87,
        "hrVo": null,
        "jobNature": null,
        "activityLabel": null,
        "hasDelivered": null,
        "showId": null
      },
      {
        "positionId": 11049782,
        "positionName": "测试工程师",
        "companyId": 24995,
        "companyShortName": "泛微",
        "companyName": null,
        "companyLogo": "https://s0.lgstatic.com/thumbnail_360x360/image1/M00/00/33/CgYXBlTUXI-AC08_AACIkHlny3Y866.jpg",
        "companySize": "2000人以上",
        "industryField": "软件服务|咨询",
        "financeStage": "上市公司",
        "hiTagList": null,
        "positionLables": [
          "软件服务|咨询"
        ],
        "createTime": "2023-02-17 21:05:06.0",
        "city": "上海",
        "district": "闵行区",
        "businessZone": "浦江",
        "salary": "11k-15k",
        "salaryMonth": null,
        "workYear": "3-5年",
        "education": "本科",
        "deliverTime": null,
        "lastLogin": 1676630701000,
        "resumeProcessRate": 0,
        "hrVo": null,
        "jobNature": null,
        "activityLabel": null,
        "hasDelivered": null,
        "showId": null
      },
      {
        "positionId": 11048871,
        "positionName": "测试工程师",
        "companyId": 123275905,
        "companyShortName": "视觉变色龙(北京)科技有限公司",
        "companyName": null,
        "companyLogo": "https://s0.lgstatic.com/thumbnail_360x360/i/image6/M00/71/76/CioPOWIMt7yAWyXRAABWlXhDNmM523.png",
        "companySize": "50-150人",
        "industryField": "电商平台,内容社区,数据服务|咨询",
        "financeStage": "不需要融资",
        "hiTagList": null,
        "positionLables": [
          "电商平台",
          "内容社区"
        ],
        "createTime": "2023-02-17 21:05:05.0",
        "city": "北京",
        "district": "东城区",
        "businessZone": null,
        "salary": "12k-18k",
        "salaryMonth": null,
        "workYear": "3-5年",
        "education": "本科",
        "deliverTime": null,
        "lastLogin": 1676612442000,
        "resumeProcessRate": 0,
        "hrVo": null,
        "jobNature": null,
        "activityLabel": null,
        "hasDelivered": null,
        "showId": null
      },
      {
        "positionId": 10945931,
        "positionName": "自动化测试",
        "companyId": 122001458,
        "companyShortName": "科瑞国际",
        "companyName": null,
        "companyLogo": "https://s0.lgstatic.com/thumbnail_360x360/i/image/M00/67/BC/CgqCHl-iOAGAczhKAABeeIMJ6wY726.png",
        "companySize": "2000人以上",
        "industryField": "数据服务,人工智能",
        "financeStage": "上市公司",
        "hiTagList": null,
        "positionLables": [
          "软件服务|咨询",
          "IT技术服务|咨询"
        ],
        "createTime": "2023-02-17 21:03:42.0",
        "city": "西安",
        "district": "雁塔区",
        "businessZone": "电子城",
        "salary": "15k-30k",
        "salaryMonth": null,
        "workYear": "不限",
        "education": "本科",
        "deliverTime": null,
        "lastLogin": 1676092790000,
        "resumeProcessRate": 0,
        "hrVo": null,
        "jobNature": null,
        "activityLabel": null,
        "hasDelivered": null,
        "showId": null
      }
    ],
    "positionCount": 3062
  },
  "uiMessage": null
}

你可能感兴趣的:(爬虫,python,pycharm,json)