Kettle是一款国外开源的ETL工具,纯Java编写,可以在Windows、Linux、Unix上运行,数据抽取高效稳定。
但是本文重点不是讲Kettle安装和使用。
而是讲如何使用Kettle处理JSON文件。因为Kettle本身有一点点BUG,Json Input组件不能直接处理UTF-8编码的JSON内容。
所以,要学会变通一下。本例子使用的Kettle版本为7.1,下载网址:https://sourceforge.net/projects/pentaho/files/Data%20Integration/7.1/pdi-ce-7.1.0.0-12.zip/download
首先,打开Kettle的图形编辑界面。
Windows下的启动命令为Spoon.bat。
看到这个界面说明你的Kettle环境没有问题。
然后点击菜单 文件->新建->转换
开始一个json文件的处理流程。
然后,从核心对象里面拖一个Json Input组件出来。
如果json文件的编码不是UTF-8而是GBK,只用这一个组件就可以处理了。但是本宝宝的文件不是这样的,它是UTF-8编码的。
所以,又变通一下,拖了两个组件。
一个是文件内容加载到内存,一个是文本文件输出。
这两个组件都有编码处理的选择项,这样宝宝就可以处理UTF-8的json文件了。
然后就是用连接线把三个组件串联起来,非常简单。
Json Input组件的一些参数是这样配置的,当然JsonPath语法不是本文重点。
文件内容加载到内存的参数配置:
文本文件输出的参数配置:
都配置好了,就可以运行了。
然后,去看输出的结果文件。本例子输出了一个csv文件file2.csv。
内容如下:
f2,f3,f4
"{""总分"":0,""rcAppId"":""a637725d-aabb-41e3-8128-713dd06f0b76"",""结果"":""拒绝"",""备注"":""征信不良记录"",""费率"":""0.009900""}",a637725d-aabb-41e3-8128-713dd06f0b76,征信不良记录
它是可以直接用Excel打开看的表格。
json源文件也贴上来。内容还是蛮复杂的,而且我还删减了部分内容。
{
"errorMessage": "",
"errorCode": "",
"responseData": {
"总分": 0,
"rcAppId": "a637725d-aabb-41e3-8128-713dd06f0b76",
"结果": "拒绝",
"备注": "征信不良记录",
"费率": "0.009900"
},
"持久化数据": {
"lastModifiedTime": "2018-03-30 15:49:06",
"bizDate": "2018-03-30",
"errorCode": "",
"productRcvId": 8,
"rawData": {
"request": {
"app": {
"minAmount": "5000.00",
"purpose": "经营",
"bizDate": "2018-03-30",
"accountFourVerification": false,
"agreeTime": 1522395949737,
"orgId": 3,
"isFirst": "false",
"appAmount": "5884.00",
"creditLine": "5884.00",
"rate": "0.009900",
"appId": "244d6d3d-2298-46ff-baaa-b252837585d7",
"accountNo": "未选择",
"isLine": "N",
"partyId": "5224f0bc-55cd-4e62-bbec-192990881660",
"maxAmount": "150000.00",
"loaTime": "20180330154445",
"period": 120,
"amount": "5884.00",
"loanUse": "经营",
"productId": "08363214-a832-4f64-bb80-be352d8d0f0f",
"externalId": "2",
"interestEnumId": 2011001,
"thruDate": 1837958400000,
"fromDate": 1522339200000,
"createBy": "13213571926",
"statusId": 200100,
"createTime": 1522395838000,
"tenantId": "71f28286-f5b3-40e5-ab2f-049b66328b1f"
},
"reference": {
"enterprise": {
"warmPromptMessage": "",
"businessLicenseCode": "123456789123456789",
"semiannualRevenue": "5282828",
"industryInvolved": "",
"isShow": true,
"creditCode": "",
"organizationCode": "",
"accountNo": false,
"name": "永嘉路",
"revenueDocumentId": ["b25ef019-29ed-4e6b-abb1-69667d3793e0"],
"businessPlaceType": "自有",
"annualRevenue": "",
"seq": 4
},
"frequentlyUsedAccount": {
"mobilePhone": "13213571926",
"accountNo": "6217370090101267356",
"fourVerification": false
},
"estate": {
"area": "36",
"isMortgage": false,
"purchaseDate": "",
"documentExplain": "",
"court": "盛世豪城小区",
"type": "住宅",
"structure": "",
"isLift": "电梯",
"toward": "",
"valuation": "108000",
"estateAddress": {
"address": "",
"province": "内蒙古",
"city": "巴彦淖尔",
"district": "临河区"
},
"isOwner": false,
"documentId": ["0ae9d39e-f7ee-44b8-a92e-cdff108401a9"],
"floor": ""
},
"asset": [],
"vehicle": {
"isMortgage": false,
"purchaseDate": "",
"engineNumber": "",
"documentExplain": "",
"type": "",
"plateNumber": "",
"isOwner": false,
"price": "",
"driverLicense": "",
"vin": "",
"documentId": [],
"brand": "",
"vehicleLicense": ""
}
},
"rcAppTypeCode": "2",
"tenantId": "71f28286-f5b3-40e5-ab2f-049b66328b1f",
"custPerson": {
"reservedCity": "巴彦淖尔",
"graduateSchool": "",
"weechatNo": "",
"education": "",
"contactInfo": [{
"workingUnit": "",
"twoVerification": false,
"weechatNo": "",
"mobilePhone": "13623895652",
"qqNo": "",
"name": "江大头",
"threeVerification": false,
"idNo": "410825198702245568",
"email": "",
"seq": 1,
"relation": "家人",
"isFrist": true
}, {
"workingUnit": "",
"twoVerification": false,
"weechatNo": "",
"mobilePhone": "13623895645",
"qqNo": "",
"name": "金城武张根硕",
"threeVerification": true,
"idNo": "15282319821014001X",
"email": "",
"seq": 2,
"relation": "配偶",
"isFrist": false
}, {
"workingUnit": "",
"twoVerification": false,
"weechatNo": "",
"mobilePhone": "13623895654",
"qqNo": "",
"name": "刘浩",
"threeVerification": false,
"idNo": "",
"email": "",
"seq": 3,
"relation": "同事",
"isFrist": false
}, {
"workingUnit": "",
"twoVerification": false,
"weechatNo": "",
"mobilePhone": "13623895651",
"qqNo": "",
"name": "江北",
"threeVerification": false,
"idNo": "",
"email": "",
"seq": 4,
"relation": "朋友",
"isFrist": false
}],
"gender": "",
"ethnicity": "",
"warmPromptMessage": "\n\t\t\t\t\t\t\n\t\t\t\t\t
温馨提示:
1.提供公户和密码。
2.社保局
3.社保局423
",}
最后贴一下Kettle转换配置文件k3.ktr的内容:
===================================
JsonPath示例:返回records数组中最后两个元素的name值
List lastTwoName = context.read("$.result.records[-2:].name");