研究了几天 NiFi 的 ExecuteScript processor,在同事的帮助下终于调试通过了。这里记录 Python 和 Groovy 两种语言的实现:
python:
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
from org.apache.nifi.processors.script import ExecuteScript


class PyStreamCallback(StreamCallback):
    """Stream callback that replaces the flow file content with a new JSON document.

    The incoming content is read as UTF-8 JSON; the value under its ``meta``
    key is copied into the ``meta_data`` field of the output document, next to
    three fixed fields (``Source``, ``ID``, ``Name``).
    """

    def __init__(self):
        pass

    def process(self, inputStream, outputStream):
        """Read JSON from ``inputStream`` and write the rewritten JSON to ``outputStream``.

        Raises:
            ValueError: if the incoming content is not valid JSON.
            KeyError: if the incoming JSON object has no ``meta`` key.
        """
        text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        # json.loads parses a string; json.load expects a file-like object
        # with a .read() method and fails on the text read above.
        obj = json.loads(text)
        newObj = {
            "Source": "NiFi",
            "ID": "python",
            "Name": "test",
            "meta_data": obj['meta'],
        }
        outputStream.write(bytearray(json.dumps(newObj, indent=4).encode('utf-8')))


# NOTE(review): `session` is not defined in this script; it is expected to be
# injected by the ExecuteScript processor at run time.
flowFile = session.get()
if flowFile is not None:
    flowFile = session.write(flowFile, PyStreamCallback())
    session.transfer(flowFile, ExecuteScript.REL_SUCCESS)
groovy:
import org.apache.commons.io.IOUtils
import java.nio.charset.*
// Reads the flow file content as JSON, inspects its `bbd_error_log` field and
// tags the flow file with an `etl_flag` attribute ("DATA_MATCHED" or
// "DATA_NOT_MATCH"). The content itself is re-serialized as pretty-printed JSON.
// NOTE(review): `session`, `StreamCallback` and `ExecuteScript` are not defined
// or imported in this script; they are expected to be provided by the NiFi
// ExecuteScript processor.
def flowFile = session.get()
if (flowFile == null) {
    return
}

def slurper = new groovy.json.JsonSlurper()
// Set inside the write callback, read afterwards to choose the attribute value.
def flag = false

flowFile = session.write(flowFile, { inputStream, outputStream ->
    def text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
    def obj = slurper.parseText(text)
    flag = false

    // Matched (with find semantics, via =~) against `enterprise_status`.
    def statusPattern = ~"吊销|注销"

    def bbdErrorLog = obj.bbd_error_log
    // One more element than the number of `code":` occurrences in the log
    // (as long as text follows the last occurrence).
    def bbdErrorLogs = bbdErrorLog.split("code\":")

    if (bbdErrorLogs.size() == 2) {
        // Exactly one code in the log. The code is terminated by "}" when the
        // log starts with `[{"message`, otherwise by ",".
        def codeTerminator = bbdErrorLog.startsWith("[{\"message") ? "}" : ","
        def errorLogCode = Integer.parseInt(bbdErrorLogs[1].split(codeTerminator)[0])

        if (errorLogCode == 2000002) {
            def companyType = obj.company_type
            if (companyType == "农民专业合作社" || companyType == "集体所有制" || companyType == "股份合作制") {
                flag = true
            } else if (obj.enterprise_status =~ statusPattern) {
                flag = true
            }
        } else if (errorLogCode == 2000302) {
            flag = true
        } else if (errorLogCode == 1000202) {
            if (obj.enterprise_status =~ statusPattern) {
                flag = true
            }
        } else if (errorLogCode == 1001402) {
            def cjkPattern = ~/[\u4e00-\u9fa5]/
            def regOrg = obj.regorg
            // NOTE(review): `==~` requires the WHOLE string to match, so this only
            // accepts a single character in the range and the length check is
            // redundant. Possibly `=~` or a `+` quantifier was intended — confirm.
            if (regOrg ==~ cjkPattern && regOrg.length() < 20) {
                flag = true
            }
        }
    } else {
        // Zero or several codes: every code found must be in this list.
        def errorLogList = [1000101, 1000401, 1000501, 1000601, 1001201, 1001301, 1001401, 2000002]
        def isFind = true
        for (field in bbdErrorLogs) {
            // Skip the fragment that precedes the first code.
            if (field == "[{\"" || field.startsWith("[{\"message\"")) {
                continue
            }
            // The code is taken as the first 7 characters of the fragment.
            int num = Integer.parseInt(field.substring(0, 7))
            if (!errorLogList.contains(num)) {
                isFind = false
            }
        }
        if (isFind) {
            if (obj.enterprise_status =~ statusPattern) {
                flag = true
            }
        }
    }

    // Write the (unmodified) parsed object back as pretty-printed JSON.
    def builder = new groovy.json.JsonBuilder()
    builder.call(obj)
    outputStream.write(builder.toPrettyString().getBytes(StandardCharsets.UTF_8))
} as StreamCallback)

if (flag) {
    flowFile = session.putAttribute(flowFile, "etl_flag", "DATA_MATCHED")
} else {
    flowFile = session.putAttribute(flowFile, "etl_flag", "DATA_NOT_MATCH")
}
session.transfer(flowFile, ExecuteScript.REL_SUCCESS)