学习Flume以来,我实现了日志的多来源自动抽取和多target的自动发送等功能,但数据清洗过程一直放在Hadoop中,用MR程序定时进行。有没有一种方式,能够直接在Flume中通过编程完成数据清洗和数据匹配,过滤掉那些不规范的脏数据呢?于是我决定打Flume拦截器的主意:只要把代码稍微改改,自定义一个Interceptor,从拦截body入手,对每个body字符串做字段的正则提取和拼接即可。我们自定义的这个类叫LogAnalysis,代码如下:
package com.besttone.interceptor;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import org.apache.commons.lang.StringUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Flume interceptor that rewrites each event body into a pipe-delimited summary of five
 * fields pulled out of the raw log line with regular expressions: {@code areacode},
 * {@code datatime}, {@code imei}, {@code requestip} and {@code requesttime}.
 *
 * <p>Every extracted field keeps its key prefix exactly as matched, for example
 * {@code "requestip":"36.16.128.234}. A body in which any of the five patterns fails to
 * match is replaced with the literal {@code No match!!!}.
 *
 * <p>The constructor is private; instances are obtained through {@link Builder}.
 *
 * @author zhenzhen (created 2016-08-18)
 */
public class LogAnalysis implements Interceptor {

    /** Body written to events whose log line does not contain all five fields. */
    private static final String NO_MATCH = "No match!!!";

    /** Separator placed between the extracted fields. */
    private static final char FIELD_SEPARATOR = '|';

    /**
     * Field patterns, in output order. Compiled once: {@link Pattern} is immutable and safe to
     * share, whereas recompiling five patterns for every event is wasted work.
     */
    private static final Pattern[] FIELD_PATTERNS = {
        // areacode: a run of CJK ideographs.
        Pattern.compile("\"areacode\":\"[\\u4e00-\\u9fa5]*"),
        // datatime: a run of digits.
        Pattern.compile("\"datatime\":\"[0-9]*"),
        // imei: lives inside the escaped requestinfo JSON, hence the literal backslashes.
        // Letters are accepted as well as digits because device ids such as A0000044ABFD25
        // would otherwise be cut down to an empty value.
        Pattern.compile("\\\\\"imei\\\\\":\\\\\"[0-9A-Za-z]*"),
        // requestip: dotted quad; the dots are escaped so they only match a literal '.'.
        Pattern.compile("\"requestip\":\"[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}"),
        // requesttime: yyyy-MM-dd HH:mm:ss, e.g. "requesttime":"2016-04-18 16:00:00
        Pattern.compile("\"requesttime\":\"((19|20)\\d\\d)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01]) ([012][0-9]):([0-5][0-9]):([0-5][0-9])")
    };

    private LogAnalysis() {
    }

    @Override
    public void initialize() {
        // Nothing to set up.
    }

    @Override
    public void close() {
        // Nothing to release.
    }

    /**
     * Replaces the body of {@code event} with the extracted fields (or {@code No match!!!}).
     *
     * <p>The body is decoded and re-encoded as UTF-8 so that the result does not depend on the
     * platform default charset.
     *
     * @param event the event to rewrite; its body is modified in place
     * @return the same {@code event} instance; this method never returns {@code null}
     */
    @Override
    public Event intercept(Event event) {
        String body = new String(event.getBody(), Charsets.UTF_8);
        event.setBody(extractFields(body).getBytes(Charsets.UTF_8));
        return event;
    }

    /**
     * Applies {@link #intercept(Event)} to every event of the batch.
     *
     * @param events the batch to process
     * @return a new list holding the processed events, in the original order
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        List<Event> intercepted = Lists.newArrayListWithCapacity(events.size());
        for (Event event : events) {
            Event interceptedEvent = intercept(event);
            // Kept so that a future intercept(Event) may drop an event by returning null.
            if (interceptedEvent != null) {
                intercepted.add(interceptedEvent);
            }
        }
        return intercepted;
    }

    /**
     * Runs every field pattern against {@code body} and joins the first match of each.
     *
     * @param body one decoded log line
     * @return the matches joined with {@code |}, or {@code No match!!!} if any field is missing
     */
    private static String extractFields(String body) {
        // StringBuilder is enough here: the buffer never leaves this method.
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < FIELD_PATTERNS.length; i++) {
            Matcher matcher = FIELD_PATTERNS[i].matcher(body);
            if (!matcher.find()) {
                return NO_MATCH;
            }
            if (i > 0) {
                joined.append(FIELD_SEPARATOR);
            }
            joined.append(matcher.group());
        }
        return joined.toString();
    }

    /** Builder through which {@link LogAnalysis} instances are created. */
    public static class Builder implements Interceptor.Builder {

        @Override
        public Interceptor build() {
            return new LogAnalysis();
        }

        @Override
        public void configure(Context context) {
            // This interceptor has no configurable properties.
        }
    }
}
从上面的代码可以看出,我们只要让
public class LogAnalysis implements Interceptor
实现(注意是实现,而不是继承)这个接口,并按自己的需求实现其中的方法即可。具体的处理逻辑放到
public Event intercept(Event event) 方法中,最后在Interceptor.Builder中返回我们自定义的类LogAnalysis:
public static class Builder implements Interceptor.Builder {
    //使用Builder初始化Interceptor
    @Override
    public Interceptor build() { return new LogAnalysis(); }
    @Override
    public void configure(Context context) { }
}
写到这里,大家一定会问,这个程序没有main函数怎么调试呢?
是的,这个问题问的好,其实我当时在学习这个的过程中也思考过这个问题。
其实很简单,我编程的核心部分是在重构flume的event中的body,而body本质上就是我们从日志中抽取的一行字符串。因此可以把这部分字符串处理逻辑拎出来,放到自己的main函数中编程调试;一旦调试成功,字符串能按我们自定义的方式处理了,再把这部分代码放回上面的intercept(Event event)方法中,这样就OK了。
下面是我调试上面正则处理逻辑的代码:
package com.besttone.test;
import com.google.common.base.Charsets;
/**
* @author zhenzhen
* @create 2016-08-18 上午8:47
**/
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.lang.*;
/**
 * Stand-alone harness for debugging the log field-extraction regular expressions without
 * running them inside Flume.
 *
 * <p>Created by mac on 16/9/5.
 */
public class testmain {

    /** Built-in sample log line, used when no line is passed on the command line. */
    private static final String SAMPLE_LINE = "2016-04-18 16:00:00 {\"areacode\":\"浙江省丽水市\",\"countAll\":0,\"countCorrect\":0,\"datatime\":\"4134362\",\"logid\":\"201604181600001184409476\",\"requestinfo\":\"{\\\"sign\\\":\\\"4\\\",\\\"timestamp\\\":\\\"1460966390499\\\",\\\"remark\\\":\\\"4\\\",\\\"subjectPro\\\":\\\"123456\\\",\\\"interfaceUserName\\\":\\\"12345678900987654321\\\",\\\"channelno\\\":\\\"100\\\",\\\"imei\\\":\\\"12345678900987654321\\\",\\\"subjectNum\\\":\\\"13989589062\\\",\\\"imsi\\\":\\\"12345678900987654321\\\",\\\"queryNum\\\":\\\"13989589062\\\"}\",\"requestip\":\"36.16.128.234\",\"requesttime\":\"2016-04-18 16:59:59\",\"requesttype\":\"0\",\"responsecode\":\"010005\",\"responsedata\":\"无查询结果\"}\n";

    /** Field patterns, in output order; compiled once and reused. */
    private static final Pattern[] FIELD_PATTERNS = {
        // areacode: a run of CJK ideographs.
        Pattern.compile("\"areacode\":\"[\\u4e00-\\u9fa5]*"),
        // datatime: a run of digits.
        Pattern.compile("\"datatime\":\"[0-9]*"),
        // imei: lives inside the escaped requestinfo JSON, hence the literal backslashes.
        // Letters are accepted as well as digits so that ids such as A0000044ABFD25 are
        // captured in full instead of being cut down to an empty value.
        Pattern.compile("\\\\\"imei\\\\\":\\\\\"[0-9A-Za-z]*"),
        // requestip: dotted quad; the dots are escaped so they only match a literal '.'.
        Pattern.compile("\"requestip\":\"[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}"),
        // requesttime: yyyy-MM-dd HH:mm:ss, e.g. "requesttime":"2016-04-18 16:00:00
        Pattern.compile("\"requesttime\":\"((19|20)\\d\\d)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01]) ([012][0-9]):([0-5][0-9]):([0-5][0-9])")
    };

    /**
     * Extracts the five fields from one log line and prints the result to standard output.
     *
     * <p>Prints the original line followed by {@code Found value: } and the matches joined
     * with {@code |} when every pattern matches, or {@code NO MATCH} otherwise.
     *
     * @param args optional; when present, {@code args[0]} is used as the log line instead of
     *     the built-in sample
     */
    public static void main(String[] args) {
        String line = (args != null && args.length > 0) ? args[0] : SAMPLE_LINE;

        StringBuilder found = new StringBuilder();
        for (int i = 0; i < FIELD_PATTERNS.length; i++) {
            Matcher matcher = FIELD_PATTERNS[i].matcher(line);
            if (!matcher.find()) {
                System.out.println("NO MATCH");
                return;
            }
            if (i > 0) {
                found.append('|');
            }
            found.append(matcher.group());
        }

        System.out.println("origin string : " + line);
        System.out.println("Found value: " + found);
    }
}
下面再贴一下我的日志内容格式(取了几条,供大家学习测试用):
2016-04-18 16:00:00 {"areacode":"浙江省丽水市","countAll":0,"countCorrect":0,"datatime":"4134362","logid":"201604181600001184409476","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966390499\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"13989589062\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"13989589062\"}","requestip":"36.16.128.234","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"宁夏银川市","countAll":0,"countCorrect":0,"datatime":"4715990","logid":"201604181600001858043208","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966400120\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1210\",\"imei\":\"A0000044ABFD25\",\"subjectNum\":\"15379681917\",\"imsi\":\"460036951451601\",\"queryNum\":\"\"}","requestip":"115.168.93.87","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果","userAgent":"ZTE-Me/Mobile"}
2016-04-18 16:00:00 {"areacode":"黑龙江省哈尔滨市","countAll":0,"countCorrect":0,"datatime":"5369561","logid":"201604181600001068429609","requestinfo":"{\"interfaceUserName\":\"12345678900987654321\",\"queryNum\":\"\",\"timestamp\":\"1460966400139\",\"sign\":\"4\",\"imsi\":\"460030301212545\",\"imei\":\"35460207765269\",\"subjectNum\":\"55588237\",\"subjectPro\":\"123456\",\"remark\":\"4\",\"channelno\":\"2100\"}","requestip":"42.184.41.180","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"浙江省丽水市","countAll":0,"countCorrect":0,"datatime":"4003096","logid":"201604181600001648238807","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966391025\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"13989589062\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"13989589062\"}","requestip":"36.16.128.234","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"广西南宁市","countAll":0,"countCorrect":0,"datatime":"4047993","logid":"201604181600001570024205","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966382871\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"A000004853168C\",\"subjectNum\":\"07765232589\",\"imsi\":\"460031210400007\",\"queryNum\":\"13317810717\"}","requestip":"219.159.72.3","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"海南省五指山市","countAll":0,"countCorrect":0,"datatime":"5164117","logid":"201604181600001227842048","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399159\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1017\",\"imei\":\"A000005543AFB7\",\"subjectNum\":\"089836329061\",\"imsi\":\"460036380954376\",\"queryNum\":\"13389875751\"}","requestip":"140.240.171.71","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"山西省","countAll":0,"countCorrect":0,"datatime":"14075772","logid":"201604181600001284030648","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966400332\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"A000004FE0218A\",\"subjectNum\":\"03514043633\",\"imsi\":\"460037471517070\",\"queryNum\":\"\"}","requestip":"1.68.5.227","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"四川省","countAll":0,"countCorrect":0,"datatime":"6270982","logid":"201604181600001173504863","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966398896\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"13666231300\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"13666231300\"}","requestip":"182.144.66.97","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"浙江省","countAll":0,"countCorrect":0,"datatime":"4198522","logid":"201604181600001390637240","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399464\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"05533876327\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"05533876327\"}","requestip":"36.23.9.49","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"000000","responsedata":"操作成功"}
2016-04-18 16:00:00 {"areacode":"江苏省连云港市","countAll":0,"countCorrect":0,"datatime":"4408097","logid":"201604181600001249944032","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966395908\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"18361451463\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"18361451463\"}","requestip":"58.223.4.210","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"浙江省","countAll":0,"countCorrect":0,"datatime":"5154518","logid":"201604181600001714496463","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399474\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"05533876327\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"05533876327\"}","requestip":"36.23.9.49","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"000000","responsedata":"操作成功"}
2016-04-18 16:00:00 {"areacode":"浙江省","countAll":0,"countCorrect":0,"datatime":"4761269","logid":"201604181600001187577136","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966400191\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"057427895481\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"057427895481\"}","requestip":"36.23.153.219","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"河北省廊坊市","countAll":0,"countCorrect":0,"datatime":"75408665","logid":"201604181600001020722122","requestinfo":"{\"subjectNum\":\"13582968216\",\"imsi\":\"460031298611058\",\"queryNum\":\"18033684000\",\"channelno\":\"100\",\"imei\":\"99000586096233\"}","requestip":"110.251.61.62","requesttime":"2016-04-18 16:00:00","requesttype":"28","responsecode":"010005","responsedata":"查询结果为空"}
2016-04-18 16:00:00 {"areacode":"贵州省黔西南州兴义市","countAll":0,"countCorrect":0,"datatime":"4586950","logid":"201604181600001499837763","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966398600\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"865707029710377\",\"subjectNum\":\"509\",\"imsi\":\"460025864693571\",\"queryNum\":\"\"}","requestip":"111.85.45.172","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"云南省昆明市","countAll":0,"countCorrect":0,"datatime":"4441961","logid":"201604181600001794147521","requestinfo":"{\"interfaceUserName\":\"12345678900987654321\",\"queryNum\":\"13618922555\",\"timestamp\":\"1460966401214\",\"sign\":\"4\",\"imsi\":\"12345678900987654321\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"13618922555\",\"subjectPro\":\"123456\",\"remark\":\"4\",\"channelno\":\"100\"}","requestip":"113.63.132.128","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"江苏省连云港市","countAll":0,"countCorrect":0,"datatime":"4186305","logid":"201604181600001175993827","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966397309\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"18361451463\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"18361451463\"}","requestip":"58.223.4.210","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"江苏省","countAll":0,"countCorrect":0,"datatime":"4103662","logid":"201604181600001051944754","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399642\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"a0000059788b71\",\"subjectNum\":\"768\",\"imsi\":\"460036660539168\",\"queryNum\":\"\"}","requestip":"180.98.180.95","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"山西省","countAll":0,"countCorrect":0,"datatime":"4247256","logid":"201604181600001013319164","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966400334\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"A000004FE0218A\",\"subjectNum\":\"03514043633\",\"imsi\":\"460037471517070\",\"queryNum\":\"\"}","requestip":"1.68.5.227","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"北京市","countAll":0,"countCorrect":0,"datatime":"5401532","logid":"201604181600001469644300","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399603\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"4001004259\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"\"}","requestip":"106.121.0.143","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"北京市","countAll":0,"countCorrect":0,"datatime":"4876709","logid":"201604181600001476349766","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399603\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"4001004259\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"\"}","requestip":"106.121.0.143","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"江苏省连云港市","countAll":0,"countCorrect":0,"datatime":"4498474","logid":"201604181600001508125886","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966397987\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"18361451463\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"18361451463\"}","requestip":"58.223.4.210","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"江苏省连云港市","countAll":0,"countCorrect":0,"datatime":"4318254","logid":"201604181600001766447939","requestinfo":"{\"subjectNum\":\"66699\",\"imsi\":\"460036611592505\",\"queryNum\":\"\",\"channelno\":\"100\",\"imei\":\"A00000457ECC28\"}","requestip":"58.223.4.210","requesttime":"2016-04-18 16:00:00","requesttype":"28","responsecode":"000000","responsedata":"操作成功"}
2016-04-18 16:00:00 {"areacode":"江西省南昌市","countAll":0,"countCorrect":0,"datatime":"244260927","logid":"201604181559591112708085","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966400525\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"a000004f883c2e\",\"subjectNum\":\"813161\",\"imsi\":\"460031392055476\",\"queryNum\":\"\"}","requestip":"182.97.149.145","requesttime":"2016-04-18 15:59:59","requesttype":"0","responsecode":"010005","responsedata":"无查询结果","userAgent":"Dalvik/1.6.0 (Linux; U; Android 4.4.2; HUAWEI P7-L09 Build/HuaweiP7-L09)"}
2016-04-18 16:00:00 {"areacode":"上海市黄浦区","countAll":0,"countCorrect":0,"datatime":"4657170","logid":"201604181600001303952983","requestinfo":"{\"interfaceUserName\":\"12345678900987654321\",\"queryNum\":\"\",\"timestamp\":\"1460966400444\",\"sign\":\"4\",\"imei\":\"a000005901fef3\",\"subjectNum\":\"4235\",\"subjectPro\":\"123456\",\"remark\":\"4\",\"channelno\":\"9000\"}","requestip":"124.74.160.162","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果","userAgent":"Dalvik/2.1.0 (Linux; U; Android 6.0; HUAWEI CRR-CL00 Build/HUAWEICRR-CL00)"}
2016-04-18 16:00:00 {"areacode":"江西省南昌市","countAll":0,"countCorrect":0,"datatime":"252676235","logid":"201604181559591152287931","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966400399\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"a000004f883c2e\",\"subjectNum\":\"813161\",\"imsi\":\"460031392055476\",\"queryNum\":\"\"}","requestip":"182.97.149.145","requesttime":"2016-04-18 15:59:59","requesttype":"0","responsecode":"010005","responsedata":"无查询结果","userAgent":"Dalvik/1.6.0 (Linux; U; Android 4.4.2; HUAWEI P7-L09 Build/HuaweiP7-L09)"}
2016-04-18 16:00:00 {"areacode":"局域网","countAll":0,"countCorrect":0,"datatime":"5160006","logid":"201604181600001026793341","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399352\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1002\",\"imei\":\"A00000457ECC28\",\"subjectNum\":\"66699\",\"imsi\":\"460036611592505\",\"queryNum\":\"\"}","requestip":"10.55.80.187","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"江苏省","countAll":0,"countCorrect":0,"datatime":"245262271","logid":"201604181559591753547387","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966399846\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"1006\",\"imei\":\"A000004F661365\",\"subjectNum\":\"2336\",\"imsi\":\"460036580978572\",\"queryNum\":\"\"}","requestip":"180.98.187.27","requesttime":"2016-04-18 15:59:59","requesttype":"0","responsecode":"010005","responsedata":"无查询结果","userAgent":"Dalvik/1.6.0 (Linux; U; Android 4.4.2; HUAWEI C199 Build/HuaweiC199)"}
2016-04-18 16:00:00 {"countAll":0,"countCorrect":0,"logid":"201604181600001605286233","requestip":"36.23.153.219","requesttime":"2016-04-18 16:00:00","requesttype":"0"}
2016-04-18 16:00:00 {"areacode":"浙江省","countAll":0,"countCorrect":0,"datatime":"4203930","logid":"201604181600001873855360","requestinfo":"{\"sign\":\"4\",\"timestamp\":\"1460966400191\",\"remark\":\"4\",\"subjectPro\":\"123456\",\"interfaceUserName\":\"12345678900987654321\",\"channelno\":\"100\",\"imei\":\"12345678900987654321\",\"subjectNum\":\"057427895481\",\"imsi\":\"12345678900987654321\",\"queryNum\":\"057427895481\"}","requestip":"36.23.153.219","requesttime":"2016-04-18 16:00:00","requesttype":"0","responsecode":"010005","responsedata":"无查询结果"}
2016-04-18 16:00:00 {"areacode":"河南省郑州市","countAll":0,"countCorrect":0,"datatime":"338870020","logid":"201604181559591841947051","requestinfo":"{\"subjectNum\":\"621418\",\"imsi\":\"460037561702775\",\"queryNum\":\"\",\"channelno\":\"100\",\"imei\":\"a0000055dc82e3\"}","requestip":"106.33.148.44","requesttime":"2016-04-18 15:59:59","requesttype":"28","responsecode":"000000","responsedata":"操作成功","userAgent":"Dalvik/1.6.0 (Linux; U; Android 4.4.2; PE-CL00 Build/HuaweiPE-CL00)"}
# Flume agent "client": spooldir source -> LogAnalysis interceptor -> file channel -> file_roll sink.
# Component names. The source has to be listed here as well; an undeclared source is not
# started, so its interceptor would never run.
client.sources = source_client
client.sinks = sink_client
client.channels = channel_client
# Log source (watches the spool directory for newly added files)
# NOTE(review): the sink and channel directories below live under this spool directory -
# confirm that the spooldir source of your Flume version ignores sub-directories.
client.sources.source_client.type = spooldir
client.sources.source_client.channels = channel_client
client.sources.source_client.spoolDir = /Users/mac/tmp/input/
client.sources.source_client.fileHeader = true
# Custom interceptor (regex-based field extraction); referenced through its nested Builder class
client.sources.source_client.interceptors=i1
client.sources.source_client.interceptors.i1.type= com.besttone.interceptor.LogAnalysis$Builder
#client.sources.source_client.interceptors.i1.regex=\"requesttime\":\"((19|20)\\d\\d)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01]) ([012][0-9]):([0-5][0-9]):([0-5][0-9])
#client.sources.source_client.interceptors.i2.type=timestamp
# Sink configuration
client.sinks.sink_client.type = file_roll
client.sinks.sink_client.channel = channel_client
client.sinks.sink_client.sink.directory = /Users/mac/tmp/input/target
#client.sinks.sink_client.sink.directory = /Users/mac/tmp/input/target/%y-%m-%d
client.sinks.sink_client.sink.rollInterval = 3
client.sinks.sink_client.sink.useFileSuffix = true
client.sinks.sink_client.sink.fileSuffix = .COMPLETED
# File channel configuration
client.channels.channel_client.type = file
client.channels.channel_client.checkpointDir = /Users/mac/tmp/input/inchannel
client.channels.channel_client.dataDirs = /Users/mac/tmp/input/inchanneldata
最后就让我们来启动flume吧
#flume-ng agent -n client -c /Users/mac/flume/conf/ -f /Users/mac/flume/conf/localhost.conf -Dflume.root.logger=DEBUG,console
下面是flume-ng自定义拦截器生效的结果: