目录
Flume自定义Agent
pom文件
自定义Source
测试
自定义Sink
测试
自定义Interceptor
测试
自定义Channel(DualChannel实现)
参考
4.0.0
FlumeAgent
FlumeAgent
1.0
org.eclipse.m2e
lifecycle-mapping
1.0.0
org.apache.maven.plugins
maven-compiler-plugin
[3.1,)
compile
cloudera
https://repository.cloudera.com/artifactory/cloudera-repos/
maven
http://central.maven.org/maven2/
alimaven
http://maven.aliyun.com/nexus/content/groups/public/
org.apache.maven.plugins
maven-resources-plugin
2.5
org.apache.hadoop
hadoop-common
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-hdfs
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-maven-plugins
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-client
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-yarn-server-resourcemanager
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-yarn-server-nodemanager
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-yarn-common
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-mapreduce-client-app
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-mapreduce-client-common
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-mapreduce-client-core
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-mapreduce-client-jobclient
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-yarn-api
2.6.0-cdh5.14.0
org.apache.hadoop
hadoop-yarn-client
2.6.0-cdh5.14.0
org.apache.hive
hive-exec
1.1.0-cdh5.14.0
org.apache.hive
hive-common
1.1.0-cdh5.14.0
org.apache.hive
hive-jdbc
1.1.0-cdh5.14.0
org.apache.flume
flume-ng-core
1.6.0-cdh5.14.0
org.apache.flume.flume-ng-sources
flume-kafka-source
1.6.0-cdh5.14.0
org.apache.hive
hive-metastore
1.1.0-cdh5.14.0
org.apache.hive
hive-exec
1.1.0-cdh5.14.0
org.apache.mrunit
mrunit
1.1.0
hadoop2
test
org.mockito
mockito-all
1.9.5
test
junit
junit
4.10
test
jdk.tools
jdk.tools
1.8
system
${JAVA_HOME}/lib/tools.jar
package com.flume.source;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Random;
import org.apache.flume.Context;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.PollableSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.source.AbstractSource;
public class MySource extends AbstractSource implements Configurable,PollableSource{
public long getBackOffSleepIncrement() {
return 0;
}
public long getMaxBackOffSleepInterval() {
return 0;
}
public Status process() throws EventDeliveryException {
//生成0-10的随机数,组合成一个text
Random random = new Random();
int randomNum = random.nextInt(10);
String text = "myfirstSource" + randomNum;
//生成Header
HashMap header = new HashMap();
header.put("id",Integer.toString(randomNum));
//EventBuilder.withBody:将给定的header和body组合成Event,并制定字符集
//getChannelProcessor.processEvent():将给定的Event put到每个配置的Channel
this.getChannelProcessor().processEvent(EventBuilder.withBody(text,Charset.forName("UTF-8"),header));
//Ready状态表示Event可以被取走,还有一个状态是Backoff,表示让Flume睡眠一段时间
return Status.READY;
}
public void configure(Context arg0) {
}
}
生成jar包,拷到$FLUME_HOME/lib下
然后执行命令
flume-ng agent -n a1 -c / -f /udagent.conf -Dflume.root.logger=INFO,console
运行效果
package com.flume.sink;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
public class MySink extends AbstractSink implements Configurable {
public Status process() throws EventDeliveryException {
//初始化Status
Status status = Status.READY;
Transaction trans = null;
try {
//开始事务
Channel channel = getChannel();
trans = channel.getTransaction();
trans.begin();
//获取Event
Event event = channel.take();
while(event!=null){
//获取body
String body = new String(event.getBody());
System.out.println(body);
}
if (event == null) {
status = Status.BACKOFF;
}
trans.commit();
} catch (Exception e) {
//有异常的时候还是要提交
if (trans != null) {
trans.commit();
}
e.printStackTrace();
} finally {
if (trans != null) {
trans.close();
}
}
return status;
}
public void configure(Context arg0) {
}
}
配置文件如下
运行Flume
flume-ng agent -n a1 -c / -f udsinkagent.conf -Dflume.root.logger=INFO,console
将测试文件拷贝进文件夹中,相应的jar包也拷到Flume的lib下
Flume的Console马上会打印出文件的内容
package com.flume.interceptor;
import java.util.List;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import com.google.common.base.Charsets;
public class MyInterceptor implements Interceptor{
public void close() {
}
public void initialize() {
}
//对单个Event内部的处理
public Event intercept(Event event) {
String body = new String(event.getBody(),Charsets.UTF_8);
System.out.println(body);
StringBuffer bodyString = new StringBuffer();
//当发现body以test开头时,将body替换成123456
if(body.startsWith("test")){
bodyString = bodyString.append("123456");
event.setBody(bodyString.toString().getBytes());
}
return event;
}
//对多个Event的处理
public List intercept(List events) {
//reset Event
for(Event event :events ){
if (event!=null) intercept(event);
}
return events;
}
public static class Builder implements Interceptor.Builder{
public void configure(Context arg0) {
}
public Interceptor build() {
return new MyInterceptor();
}
}
}
测试文件
运行Flume
flume-ng agent -n a2 -c / -f /myinterceptor.conf -Dflume.root.logger=INFO,console
测试结果,可以发现所有以test开头的字符串都被替换成了123456
这里参考了一下美团技术团队的文章
https://tech.meituan.com/mt_log_system_optimization.html
Flume本身提供了MemoryChannel和FileChannel。MemoryChannel处理速度快,但缓存大小有限,且没有持久化;FileChannel则刚好相反。我们希望利用两者的优势,在Sink处理速度够快,Channel没有缓存过多日志的时候,就使用MemoryChannel,当Sink处理速度跟不上,又需要Channel能够缓存下应用端发送过来的日志时,就使用FileChannel,由此我们开发了DualChannel,能够智能的在两个Channel之间切换。
/***
 * DualChannel routing flags (fragment; the enclosing class is not shown here).
 * putToMemChannel  — when true, puts go to memChannel; otherwise to fileChannel.
 * takeFromMemChannel — when true, takes come from memChannel; otherwise from fileChannel.
 * */
private AtomicBoolean putToMemChannel = new AtomicBoolean(true);
private AtomicBoolean takeFromMemChannel = new AtomicBoolean(true);
// Routes a put: prefer the fast memory channel while it has room; once it fills
// (or the file channel already holds a backlog), divert subsequent puts to the
// durable file channel.
// NOTE(review): switchon, memTransaction, fileTransaction, memChannel and
// fileChannel are fields of the enclosing DualChannel class, not visible in
// this fragment — confirm their semantics against the full source.
void doPut(Event event) {
if (switchon && putToMemChannel.get()) {
// Write into the memory channel.
memTransaction.put(event);
if ( memChannel.isFull() || fileChannel.getQueueSize() > 100) {
// Memory channel is full (or file backlog exceeds 100): switch puts to fileChannel.
putToMemChannel.set(false);
}
} else {
// Write into the file channel.
fileTransaction.put(event);
}
}
// Routes a take: drain the memory channel first; once it is empty, fall back to
// the file channel. When the file channel is drained as well, flip both flags so
// puts and takes return to the memory channel.
Event doTake() {
    if (takeFromMemChannel.get()) {
        // Read from the memory channel until it runs dry.
        Event taken = memTransaction.take();
        if (taken == null) {
            takeFromMemChannel.set(false);
        }
        return taken;
    }
    // Memory channel exhausted earlier: read from the file channel.
    Event taken = fileTransaction.take();
    if (taken == null) {
        // File channel drained too — route both sides back to the memory channel.
        takeFromMemChannel.set(true);
        putToMemChannel.set(true);
    }
    return taken;
}
Flume提供了NullSink,可以把不需要的日志通过NullSink直接丢弃,不进行存储。然而,Source需要先将events存放到Channel中,NullSink再将events取出扔掉。为了提升性能,我们把这一步移到了Channel里面做,所以开发了NullChannel。
http://flume.apache.org/releases/content/1.6.0/apidocs/overview-summary.html