插件机制plugin
Flume是一个基于插件的架构。有很多自定义的source,channel,sink,Serializer实现可以集成进来。除了可以直接将这些自定义的jar添加到flume-env.sh的FLUME_CLASSPATH变量外,还可以放到plugins.d文件夹内。当flume-ng
启动时,会从plugins.d文件夹中查找插件,并添加进来。
Each plugin (subdirectory) within plugins.d can have up to three sub-directories:
- lib - the plugin’s jar(s)
- libext - the plugin’s dependency jar(s)
- native - any required native libraries, such as .so files
Example of two plugins within the plugins.d directory:
plugins.d/
plugins.d/custom-source-1/
plugins.d/custom-source-1/lib/my-source.jar
plugins.d/custom-source-1/libext/spring-core-2.5.6.jar
plugins.d/custom-source-2/
plugins.d/custom-source-2/lib/custom.jar
plugins.d/custom-source-2/native/gettext.so
自定义拦截器,实现消息路由机制
需求: 使用flume采集服务器本地日志,需要按照日志类型的不同,将不同种类的日志发送到不同的分析系统。
分析: 服务器打点日志可能有多种,需要将不同类型的日志发送到不同的分析系统。Flume拓扑结构中的Multiplexing结构,其原理是利用event中Header的某个key的值,将不同的event发送到不同的Channel中。我们可以自定义一个Interceptor,为event的Header中的key进行赋值。 在本例中,将包含order关键字的消息发送到flume2,将不包含order关键字的发送到flume3。
flume1.conf
# Agent a1: one netcat source fanned out to two memory channels, each drained by an avro sink.
a1.channels=c1 c2
a1.sinks=k1 k2
a1.sources=r1
# Source r1: netcat listener on localhost:44441.
a1.sources.r1.type=netcat
a1.sources.r1.bind=localhost
a1.sources.r1.port=44441
# Interceptor configuration for the source: sets the "type" header value based on the event body.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.example.flumeutils.TypeInterceptor$Builder
# Channels c1 and c2: identical in-memory channels.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.channels.c2.type=memory
a1.channels.c2.capacity=1000
a1.channels.c2.transactionCapacity=100
# Channel selector for the source: routes each event to a channel according to the
# custom "type" header (value "order" -> c1, value "other" -> c2).
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = type
a1.sources.r1.selector.mapping.order = c1
a1.sources.r1.selector.mapping.other = c2
# Sink k1: avro sink targeting localhost:44442 (the port flume2.conf's avro source binds).
a1.sinks.k1.type=avro
a1.sinks.k1.hostname=localhost
a1.sinks.k1.port=44442
# Sink k2: avro sink targeting localhost:44443 (the port flume3.conf's avro source binds).
a1.sinks.k2.type=avro
a1.sinks.k2.hostname=localhost
a1.sinks.k2.port=44443
# Wiring: each sink drains one channel; the source writes to both channels.
a1.sinks.k1.channel=c1
a1.sinks.k2.channel=c2
a1.sources.r1.channels=c1 c2
flume2.conf
# Agent a2: avro source -> memory channel -> logger sink.
a2.channels=c1
a2.sources=r1
a2.sinks=k1
# Channel c1: in-memory channel.
a2.channels.c1.type=memory
a2.channels.c1.capacity=1000
a2.channels.c1.transactionCapacity=100
# Source r1: avro listener on localhost:44442.
a2.sources.r1.type=avro
a2.sources.r1.bind=localhost
a2.sources.r1.port=44442
# Sink k1: logger sink.
a2.sinks.k1.type=logger
# Wiring: sink and source both use channel c1.
a2.sinks.k1.channel=c1
a2.sources.r1.channels=c1
flume3.conf
# Agent a3: avro source -> memory channel -> logger sink.
a3.channels=c1
a3.sources=r1
a3.sinks=k1
# Channel c1: in-memory channel.
a3.channels.c1.type=memory
a3.channels.c1.capacity=1000
a3.channels.c1.transactionCapacity=100
# Source r1: avro listener on localhost:44443.
a3.sources.r1.type=avro
a3.sources.r1.bind=localhost
a3.sources.r1.port=44443
# Sink k1: logger sink.
a3.sinks.k1.type=logger
# Wiring: sink and source both use channel c1.
a3.sinks.k1.channel=c1
a3.sources.r1.channels=c1
Source自定义
source模板类如下
/**
 * Skeleton for a custom pollable Flume source.
 *
 * <p>Fill in {@link #start()}, {@link #stop()} and the data-fetching part of
 * {@link #process()}; {@code getSomeData()} is a placeholder for that fetch and must be
 * supplied by the implementer.
 */
public class MySource extends AbstractSource implements Configurable, PollableSource {
  /** Value of the {@code myProp} setting, captured in {@link #configure(Context)}. */
  private String myProp;

  /**
   * Reads this source's settings from the agent configuration.
   *
   * @param context configuration context; {@code myProp} falls back to {@code "defaultValue"}
   */
  @Override
  public void configure(Context context) {
    String myProp = context.getString("myProp", "defaultValue");
    // Process the myProp value (e.g. validation, convert to another type, ...)
    // Store myProp for later retrieval by process() method
    this.myProp = myProp;
  }

  /** Lifecycle hook: open whatever connection the source reads from. */
  @Override
  public void start() {
    // Initialize the connection to the external client
  }

  /** Lifecycle hook: release everything acquired in {@link #start()}. */
  @Override
  public void stop() {
    // Disconnect from external client and do any additional cleanup
    // (e.g. releasing resources or nulling-out field values) ..
  }

  /**
   * Fetches one event and hands it to the channel processor.
   *
   * @return {@code Status.READY} when an event was handed over, {@code Status.BACKOFF} when
   *     anything was thrown while doing so
   * @throws EventDeliveryException declared by the interface; not thrown directly here
   */
  @Override
  public Status process() throws EventDeliveryException {
    Status status = null;
    try {
      // This try clause includes whatever Channel/Event operations you want to do
      // Receive new data
      Event e = getSomeData();
      // Store the Event into this Source's associated Channel(s)
      getChannelProcessor().processEvent(e);
      status = Status.READY;
    } catch (Throwable t) {
      // Log exception, handle individual exceptions as needed
      status = Status.BACKOFF;
      // re-throw all Errors
      if (t instanceof Error) {
        throw (Error) t;
      }
    }
    // This method opens no Transaction of its own, so there is nothing to close here.
    return status;
  }
}
需求:自定义source,每2s发送5条消息。
package com.example.flumeutils;
import org.apache.flume.Context;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.PollableSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.source.AbstractSource;
/**
 * Demo source that emits a batch of five generated events and then pauses for two seconds
 * on every {@link #process()} call.
 *
 * <p>Each event body has the form {@code <prefix>--<index>--<subfix>}, where both parts come
 * from the agent configuration (see {@link #configure(Context)}).
 */
public class MySource extends AbstractSource implements Configurable, PollableSource {
  /** Text placed before the index; {@code null} when the {@code prefix} key is not set. */
  private String prefix;
  /** Text placed after the index; defaults to {@code "learn flume"}. */
  private String subfix;

  /**
   * Generates five events, hands each to the channel processor, then sleeps 2000 ms.
   *
   * @return {@code Status.READY} after a complete batch and sleep, {@code Status.BACKOFF} if
   *     an exception occurred or the thread was interrupted
   * @throws EventDeliveryException declared by the interface; not thrown directly here
   */
  public Status process() throws EventDeliveryException {
    Status status;
    try {
      // Mock "receiving" data: build and deliver five events.
      for (int i = 0; i < 5; i++) {
        SimpleEvent event = new SimpleEvent();
        // Encode explicitly as UTF-8 so the bytes do not depend on the platform default charset.
        event.setBody((prefix + "--" + i + "--" + subfix)
            .getBytes(java.nio.charset.StandardCharsets.UTF_8));
        // Hand the event to the channel processor.
        getChannelProcessor().processEvent(event);
      }
      status = Status.READY;
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      // Restore the interrupt flag so whoever interrupted this thread can still observe it.
      Thread.currentThread().interrupt();
      status = Status.BACKOFF;
    } catch (Exception e) {
      // Tutorial-level reporting, kept from the original example.
      e.printStackTrace();
      status = Status.BACKOFF;
    }
    return status;
  }

  /** @return back-off sleep increment; this example uses 0 */
  public long getBackOffSleepIncrement() {
    return 0;
  }

  /** @return maximum back-off sleep interval; this example uses 0 */
  public long getMaxBackOffSleepInterval() {
    return 0;
  }

  /**
   * Reads the {@code prefix} and {@code subfix} settings.
   *
   * @param context configuration context; {@code prefix} has no default, {@code subfix}
   *     defaults to {@code "learn flume"}
   */
  public void configure(Context context) {
    prefix = context.getString("prefix");
    subfix = context.getString("subfix", "learn flume");
  }
}
flume1.conf
# Agent a1: custom MySource -> memory channel -> logger sink.
a1.sources=r1
a1.sinks=k1
a1.channels=c1
# Source r1: the custom source class; "prefix" and "subfix" are the keys read in MySource.configure().
a1.sources.r1.type=com.example.flumeutils.MySource
a1.sources.r1.prefix=^^^^^^^
a1.sources.r1.subfix=!
# Sink k1: logger sink.
a1.sinks.k1.type=logger
# Channel c1: in-memory channel.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
# Wiring: sink and source both use channel c1.
a1.sinks.k1.channel=c1
a1.sources.r1.channels=c1
Sink自定义
sink不断轮询Channel中的事件且批量地移除它们,并将这些事件批量写入到下游。sink是完全事务性的,在从Channel批量删除数据之前,每个sink用Channel启动一个事务,批量事件一旦成功写入到下游,sink就利用channel提交事务。事务一旦被提交,channel就从自己的内部缓冲区删除事件。
sink模板类如下
/**
 * Skeleton for a custom Flume sink.
 *
 * <p>Fill in {@link #start()}, {@link #stop()} and the delivery step inside
 * {@link #process()}.
 */
public class MySink extends AbstractSink implements Configurable {
  /** Value of the {@code myProp} setting, captured in {@link #configure(Context)}. */
  private String myProp;

  /**
   * Reads this sink's settings from the agent configuration.
   *
   * @param context configuration context; {@code myProp} falls back to {@code "defaultValue"}
   */
  @Override
  public void configure(Context context) {
    String myProp = context.getString("myProp", "defaultValue");
    // Process the myProp value (e.g. validation)
    // Store myProp for later retrieval by process() method
    this.myProp = myProp;
  }

  /** Lifecycle hook: open the connection to the destination system. */
  @Override
  public void start() {
    // Initialize the connection to the external repository (e.g. HDFS) that
    // this Sink will forward Events to ..
  }

  /** Lifecycle hook: release everything acquired in {@link #start()}. */
  @Override
  public void stop() {
    // Disconnect from the external repository and do any
    // additional cleanup (e.g. releasing resources or nulling-out
    // field values) ..
  }

  /**
   * Takes one event from the channel inside a transaction and forwards it.
   *
   * <p>The transaction is committed on success, rolled back on any failure, and closed in
   * every case.
   *
   * @return {@code Status.READY} after a successful commit, {@code Status.BACKOFF} when
   *     anything was thrown
   * @throws EventDeliveryException declared by the interface; not thrown directly here
   */
  @Override
  public Status process() throws EventDeliveryException {
    Status status = null;
    // Start transaction
    Channel ch = getChannel();
    Transaction txn = ch.getTransaction();
    txn.begin();
    try {
      // This try clause includes whatever Channel operations you want to do
      Event event = ch.take();
      // Send the Event to the external repository.
      // storeSomeData(event);
      txn.commit();
      status = Status.READY;
    } catch (Throwable t) {
      txn.rollback();
      // Log exception, handle individual exceptions as needed
      status = Status.BACKOFF;
      // re-throw all Errors
      if (t instanceof Error) {
        throw (Error) t;
      }
    } finally {
      // Always close the transaction, whether it was committed or rolled back.
      txn.close();
    }
    return status;
  }
}
package com.example.flumeutils;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Demo sink that takes one event per {@link #process()} call and writes its body to the
 * SLF4J log at INFO level.
 */
public class MySink extends AbstractSink implements Configurable {
  private static final Logger logger = LoggerFactory.getLogger(MySink.class);

  /** Read from the {@code prefix} key; currently not applied to the logged output. */
  private String prefix;
  /** Read from the {@code subfix} key (default {@code "flume"}); currently not applied. */
  private String subfix;

  /**
   * Takes at most one event from the channel inside a transaction and logs its body.
   *
   * @return {@code Status.READY} when an event was taken and the transaction committed;
   *     {@code Status.BACKOFF} when the channel had no event or anything was thrown
   * @throws EventDeliveryException declared by the interface; not thrown directly here
   */
  public Status process() throws EventDeliveryException {
    Status status;
    Channel ch = getChannel();
    Transaction txn = ch.getTransaction();
    txn.begin();
    try {
      Event event = ch.take();
      if (event != null) {
        // Decode explicitly as UTF-8 rather than relying on the platform default charset.
        String body = new String(event.getBody(), java.nio.charset.StandardCharsets.UTF_8);
        logger.info(body);
        status = Status.READY;
      } else {
        // Nothing to take: report BACKOFF instead of READY so the caller is told there
        // was no work on this call.
        status = Status.BACKOFF;
      }
      txn.commit();
    } catch (Throwable t) {
      txn.rollback();
      status = Status.BACKOFF;
      // Errors are not recoverable here; propagate them after rolling back.
      if (t instanceof Error) {
        throw (Error) t;
      }
    } finally {
      txn.close();
    }
    return status;
  }

  /**
   * Reads the {@code prefix} and {@code subfix} settings.
   *
   * @param context configuration context; {@code prefix} has no default, {@code subfix}
   *     defaults to {@code "flume"}
   */
  public void configure(Context context) {
    prefix = context.getString("prefix");
    subfix = context.getString("subfix", "flume");
  }
}
flume1.conf
# Agent a3: netcat source -> memory channel -> custom MySink.
a3.channels=c1
a3.sources=r1
a3.sinks=k1
# Channel c1: in-memory channel.
a3.channels.c1.type=memory
a3.channels.c1.capacity=1000
a3.channels.c1.transactionCapacity=100
# Source r1: netcat listener on localhost:44443.
a3.sources.r1.type=netcat
a3.sources.r1.bind=localhost
a3.sources.r1.port=44443
# Sink k1: the custom sink class. No "prefix"/"subfix" keys are set for it here.
a3.sinks.k1.type=com.example.flumeutils.MySink
# Wiring: sink and source both use channel c1.
a3.sinks.k1.channel=c1
a3.sources.r1.channels=c1