Flume自定义Source,Interceptor,sink

插件机制plugin

Flume是一个基于插件的架构。有很多自定义的source,channel,sink,Serializer实现可以集成进来。除了可以直接将这些自定义的jar添加到flume-env.sh的FLUME_CLASSPATH变量外,还可以放到plugins.d文件夹内。当flume-ng启动时,会从plugins.d文件夹中查找插件,并添加进来。

Each plugin (subdirectory) within plugins.d can have up to three sub-directories:

  • lib - the plugin’s jar(s)
  • libext - the plugin’s dependency jar(s)
  • native - any required native libraries, such as .so files

Example of two plugins within the plugins.d directory:

plugins.d/
plugins.d/custom-source-1/
plugins.d/custom-source-1/lib/my-source.jar
plugins.d/custom-source-1/libext/spring-core-2.5.6.jar
plugins.d/custom-source-2/
plugins.d/custom-source-2/lib/custom.jar
plugins.d/custom-source-2/native/gettext.so

自定义拦截器,实现消息路由机制

需求: 使用flume采集服务器本地日志,需要按照日志类型的不同,将不同种类的日志发送到不同的分析系统。

分析: 服务器打点日志可能有多种,需要将不同类型的日志发送到不同的分析系统。Flume拓扑结构中的Multiplexing结构,其原理是利用event中Header的某个key的值,将不同的event发送到不同的Channel中。我们可以自定义一个Interceptor,为event的Header中的key进行赋值。 在本例中,将包含order关键字的消息发送到flume2,将不包含order关键字的发送到flume3。

flume1.conf

a1.channels=c1 c2
a1.sinks=k1 k2
a1.sources=r1

a1.sources.r1.type=netcat
a1.sources.r1.bind=localhost
a1.sources.r1.port=44441

# source的interceptor配置,根据event的body设置相关的header type值
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.example.flumeutils.TypeInterceptor$Builder

a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

a1.channels.c2.type=memory
a1.channels.c2.capacity=1000
a1.channels.c2.transactionCapacity=100

# source的channel选择器,根据header中的自定义type字段进行event路由,选择不同的channel
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = type
a1.sources.r1.selector.mapping.order = c1
a1.sources.r1.selector.mapping.other = c2 

a1.sinks.k1.type=avro
a1.sinks.k1.hostname=localhost
a1.sinks.k1.port=44442

a1.sinks.k2.type=avro
a1.sinks.k2.hostname=localhost
a1.sinks.k2.port=44443

a1.sinks.k1.channel=c1
a1.sinks.k2.channel=c2
a1.sources.r1.channels=c1 c2

flume2.conf

a2.channels=c1
a2.sources=r1
a2.sinks=k1

a2.channels.c1.type=memory
a2.channels.c1.capacity=1000
a2.channels.c1.transactionCapacity=100

a2.sources.r1.type=avro
a2.sources.r1.bind=localhost
a2.sources.r1.port=44442

a2.sinks.k1.type=logger

a2.sinks.k1.channel=c1
a2.sources.r1.channels=c1

flume3.conf

a3.channels=c1
a3.sources=r1
a3.sinks=k1

a3.channels.c1.type=memory
a3.channels.c1.capacity=1000
a3.channels.c1.transactionCapacity=100

a3.sources.r1.type=avro
a3.sources.r1.bind=localhost
a3.sources.r1.port=44443

a3.sinks.k1.type=logger

a3.sinks.k1.channel=c1
a3.sources.r1.channels=c1

Source自定义

source模板类如下

public class MySource extends AbstractSource implements Configurable, PollableSource {
  private String myProp;

  @Override
  public void configure(Context context) {
    // Read this source's properties from the agent configuration file.
    String myProp = context.getString("myProp", "defaultValue");

    // Process the myProp value (e.g. validation, convert to another type, ...)

    // Store myProp for later retrieval by process() method
    this.myProp = myProp;
  }

  @Override
  public void start() {
    // Initialize the connection to the external client
  }

  @Override
  public void stop () {
    // Disconnect from external client and do any additional cleanup
    // (e.g. releasing resources or nulling-out field values) ..
  }

  @Override
  public Status process() throws EventDeliveryException {
    Status status = null;

    try {
      // This try clause includes whatever Channel/Event operations you want to do

      // Receive new data
      Event e = getSomeData();

      // Store the Event into this Source's associated Channel(s).
      // The ChannelProcessor manages the channel transaction internally,
      // so a source does NOT open or close a Transaction of its own here.
      getChannelProcessor().processEvent(e);

      status = Status.READY;
    } catch (Throwable t) {
      // Log exception, handle individual exceptions as needed

      status = Status.BACKOFF;

      // re-throw all Errors
      if (t instanceof Error) {
        throw (Error)t;
      }
    }
    // BUG FIX: the original template ended with `finally { txn.close(); }`,
    // but no `txn` variable exists in a source's process() — that finally
    // block belongs to the sink template and made this class uncompilable.
    return status;
  }
}

需求:自定义source,每2s发送5条消息。

package com.example.flumeutils;

import org.apache.flume.Context;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.PollableSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.source.AbstractSource;

public class MySource extends AbstractSource implements Configurable, PollableSource {
  private String prefix;
  private String subfix;

  public Status process() throws EventDeliveryException {
    Status status = null;
    try {
      // mock 接收数据
      for (int i = 0; i < 5; i++) {
        // 创建事件对象
        SimpleEvent event = new SimpleEvent();
        // 给事件设置
        event.setBody((prefix + "--" + i + "--" + subfix).getBytes());
        // 调用channelProcessor发送事件
        getChannelProcessor().processEvent(event);
        status = Status.READY;
      }
      
      Thread.sleep(2000);
    } catch (Exception e) {
      e.printStackTrace();
      status = Status.BACKOFF;
    }
    return status;
  }

  public long getBackOffSleepIncrement() {
    return 0;
  }

  public long getMaxBackOffSleepInterval() {
    return 0;
  }

  public void configure(Context context) {
    prefix = context.getString("prefix");
    subfix = context.getString("subfix", "learn flume");
  }
}

flume1.conf

a1.sources=r1
a1.sinks=k1
a1.channels=c1

a1.sources.r1.type=com.example.flumeutils.MySource
a1.sources.r1.prefix=^^^^^^^
a1.sources.r1.subfix=!

a1.sinks.k1.type=logger

a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

a1.sinks.k1.channel=c1
a1.sources.r1.channels=c1

Sink自定义

sink不断轮询Channel中的事件且批量地移除它们,并将这些事件批量写入到下游。sink是完全事务性的,在从Channel批量删除数据之前,每个sink用Channel启动一个事务,批量事件一旦成功写入到下游,sink就利用channel提交事务。事务一旦被提交,channel就从自己的内部缓冲区删除事件。

sink模板类如下

public class MySink extends AbstractSink implements Configurable {
  private String myProp;

  @Override
  public void configure(Context context) {
    // Read this sink's properties from the agent configuration file.
    String myProp = context.getString("myProp", "defaultValue");

    // Process the myProp value (e.g. validation)

    // Store myProp for later retrieval by process() method
    this.myProp = myProp;
  }

  @Override
  public void start() {
    // Initialize the connection to the external repository (e.g. HDFS) that
    // this Sink will forward Events to ..
  }

  @Override
  public void stop () {
    // Disconnect from the external respository and do any
    // additional cleanup (e.g. releasing resources or nulling-out
    // field values) ..
  }

  @Override
  public Status process() throws EventDeliveryException {
    Status status = null;

    // Start transaction
    Channel ch = getChannel();
    Transaction txn = ch.getTransaction();
    txn.begin();
    try {
      // This try clause includes whatever Channel operations you want to do

      Event event = ch.take();

      // Send the Event to the external repository.
      // storeSomeData(e);

      txn.commit();
      status = Status.READY;
    } catch (Throwable t) {
      txn.rollback();

      // Log exception, handle individual exceptions as needed

      status = Status.BACKOFF;

      // re-throw all Errors
      if (t instanceof Error) {
        throw (Error)t;
      }
    } finally {
      // BUG FIX: the transaction must always be closed, on both the commit
      // and rollback paths, or it leaks (the concrete MySink below does this;
      // the template was missing it).
      txn.close();
    }
    return status;
  }
}
package com.example.flumeutils;

import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MySink extends AbstractSink implements Configurable {
  private Logger logger = LoggerFactory.getLogger(MySink.class);

  // Read in configure() to mirror MySource; not used by this logging sink.
  private String prefix;
  private String subfix;

  /**
   * Takes a single event from the channel inside a transaction and logs
   * its body. An empty channel (null take) still commits the transaction.
   *
   * @return READY after a successful commit, BACKOFF on failure
   */
  public Status process() throws EventDeliveryException {
    Channel channel = getChannel();
    Transaction transaction = channel.getTransaction();
    transaction.begin();

    Status result = null;
    try {
      Event polled = channel.take();

      // take() returns null when the channel has nothing buffered;
      // only log when an event was actually drained.
      if (polled != null) {
        logger.info(new String(polled.getBody()));
      }
      transaction.commit();
      result = Status.READY;
    } catch (Throwable t) {
      transaction.rollback();
      result = Status.BACKOFF;

      // Errors are fatal — propagate them to the runner.
      if (t instanceof Error) {
        throw (Error) t;
      }
    } finally {
      // Always release the transaction, committed or rolled back.
      transaction.close();
    }
    return result;
  }

  public void configure(Context context) {
    prefix = context.getString("prefix");
    subfix = context.getString("subfix", "flume");
  }
}

flume3.conf

a3.channels=c1
a3.sources=r1
a3.sinks=k1

a3.channels.c1.type=memory
a3.channels.c1.capacity=1000
a3.channels.c1.transactionCapacity=100

a3.sources.r1.type=netcat
a3.sources.r1.bind=localhost
a3.sources.r1.port=44443

a3.sinks.k1.type=com.example.flumeutils.MySink

a3.sinks.k1.channel=c1
a3.sources.r1.channels=c1

你可能感兴趣的:(Flume自定义Source,Interceptor,sink)