Flume 配置文件(多个 sources、多个 channels、自定义拦截器)

# Flume agent "a1": two sources (TAILDIR + exec) are routed by a custom
# interceptor header into two memory channels, which are drained by two
# sink groups (failover and load-balancing) of Avro sinks.

# Name the components on this agent
a1.sources = r1 r2
a1.sinks = k1 k2 k3 k4
a1.channels = c1 c2
# Sink groups must be listed here for their processor settings to take effect
a1.sinkgroups = g1 g2

# Describe/configure the sources
# Source 1: TAILDIR, tails every file that matches one of the file groups
a1.sources.r1.type = taildir
a1.sources.r1.positionFile = /flume/tail_dir.json
a1.sources.r1.filegroups = f1 f2
# Absolute path of each file group (the regex applies to the file name only)
a1.sources.r1.filegroups.f1 = /test/.*file.*
a1.sources.r1.filegroups.f2 = /test/.*log.*
# Source 2: exec, follows a single file that is appended to in real time
a1.sources.r2.type = exec
a1.sources.r2.command = tail -F /test2/a.txt

# Interceptors + multiplexing channel selectors
# The interceptor writes a "type" header; the selector maps its value to a channel.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = Main$builder
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = type
a1.sources.r1.selector.mapping.bigdata = c1
a1.sources.r1.selector.mapping.language = c1
a1.sources.r1.selector.mapping.other = c2

a1.sources.r2.interceptors = i2
a1.sources.r2.interceptors.i2.type = Main1$builder
a1.sources.r2.selector.type = multiplexing
a1.sources.r2.selector.header = type
a1.sources.r2.selector.mapping.bigdata1 = c1
a1.sources.r2.selector.mapping.other = c2

# Describe the sinks
# An Avro sink is the sending side; hostname may be an IP address or a host name.
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = 192.168.40.101
a1.sinks.k1.port = 55555

a1.sinks.k2.type = avro
a1.sinks.k2.hostname = 192.168.40.101
a1.sinks.k2.port = 22221

a1.sinks.k4.type = avro
a1.sinks.k4.hostname = 192.168.40.101
a1.sinks.k4.port = 22223

a1.sinks.k3.type = avro
a1.sinks.k3.hostname = 192.168.40.101
a1.sinks.k3.port = 22224

# Failover: k1 and k2 back each other up (the higher priority is tried first)
a1.sinkgroups.g1.processor.type = failover
a1.sinkgroups.g1.processor.priority.k1 = 5
a1.sinkgroups.g1.processor.priority.k2 = 10
a1.sinkgroups.g1.processor.maxpenalty = 10000

# Load balancing: k3 and k4 take turns
a1.sinkgroups.g2.processor.type = load_balance
# Round-robin selection. Keep comments on their own line: a properties file has
# no inline comments, so anything after the value would become part of it.
a1.sinkgroups.g2.processor.selector = round_robin
# selector.maxTimeOut is only used when backoff is enabled
a1.sinkgroups.g2.processor.backoff = true
a1.sinkgroups.g2.processor.selector.maxTimeOut = 10000


# Describe the channels
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000
a1.channels.c2.transactionCapacity = 100

# Bind the sources and sinks to the channels
# All sinks of one group read from the same channel: g1 -> c2, g2 -> c1.
a1.sinkgroups.g1.sinks = k1 k2
a1.sinkgroups.g2.sinks = k3 k4

a1.sources.r1.channels = c1 c2
a1.sinks.k3.channel = c1
a1.sinks.k2.channel = c2

a1.sources.r2.channels = c1 c2
a1.sinks.k4.channel = c1
a1.sinks.k1.channel = c2

自定义拦截器(Java 实现)

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

/**
 * Custom Flume interceptor that classifies events by keywords found in the body.
 *
 * <p>Each event receives a {@code type} header whose value is {@code bigdata},
 * {@code language} or {@code other}. The instance keeps no state between calls.
 */
public class Main implements Interceptor {

    /** Prints a greeting; not used by the interceptor itself. */
    public static void main(String[] args) {
        System.out.println("hello world");
    }

    /** No initialization is needed. */
    @Override
    public void initialize() {
        // Nothing to set up.
    }

    /**
     * Tags a single event with a {@code type} header derived from its body.
     *
     * @param event the event to classify; its header map is modified in place
     * @return the same event instance, with the {@code type} header set
     */
    @Override
    public Event intercept(Event event) {
        Map<String, String> headers = event.getHeaders();

        // Decode with an explicit charset so the result does not depend on the
        // platform default; a missing body is treated as empty text.
        byte[] rawBody = event.getBody();
        String body = rawBody == null ? "" : new String(rawBody, StandardCharsets.UTF_8);

        if (body.contains("flume") || body.contains("hadoop") || body.contains("hive")) {
            headers.put("type", "bigdata");
        } else if (body.contains("java") || body.contains("python") || body.contains("c++")) {
            headers.put("type", "language");
        } else {
            headers.put("type", "other");
        }
        return event;
    }

    /**
     * Tags every event of a batch by delegating to {@link #intercept(Event)}.
     *
     * @param list the batch of events to classify
     * @return a new list holding the tagged events in their original order
     */
    @Override
    public List<Event> intercept(List<Event> list) {
        // A fresh list per call: a result handed out earlier is never cleared
        // or overwritten by a later batch.
        List<Event> tagged = new ArrayList<>(list.size());
        for (Event event : list) {
            tagged.add(intercept(event));
        }
        return tagged;
    }

    /** No resources to release. */
    @Override
    public void close() {
        // Nothing to clean up.
    }

    /**
     * Builder that creates {@link Main} instances.
     *
     * <p>The lower-case class name is kept on purpose: renaming it would
     * change the binary name {@code Main$builder}.
     */
    public static class builder implements Builder {

        /** @return a new interceptor instance */
        @Override
        public Interceptor build() {
            return new Main();
        }

        /**
         * Accepts the interceptor configuration; no setting is read.
         *
         * @param context the configuration context passed in by the caller
         */
        @Override
        public void configure(Context context) {
            // No configurable options.
        }
    }
}

参考尚硅谷配置,方便后续直接使用

你可能感兴趣的:(flume,大数据,java)