Summary
A single agent can run multiple sources, channels, and sinks.
Multiple agents can be combined into a two-layer (collection + aggregation) architecture.
# copy an existing config as the starting point for the multi-channel agent
cp hive-mem-size.properties mutiple1.properties
# create the local directory used by the file_roll sink (k3)
cd /export/datas/flume/
mkdir fileroll
# define sourceName/channelName/sinkName for the agent
a1.sources = s1
a1.channels = c1 c2 c3
a1.sinks = k1 k2 k3
# define the s1
a1.sources.s1.type = exec
a1.sources.s1.command = tail -f /export/servers/hive-1.1.0-cdh5.14.0/logs/hive.log
# define the c1 (capacity: max events buffered in the channel; transactionCapacity: max events per transaction)
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# define the c2
a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000
a1.channels.c2.transactionCapacity = 100
# define the c3
a1.channels.c3.type = memory
a1.channels.c3.capacity = 1000
a1.channels.c3.transactionCapacity = 100
# def the k1
a1.sinks.k1.type = logger
# def the k2
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = /flume/hdfs/mutiple
a1.sinks.k2.hdfs.filePrefix = hiveLog
a1.sinks.k2.hdfs.fileSuffix = .log
a1.sinks.k2.hdfs.fileType = DataStream
# roll files by size (10 KB); 0 disables time- and count-based rolling
a1.sinks.k2.hdfs.rollSize = 10240
a1.sinks.k2.hdfs.rollInterval = 0
a1.sinks.k2.hdfs.rollCount = 0
# def the k3
a1.sinks.k3.type = file_roll
a1.sinks.k3.sink.directory = /export/datas/flume/fileroll
# bind sources, channels, and sinks
a1.sources.s1.channels = c1 c2 c3
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2
a1.sinks.k3.channel = c3
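The fan-out from s1 into three channels works because a source's channel selector defaults to replicating, which copies every event into each channel listed on the source. Making this explicit is optional; if desired, add:

a1.sources.s1.selector.type = replicating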
bin/flume-ng agent -c conf/ -f userCase/mutiple1.properties -n a1 -Dflume.root.logger=INFO,console
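To drive some traffic, run any statement in the hive shell (for example, show databases;) so that hive.log grows. Events should then appear on the agent console (k1), under /flume/hdfs/mutiple on HDFS (k2), and as local files (k3). A quick check, assuming the hdfs client is on the PATH:

hdfs dfs -ls /flume/hdfs/mutiple
ls /export/datas/flume/fileroll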
In this use case we only want to deliver the data to three destinations, one sink per destination. Why do we need three channels? Could a single channel work?
No.
A channel holds only one copy of the data.
If multiple sinks drain the same channel, they share that single copy: each event is taken by exactly one sink, so the sinks end up load-balancing the stream instead of each receiving all of it.
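For illustration, a hypothetical single-channel binding would look like this; k1, k2, and k3 would all compete to take events from c1, so each event would reach only one of the three destinations:

a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c1
a1.sinks.k3.channel = c1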
# start from the single-sink HDFS config
cp hive-mem-hdfs.properties mutiple2.properties
# create a second log file to act as the nginx source
touch /export/datas/flume/nginx.log
# define sourceName/channelName/sinkName for the agent
a1.sources = s1 s2
a1.channels = c1
a1.sinks = k1
# define the s1
a1.sources.s1.type = exec
a1.sources.s1.command = tail -f /export/servers/hive-1.1.0-cdh5.14.0/logs/hive.log
# define the s2
a1.sources.s2.type = exec
a1.sources.s2.command = tail -f /export/datas/flume/nginx.log
# define the c1
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# def the k1
a1.sinks.k1.type = hdfs
# HDFS directory to write into
a1.sinks.k1.hdfs.path = /flume/hdfs/sources
# prefix for the generated files
a1.sinks.k1.hdfs.filePrefix = hiveLog
# suffix for the generated files
a1.sinks.k1.hdfs.fileSuffix = .log
# type of file written to HDFS: plain data file
a1.sinks.k1.hdfs.fileType = DataStream
# bind sources, channels, and sinks
a1.sources.s1.channels = c1
a1.sources.s2.channels = c1
a1.sinks.k1.channel = c1
bin/flume-ng agent -c conf/ -f userCase/mutiple2.properties -n a1 -Dflume.root.logger=INFO,console
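To test, append a line to the nginx file and, assuming the hdfs client is on the PATH, list the output directory; events from both sources land in the same HDFS directory:

echo "nginx test $(date)" >> /export/datas/flume/nginx.log
hdfs dfs -ls /flume/hdfs/sources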
Prefer the second option: aggregate through a second Flume layer instead of having every machine write to HDFS directly. Many Flume agents writing to HDFS concurrently, each with multiple threads, makes HDFS disk I/O and network I/O surge and drives the write load very high; a collection layer in front of HDFS reduces that load.
How do we send data from one Flume agent to another? Pair an avro sink on the sending agent with an avro source on the receiving agent, as the configuration below shows.
Collection layer (a1, on node-01 and node-02): tails the data file and sends it to the second-layer aggregation agent.
Aggregation layer (a2, on node-03): receives the data sent by all first-layer agents and writes it to HDFS.
# create the file to be tailed on each collection machine
touch /export/datas/nginx.log
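The resulting topology, as a sketch:

node-01 (a1: exec source -> memory channel -> avro sink) --+
                                                           +--> node-03 (a2: avro source -> memory channel -> hdfs sink) --> /flume/hdfs/collect
node-02 (a1: exec source -> memory channel -> avro sink) --+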
cp hive-mem-hdfs.properties collect.properties
################################## a1: the first and second machines collect the log and send it to the third machine
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# define the s1
a1.sources.s1.type = exec
a1.sources.s1.command = tail -f /export/datas/nginx.log
# define the c1
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# def the k1: avro sink, forwards events to the avro source on node-03:45454
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = node-03
a1.sinks.k1.port = 45454
# bind source, channel, and sink
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
############################################ a2: write the data sent over by the first and second machines to HDFS
a2.sources = s1
a2.channels = c1
a2.sinks = k1
# define the s1: avro source, listens on node-03:45454 for events from the first layer
a2.sources.s1.type = avro
a2.sources.s1.bind = node-03
a2.sources.s1.port = 45454
# define the c1
a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100
# def the k1
a2.sinks.k1.type = hdfs
a2.sinks.k1.hdfs.path = /flume/hdfs/collect
a2.sinks.k1.hdfs.filePrefix = hiveLog
a2.sinks.k1.hdfs.fileSuffix = .log
a2.sinks.k1.hdfs.fileType = DataStream
# bind source, channel, and sink
a2.sources.s1.channels = c1
a2.sinks.k1.channel = c1
cd /export/servers/flume-1.6.0-cdh5.14.0-bin/userCase
# distribute the config to the two collection-layer machines
scp collect.properties node-01:$PWD
scp collect.properties node-02:$PWD
# on node-03, start the aggregation agent (a2) first so its avro source is listening
bin/flume-ng agent -c conf/ -f userCase/collect.properties -n a2 -Dflume.root.logger=INFO,console
# then, on node-01 and node-02, start the collection agent (a1)
bin/flume-ng agent -c conf/ -f userCase/collect.properties -n a1 -Dflume.root.logger=INFO,console
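To verify end to end, append to the tailed file on node-01 or node-02, then list the collector's output directory (assuming the hdfs client is configured there); files appear under the hiveLog prefix once they roll:

echo "nginx test $(date)" >> /export/datas/nginx.log
hdfs dfs -ls /flume/hdfs/collect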