Flume Notes (1) -- Basic configuration for single-agent collection

1. Log collection: receive data from a network port and sink it to logger

File netcat-logger.conf:

# Name the components on this agent
# Give the three components names
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
# Type: receive data from a network port; the agent runs on this machine, hence localhost.
# (By contrast, type = spooldir watches a directory and collects files as they appear.)
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444

# Describe the sink
a1.sinks.k1.type = logger

# Use a channel which buffers events in memory
# Events are delivered to the sink batch by batch, one event at a time within a batch. Channel parameters:
# capacity: maximum number of events the channel can store
# transactionCapacity: maximum number of events taken from the source or given to the sink per transaction
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

Startup command:
# Tell flume-ng to start an agent with the given configuration; --name is the agent's name
flume-ng agent --conf conf --conf-file conf/netcat-logger.conf --name a1 -Dflume.root.logger=INFO,console

Send in some data:

[root@mini03 ~]# telnet localhost 44444

Trying ::1...
telnet: connect to address ::1: Connection refused
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
hello world!^H^H^H^H^H^H^H^H^H^H^H^H^H^H
OK
tianjun2012!
OK
Data seen on the console:
2017-05-08 13:41:35,766 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:94)] Event: { headers:{} body: 68 65 6C 6C 6F 20 77 6F 72 6C 64 21 08 08 08 08 hello world!.... }
2017-05-08 13:41:40,153 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:94)] Event: { headers:{} body: 74 69 61 6E 6A 75 6E 32 30 31 32 21 0D tianjun2012!. }
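If telnet is not available on the host, nc (netcat) can be used the same way to send test lines; this is only an alternative sketch and assumes the nc utility is installed:

# connect to the netcat source; each line typed becomes one Flume event on the logger sink
nc localhost 44444
hello flume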

2. Monitoring a directory

Startup command:
bin/flume-ng agent -c ./conf -f ./conf/spooldir-logger.conf -n a1 -Dflume.root.logger=INFO,console

Test: move files into /home/hadoop/flumespool (e.g. mv ././xxxFile /home/hadoop/flumespool), but do not create or write files directly inside that directory; a test sketch follows the configuration below.

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
# Watch a directory: spoolDir specifies the directory to monitor; fileHeader controls whether the absolute path of the file is added as an event header
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /home/hadoop/flumespool
a1.sources.r1.fileHeader = true

# Describe the sink
a1.sinks.k1.type = logger

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
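A minimal way to exercise the spooldir source (directory and file names here are only examples): create the spool directory, write a file elsewhere, then move the finished file in. Files must not be modified after being placed in the directory, and a file name must not be reused.

mkdir -p /home/hadoop/flumespool
echo "spooldir test line" > /tmp/test-1.log
mv /tmp/test-1.log /home/hadoop/flumespool/
# after ingestion Flume renames the file to test-1.log.COMPLETED (the default fileSuffix)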

3. Collect data with the tail command and sink it to HDFS

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /home/hadoop/log/test.log
a1.sources.r1.channels = c1

# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs://mini01:9000/flume/events/%y-%m-%d/%H%M/
a1.sinks.k1.hdfs.filePrefix = events-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
a1.sinks.k1.hdfs.rollInterval = 3
a1.sinks.k1.hdfs.rollSize = 20
a1.sinks.k1.hdfs.rollCount = 5
a1.sinks.k1.hdfs.batchSize = 1
a1.sinks.k1.hdfs.useLocalTimeStamp = true
# Generated file type; the default is SequenceFile; DataStream writes plain text
a1.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

Startup command:
flume-ng agent -c conf -f conf/tail-hdfs.conf -n a1

Simulate writing to the log:

[root@mini03 log]# i=1; while (( $i <= 500000 )); do echo $i >> /home/hadoop/log/test.log; sleep 0.5; let 'i++'; done
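The %y-%m-%d/%H%M escapes in hdfs.path are filled from the event timestamp; the exec source does not add a timestamp header itself, which is why useLocalTimeStamp = true is set in the configuration above. Once events start arriving, the time-bucketed directories can be listed (path as configured above):

hdfs dfs -ls -R /flume/events/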

View the file contents on HDFS:

[root@mini01 ~]# hdfs dfs -cat /flume/events/17-05-08/1530/*
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

Note: in this example these values are set very small so the effect can be seen quickly; in a real deployment they need to be tuned:

a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.rollInterval = 3
a1.sinks.k1.hdfs.rollSize = 20
a1.sinks.k1.hdfs.rollCount = 5
a1.sinks.k1.hdfs.batchSize = 1

Below is a configuration from a real environment:

agent1.sources = spooldirSource
agent1.channels = fileChannel
agent1.sinks = hdfsSink

agent1.sources.spooldirSource.type=spooldir
agent1.sources.spooldirSource.spoolDir=/home/hadoop/log
agent1.sources.spooldirSource.channels=fileChannel

agent1.sinks.hdfsSink.type=hdfs
agent1.sinks.hdfsSink.hdfs.path=hdfs://mini01:9000/weblog/flume-input/%y-%m-%d
agent1.sinks.hdfsSink.hdfs.filePrefix=flume-
agent1.sinks.hdfsSink.hdfs.round = true
# Number of seconds to wait before rolling current file (0 = never roll based on time interval)
agent1.sinks.hdfsSink.hdfs.rollInterval = 3600
# File size to trigger roll, in bytes (0: never roll based on file size)
agent1.sinks.hdfsSink.hdfs.rollSize = 128000000
agent1.sinks.hdfsSink.hdfs.rollCount = 0
agent1.sinks.hdfsSink.hdfs.batchSize = 1000

#Rounded down to the highest multiple of this (in the unit configured using hdfs.roundUnit), less than current time.
agent1.sinks.hdfsSink.hdfs.roundValue = 1
agent1.sinks.hdfsSink.hdfs.roundUnit = minute
agent1.sinks.hdfsSink.hdfs.useLocalTimeStamp = true
agent1.sinks.hdfsSink.channel=fileChannel
agent1.sinks.hdfsSink.hdfs.fileType = DataStream


agent1.channels.fileChannel.type = file
agent1.channels.fileChannel.checkpointDir=/tmp/flume/flume-bineckpoint
agent1.channels.fileChannel.dataDirs=/tmp/flume/dataDir
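Before starting this agent it can help to create the local directories used by the file channel and to check that the HDFS target is reachable; a minimal sketch using the paths from the configuration above (the HDFS command assumes the cluster is reachable from this host):

# local directories used by the file channel for checkpoints and event data
mkdir -p /tmp/flume/flume-bineckpoint /tmp/flume/dataDir
# the HDFS sink creates its path automatically; this just verifies cluster access
hdfs dfs -mkdir -p /weblog/flume-input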

 

bin/flume-ng agent --conf ./conf/ -f conf/spooldir-hdfs.conf -Dflume.root.logger=DEBUG,console -n agent1 > log.log 2>&1 &
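The command above backgrounds the agent and redirects both stdout and stderr to log.log; two quick checks (not part of the original post) to confirm it is running and to watch its output:

ps -ef | grep flume
tail -f log.log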

Reposted from: https://www.cnblogs.com/tianjun2012/p/6825066.html
