



1、 下载Flume安装包


2、 解压安装包

tar -zxvf /opt/software/flume-ng-1.5.0-cdh5.3.6.tar.gz -C /opt/modules/

3、 配置

cp flume-env.sh.template flume-env.sh
cp flume-conf.properties.template flume-conf.properties
在flume-env.sh中设置JAVA_HOME:
export JAVA_HOME=/opt/modules/jdk1.7.0_67

4、 测试运行(netcat source + memory channel + logger sink)



(1) 配置source、channel、sink。官网有个例子如下:

# example.conf: A single-node Flume configuration

# Name the components on this agent
#a1.sources=r1 r2 r3
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444

# Describe the sink
a1.sinks.k1.type = logger

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
(2) 启动flume agent:

/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng agent --name a1 --conf conf/ --conf-file conf/demo.conf -Dflume.root.logger=INFO,console
--conf指定的是flume自身配置文件(如flume-env.sh)所在的目录,注意并不是flume agent的配置文件。
--conf-file指定的是flume agent的配置文件路径。

(3) 查看44444端口是否已经成功启动:

netstat -tlnup |grep 44444

(4) 测试发送信息

nc localhost 44444
在nc中输入内容并回车后,在flume agent的窗口可以看到Flume的logger sink已经输出了发送的数据。

5、 测试运行(avro source + file channel + hdfs sink )

(1) 结构图如下所示:


(2) 配置如下:

# example.conf: A single-node Flume configuration

# Name the components on this agent
#a1.sources=r1 r2 r3
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = avro
a1.sources.r1.bind = localhost
a1.sources.r1.port = 4141

# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://bigdata-51cdh.chybinmy.com:8020/flume/demo
# default:FlumeData
a1.sinks.k1.hdfs.filePrefix = my-
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 10240
a1.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events on disk (file channel)
a1.channels.c1.type = file
#设置file channel的checkpoint目录
a1.channels.c1.checkpointDir = /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/my_check_file
a1.channels.c1.dataDirs = /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/my_data

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

(3) 启动agent

/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng agent --name a1 -c /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/ --conf-file /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/demo2.conf -Dflume.root.logger=INFO,console

(4) 检查端口4141是否启动

netstat -tlnup | grep 4141

(5) 发送avro数据

/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng avro-client -H localhost -p 4141 -F /home/hadoop/input.txt
使用flume自带的avro client将文件input.txt发送到4141端口,flume的avro source接收到数据后,通过file channel,使用hdfs sink写入到配置好的hdfs路径上去。

(6) 查看HDFS的数据

hadoop fs -ls hdfs://bigdata-51cdh.chybinmy.com:8020/flume/demo
hadoop fs -cat /flume/demo/my-.1502151699610.tmp

6、 测试运行(spooldir source + memory channel + hdfs sink )

(1) 结构图如下所示


(2) 配置如下

# example.conf: A single-node Flume configuration

# Name the components on this agent
#a1.sources=r1 r2 r3
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/myspoolfils
a1.sources.r1.fileHeader = true
a1.sources.r1.fileHeaderKey = file

# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://bigdata-51cdh.chybinmy.com:8020/flume/spoolfile
# default:FlumeData
a1.sinks.k1.hdfs.filePrefix = spoolfile-
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 10240
a1.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

(3) 启动agent

/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng agent --name a1 -c /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/ --conf-file /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/demo3.conf -Dflume.root.logger=INFO,console

(4) 将文件拷贝进监听目录

cp /home/hadoop/actionlog2016-08-20.txt /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/myspoolfils/

(5) 查看HDFS文件

hadoop fs -ls /flume/spoolfile
spooldir source监听指定目录,目录中有了新文件就读取其内容,并存入hdfs上指定的目录中。

7、 测试运行(多个agent汇集到一个agent)

(1) 结构

三个agent:http source + memory channel + avro sink
汇集agent:avro source + file channel + hdfs sink


(2) 配置
a1、a2、a3配置如下:(以a1为例,注意a2、a3的配置中agent名字应该分别改为a2、a3,另外各个http source监听的端口号应该互不相同)

# example.conf: A single-node Flume configuration

# Name the components on this agent
#a1.sources=r1 r2 r3
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = http
a1.sources.r1.port = 5140
a1.sources.r1.handler = org.apache.flume.source.http.JSONHandler

# Describe the sink
#配置sink类型为avro,以便传递给下一个汇集agent a4
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = localhost
a1.sinks.k1.port = 4545

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1


汇集agent a4的配置如下:

# example.conf: A single-node Flume configuration

# Name the components on this agent
#a4.sources=r1 r2 r3
a4.sources = r1
a4.sinks = k1
a4.channels = c1

# Describe/configure the source
a4.sources.r1.type = avro
a4.sources.r1.bind = localhost
a4.sources.r1.port = 4545

# Describe the sink
a4.sinks.k1.type = hdfs
a4.sinks.k1.hdfs.path = hdfs://bigdata-51cdh.chybinmy.com:8020/flume/collect/%Y-%m-%d
# default:FlumeData
a4.sinks.k1.hdfs.filePrefix = collect-
a4.sinks.k1.hdfs.useLocalTimeStamp = true
a4.sinks.k1.hdfs.rollInterval = 0
a4.sinks.k1.hdfs.rollCount = 0
a4.sinks.k1.hdfs.rollSize = 10240
a4.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events on disk (file channel)
a4.channels.c1.type = file
#设置file channel的checkpoint目录
a4.channels.c1.checkpointDir = /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/my_check_file
a4.channels.c1.dataDirs = /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/my_data

# Bind the source and sink to the channel
a4.sources.r1.channels = c1
a4.sinks.k1.channel = c1

(3) 启动agent

/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng agent -n a1 -c /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/ -f /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/a1.conf -Dflume.root.logger=INFO,console &
/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng agent -n a2 -c /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/ -f /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/a2.conf -Dflume.root.logger=INFO,console &
/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng agent -n a3 -c /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/ -f /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/a3.conf -Dflume.root.logger=INFO,console &
/opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/bin/flume-ng agent -n a4 -c /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/ -f /opt/modules/apache-flume-1.5.0-cdh5.3.6-bin/conf/a4.conf -Dflume.root.logger=INFO,console &

(4) 发送post 请求

curl -X POST -d'[{"headers" : {"timestamp" : "434324343","host" :"random_host.example.com"},"body" : "random_body"},{"headers" : {"namenode" : "namenode.example.com","datanode" :"random_datanode.example.com"},"body" :"really_random_body"}]' localhost:5140

(5) 查看HDFS上数据

hadoop fs -ls /flume/collect/2017-08-08
hadoop fs -cat /flume/collect/2017-08-08/collect-.1502177104663.tmp

