mkdir tmpconf
vim tmpconf/a1.conf
# Agent a1: netcat source -> memory channel -> logger sink.
a1.sources=r1
a1.sinks=k1
a1.channels=c1

# Source r1: netcat source bound to node01:44444, writing into channel c1.
a1.sources.r1.type=netcat
a1.sources.r1.bind=node01
a1.sources.r1.port=44444
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: logger sink reading from channel c1.
a1.sinks.k1.type=logger
a1.sinks.k1.channel=c1
执行命令
bin/flume-ng agent -c conf -f tmpconf/a1.conf -n a1 -Dflume.root.logger=INFO,console
在另一个节点执行
telnet node01 44444
# Create the directory watched by the spooldir source; the path must be the
# absolute /export/dir used as spoolDir in b1.conf. -p also creates /export.
mkdir -p /export/dir
vim tmpconf/b1.conf
数据
# Agent a1: spooling-directory source -> memory channel -> HDFS sink.
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source r1: spooldir source watching /export/dir.
# Note: never drop a file into the watched directory under a name that has
# already been used there.
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /export/dir
a1.sources.r1.fileHeader = true
a1.sources.r1.channels = c1

# Channel c1: in-memory buffer.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Sink k1: HDFS sink writing under /spooldir/ on node01:8020.
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://node01:8020/spooldir/
a1.sinks.k1.channel = c1
bin/flume-ng agent -c conf -f tmpconf/b1.conf -n a1 -Dflume.root.logger=INFO,console
采集前: 11.txt
采集后: 11.txt.COMPLETED
vim tmpconf/c1.conf
数据
# Agent a1: exec (tail -F) source -> memory channel -> HDFS sink.
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source r1: exec source running tail -F on the access log.
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /export/taillogs/access_log
a1.sources.r1.channels = c1

# Channel c1: in-memory buffer.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Sink k1: HDFS sink writing under /spooldir/ on node01:8020.
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://node01:8020/spooldir/
a1.sinks.k1.channel = c1
bin/flume-ng agent -c conf -f tmpconf/c1.conf -n a1 -Dflume.root.logger=INFO,console
mkdir /export/taillogs
touch /export/taillogs/access_log
脚本数据
#!/bin/bash
# Test-data generator: appends the current date to the access log every
# 0.5 seconds, forever (stop it with Ctrl-C).
# The target must be the same file that is created with touch and tailed by
# the exec source, i.e. /export/taillogs/access_log.
while true
do
  date >> /export/taillogs/access_log
  sleep 0.5
done
查看效果
tail -f /export/taillogs/access_log
vim tmpconf/d1.conf
数据
# Agent a1 (sender side): exec source -> memory channel -> avro sink.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# Source r1: exec source running tail -F on the access log.
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /export/taillogs/access_log
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: on the sink side, avro is the data sender; it targets node02:4141.
a1.sinks.k1.type=avro
a1.sinks.k1.hostname=node02
a1.sinks.k1.port=4141
a1.sinks.k1.channel=c1
vim tmpconf/d2.conf
数据
# Agent a1 (receiver side): avro source -> memory channel -> HDFS sink.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# Source r1: on the source side, avro is the receiving service; it is bound
# to node02:4141.
a1.sources.r1.type=avro
a1.sources.r1.bind=node02
a1.sources.r1.port=4141
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: HDFS sink writing under /avro on node01:8020.
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://node01:8020/avro
a1.sinks.k1.channel=c1
执行
bin/flume-ng agent -c conf -f tmpconf/d2.conf -n a1 -Dflume.root.logger=INFO,console
bin/flume-ng agent -c conf -f tmpconf/d1.conf -n a1 -Dflume.root.logger=INFO,console
执行顺序不能乱:必须先启动 d2.conf(avro source,接收端),再启动 d1.conf(avro sink,发送端)
vim tmpconf/e1.conf
数据
# Agent agent1: exec source -> memory channel -> two avro sinks in a
# failover sink group.
agent1.sources=r1
agent1.channels=c1
agent1.sinks=k1 k2

# Source r1: exec source running tail -F on the access log.
agent1.sources.r1.type=exec
agent1.sources.r1.command=tail -F /export/taillogs/access_log
agent1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
agent1.channels.c1.type=memory
agent1.channels.c1.capacity=1000
agent1.channels.c1.transactionCapacity=100

# Sink k1: avro sink sending to node02:52020.
agent1.sinks.k1.type=avro
agent1.sinks.k1.hostname=node02
agent1.sinks.k1.port=52020
agent1.sinks.k1.channel=c1

# Sink k2: avro sink sending to node03:52020.
agent1.sinks.k2.type=avro
agent1.sinks.k2.hostname=node03
agent1.sinks.k2.port=52020
agent1.sinks.k2.channel=c1

# Sink group g1: failover processor over k1 and k2; k1 carries the larger
# priority value (2 vs. 1).
agent1.sinkgroups=g1
agent1.sinkgroups.g1.sinks=k1 k2
agent1.sinkgroups.g1.processor.type=failover
agent1.sinkgroups.g1.processor.priority.k1=2
agent1.sinkgroups.g1.processor.priority.k2=1
agent1.sinkgroups.g1.processor.maxpenalty=10000
vim tmpconf/e2.conf
数据
# Agent a1 on node02: avro source -> memory channel -> HDFS sink.
a1.sources=r1
a1.sinks=k1
a1.channels=c1

# Source r1: avro source bound to node02:52020.
a1.sources.r1.type=avro
a1.sources.r1.bind=node02
a1.sources.r1.port=52020
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: HDFS sink writing under /flume/failover/ on node01:8020.
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://node01:8020/flume/failover/
a1.sinks.k1.channel=c1
vim tmpconf/e3.conf
数据
# Agent a1 on node03: avro source -> memory channel -> HDFS sink.
a1.sources=r1
a1.sinks=k1
a1.channels=c1

# Source r1: avro source bound to node03:52020.
a1.sources.r1.type=avro
a1.sources.r1.bind=node03
a1.sources.r1.port=52020
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: HDFS sink writing under /flume/failover/ on node01:8020.
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://node01:8020/flume/failover/
a1.sinks.k1.channel=c1
bin/flume-ng agent -c conf -f tmpconf/e3.conf -n a1 -Dflume.root.logger=INFO,console
bin/flume-ng agent -c conf -f tmpconf/e2.conf -n a1 -Dflume.root.logger=INFO,console
# e1.conf declares all of its properties under the agent name "agent1",
# so -n must be agent1 (with -n a1 no matching configuration is found).
bin/flume-ng agent -c conf -f tmpconf/e1.conf -n agent1 -Dflume.root.logger=INFO,console
vim tmpconf/f1.conf
数据:
# Agent a1: exec source -> memory channel -> two avro sinks in a
# load-balancing sink group.
a1.sources=r1
a1.channels=c1
a1.sinks=k1 k2

# Source r1: exec source running tail -F on the access log.
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /export/taillogs/access_log
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: avro sink sending to node02:52021.
a1.sinks.k1.type=avro
a1.sinks.k1.hostname=node02
a1.sinks.k1.port=52021
a1.sinks.k1.channel=c1

# Sink k2: avro sink sending to node03:52021.
a1.sinks.k2.type=avro
a1.sinks.k2.hostname=node03
a1.sinks.k2.port=52021
a1.sinks.k2.channel=c1

# Sink group g1: load_balance processor over k1 and k2 with a round_robin
# selector and backoff enabled.
a1.sinkgroups=g1
a1.sinkgroups.g1.sinks=k1 k2
a1.sinkgroups.g1.processor.type=load_balance
a1.sinkgroups.g1.processor.backoff=true
a1.sinkgroups.g1.processor.selector=round_robin
a1.sinkgroups.g1.processor.selector.maxTimeOut=10000
vim tmpconf/f2.conf
数据
# Agent a1 on node02: avro source -> memory channel -> logger sink.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# Source r1: avro source bound to node02:52021.
a1.sources.r1.type=avro
a1.sources.r1.bind=node02
a1.sources.r1.port=52021
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: logger sink reading from channel c1.
a1.sinks.k1.type=logger
a1.sinks.k1.channel=c1
vim tmpconf/f3.conf
数据
# Agent a1 on node03: avro source -> memory channel -> logger sink.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# Source r1: avro source bound to node03:52021.
a1.sources.r1.type=avro
a1.sources.r1.bind=node03
a1.sources.r1.port=52021
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# Sink k1: logger sink reading from channel c1.
a1.sinks.k1.type=logger
a1.sinks.k1.channel=c1
bin/flume-ng agent -c conf -f tmpconf/f3.conf -n a1 -Dflume.root.logger=INFO,console
bin/flume-ng agent -c conf -f tmpconf/f2.conf -n a1 -Dflume.root.logger=INFO,console
bin/flume-ng agent -c conf -f tmpconf/f1.conf -n a1 -Dflume.root.logger=INFO,console
vim tmpconf/g1.conf
数据
# Agent a1: three exec sources, each tagged by a static interceptor,
# sharing one memory channel that drains into a single avro sink.
a1.sources=r1 r2 r3
a1.channels=c1
a1.sinks=k1

# A static interceptor inserts a user-defined key-value pair into the
# header of every collected event; here the key is "type" in all three
# sources and the value names the log being tailed.

# Source r1: tails access.log, header type=access.
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /export/taillogs/access.log
a1.sources.r1.interceptors=i1
a1.sources.r1.interceptors.i1.type=static
a1.sources.r1.interceptors.i1.key=type
a1.sources.r1.interceptors.i1.value=access
a1.sources.r1.channels=c1

# Source r2: tails nginx.log, header type=nginx.
a1.sources.r2.type=exec
a1.sources.r2.command=tail -F /export/taillogs/nginx.log
a1.sources.r2.interceptors=i2
a1.sources.r2.interceptors.i2.type=static
a1.sources.r2.interceptors.i2.key=type
a1.sources.r2.interceptors.i2.value=nginx
a1.sources.r2.channels=c1

# Source r3: tails web.log, header type=web.
a1.sources.r3.type=exec
a1.sources.r3.command=tail -F /export/taillogs/web.log
a1.sources.r3.interceptors=i3
a1.sources.r3.interceptors.i3.type=static
a1.sources.r3.interceptors.i3.key=type
a1.sources.r3.interceptors.i3.value=web
a1.sources.r3.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=20000
a1.channels.c1.transactionCapacity=10000

# Sink k1: avro sink sending to node03:41414.
a1.sinks.k1.type=avro
a1.sinks.k1.hostname=node03
a1.sinks.k1.port=41414
a1.sinks.k1.channel=c1
# Collector config: must not reuse the name g1.conf, which already holds the
# three-source sender config. NOTE(review): g3.conf is assumed because it is
# the file started first in the run commands below - confirm.
vim tmpconf/g3.conf
数据
# Agent a1 (collector): avro source with a timestamp interceptor ->
# memory channel -> HDFS sink.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# Source r1: avro source bound to 192.168.52.120:41414.
a1.sources.r1.type=avro
a1.sources.r1.bind=192.168.52.120
a1.sources.r1.port=41414
# Timestamp interceptor attached to the source.
a1.sources.r1.interceptors=i1
a1.sources.r1.interceptors.i1.type=org.apache.flume.interceptor.TimestampInterceptor$Builder
a1.sources.r1.channels=c1

# Channel c1: in-memory buffer.
a1.channels.c1.type=memory
a1.channels.c1.capacity=20000
a1.channels.c1.transactionCapacity=10000

# Sink k1: HDFS sink; the path uses the %{type} header escape and the
# %Y%m%d date escapes.
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://192.168.52.100:8020/source/logs/%{type}/%Y%m%d
a1.sinks.k1.channel=c1
创建文件添加脚本
vim mkdata2.sh
脚本数据
#!/bin/bash
# Endless test-data generator: every 0.5 s, append the current date to
# access.log, web.log and nginx.log (in that order) under /export/taillogs.
while :
do
  for log_name in access.log web.log nginx.log
  do
    date >> "/export/taillogs/${log_name}"
  done
  sleep 0.5
done
执行
bin/flume-ng agent -c conf -f tmpconf/g3.conf -n a1 -Dflume.root.logger=INFO,console
bin/flume-ng agent -c conf -f tmpconf/g1.conf -n a1 -Dflume.root.logger=INFO,console
bin/flume-ng agent -c conf -f tmpconf/g2.conf -n a1 -Dflume.root.logger=INFO,console