# Flume configuration parameters explained
#
# 1. Agent
# Names of the sources, channels and sinks that make up agent_1.
# Multiple names on one line are separated by spaces.
agent_1.sources = weixin_source_from_kafka
agent_1.channels = weixin_channel1
agent_1.sinks = weixin_sinks1 weixin_sinks2
# 2. Sink group
# Declare a single sink group named "g1".
agent_1.sinkgroups = g1
# Both weixin_sinks1 and weixin_sinks2 are members of g1.
# NOTE(review): weixin_sinks2 needs its own agent_1.sinks.weixin_sinks2.* settings;
# confirm they are defined elsewhere in this file.
agent_1.sinkgroups.g1.sinks = weixin_sinks1 weixin_sinks2
# Failover processor: events go to the highest-priority sink that is available.
agent_1.sinkgroups.g1.processor.type = failover
# Sink priorities (not load-balancing weights): the larger value is tried first,
# so weixin_sinks1 (10) is the primary and weixin_sinks2 (5) is the standby.
agent_1.sinkgroups.g1.processor.priority.weixin_sinks1 = 10
agent_1.sinkgroups.g1.processor.priority.weixin_sinks2 = 5
# Upper bound, in milliseconds, on the back-off period applied to a failed sink.
agent_1.sinkgroups.g1.processor.maxpenalty = 1000
# 3. Sources
# Source type: Flume's Kafka source.
agent_1.sources.weixin_source_from_kafka.type = org.apache.flume.source.kafka.KafkaSource
# Kafka broker list, as comma-separated host:port pairs.
agent_1.sources.weixin_source_from_kafka.kafka.bootstrap.servers = weixin-1:6667,weixin-6:6667,weixin-7:6667
# Kafka topic(s) to consume from.
# `kafka.topics` replaces the deprecated `topic` key and matches the new-style
# `kafka.bootstrap.servers` key used above.
agent_1.sources.weixin_source_from_kafka.kafka.topics = weixin
# Maximum number of messages written to the channel in one batch.
agent_1.sources.weixin_source_from_kafka.batchSize = 6000
# Kafka consumer group id.
# `kafka.consumer.group.id` replaces the deprecated `groupId` key.
agent_1.sources.weixin_source_from_kafka.kafka.consumer.group.id = wxf_updown_agent
# Channel that buffers the events produced by this source.
agent_1.sources.weixin_source_from_kafka.channels = weixin_channel1
# Interceptor chain for this source; it contains one interceptor, named i1.
agent_1.sources.weixin_source_from_kafka.interceptors = i1
# Builder class of the project's custom interceptor.
# NOTE(review): presumably this sets the "actiontype" event header that the HDFS sink path uses; confirm.
agent_1.sources.weixin_source_from_kafka.interceptors.i1.type = cn.yivew.weixin.interceptor.hdfs.HdfsBaseInterceptor$Builder
# 4. Channels
# Channel type: in-memory queue.
agent_1.channels.weixin_channel1.type = memory
# Maximum number of events held in the channel.
agent_1.channels.weixin_channel1.capacity = 60000
# Maximum number of events per transaction (one source put or one sink take).
# Must be at least as large as the source and sink batch sizes (6000 each in this file).
agent_1.channels.weixin_channel1.transactionCapacity = 12000
# Maximum total size, in bytes, of all events in the channel; only event bodies are counted.
agent_1.channels.weixin_channel1.byteCapacity = 131072000
# Percentage of byteCapacity kept as a buffer to account for event header data,
# which byteCapacity does not count.
agent_1.channels.weixin_channel1.byteCapacityBufferPercentage = 20
# Timeout, in seconds, for adding an event to or removing an event from the channel.
agent_1.channels.weixin_channel1.keep-alive = 30
# NOTE(review): `kafka.consumer.enable.auto.commit` is a Kafka consumer setting and is not
# one of the memory channel's documented properties, so it is commented out here.
# If it is still wanted, it belongs on the Kafka source, as
# agent_1.sources.weixin_source_from_kafka.kafka.consumer.enable.auto.commit -- confirm.
#agent_1.channels.weixin_channel1.kafka.consumer.enable.auto.commit = false
# 5. Sinks
# Channel this sink drains.
agent_1.sinks.weixin_sinks1.channel = weixin_channel1
# Sink type: write events to HDFS.
agent_1.sinks.weixin_sinks1.type = hdfs
# File name prefix; %H is replaced by the hour (00-23).
agent_1.sinks.weixin_sinks1.hdfs.filePrefix = weixin_%H
# File name suffix.
agent_1.sinks.weixin_sinks1.hdfs.fileSuffix = .txt
# Target HDFS directory. %{actiontype} is replaced by the value of the event header
# named "actiontype"; %Y%m%d is replaced by the date.
agent_1.sinks.weixin_sinks1.hdfs.path = hdfs://sx-wx/application/offline/weixin/wxf/base/%{actiontype}/%Y%m%d
# Use the agent's local time, not the event's timestamp header, for the time escapes above.
agent_1.sinks.weixin_sinks1.hdfs.useLocalTimeStamp = true
# File rolling: a new file is started at 131072000 bytes or after 600 seconds,
# whichever comes first; rolling by event count is disabled.
# Roll when the file reaches this size, in bytes.
agent_1.sinks.weixin_sinks1.hdfs.rollSize = 131072000
# 0 = never roll based on the number of events.
agent_1.sinks.weixin_sinks1.hdfs.rollCount = 0
# Roll after this many seconds.
agent_1.sinks.weixin_sinks1.hdfs.rollInterval = 600
# Number of events written to the file before it is flushed to HDFS.
agent_1.sinks.weixin_sinks1.hdfs.batchSize = 6000
# Number of threads this sink uses for HDFS I/O operations (open, write, ...).
agent_1.sinks.weixin_sinks1.hdfs.threadsPoolSize = 10
# Time, in milliseconds, allowed for a single HDFS operation (open, write, flush, close).
# This is not the idle-file close timeout; that is the separate hdfs.idleTimeout setting.
agent_1.sinks.weixin_sinks1.hdfs.callTimeout = 600000
# DataStream writes the output file uncompressed.
agent_1.sinks.weixin_sinks1.hdfs.fileType = DataStream
# Write events as plain text records.
agent_1.sinks.weixin_sinks1.hdfs.writeFormat = Text