Flume参数配置详解

Flume参数配置详解

一、agent

# Component names for agent_1: every source, channel and sink referenced
# by the rest of the configuration must be declared here first.

# Sinks (space-separated list).
# NOTE(review): weixin_sinks2 is declared here, but no
# agent_1.sinks.weixin_sinks2.* properties are visible in this file;
# confirm it is configured elsewhere.
agent_1.sinks = weixin_sinks1 weixin_sinks2

# Channels.
agent_1.channels = weixin_channel1

# Sources.
agent_1.sources = weixin_source_from_kafka

二、分组

# Sink group "g1": bundles both sinks behind a single sink processor.
agent_1.sinkgroups = g1

# Members of the group.
agent_1.sinkgroups.g1.sinks = weixin_sinks1 weixin_sinks2

# Failover processor: events go to the highest-priority sink that is
# currently available.
agent_1.sinkgroups.g1.processor.type = failover

# Priorities: the larger value wins, so weixin_sinks1 (10) is preferred
# over weixin_sinks2 (5).
agent_1.sinkgroups.g1.processor.priority.weixin_sinks1 = 10
agent_1.sinkgroups.g1.processor.priority.weixin_sinks2 = 5

# Upper bound on the back-off period applied to a failed sink, in
# milliseconds.
agent_1.sinkgroups.g1.processor.maxpenalty = 1000

三、sources

# Kafka source: consumes messages from Kafka and hands them to the channel.
#
# The property names below follow the Kafka-source naming used together with
# kafka.bootstrap.servers (kafka.topics / kafka.consumer.group.id). The older
# "topic" and "groupId" keys are deprecated aliases and should not be mixed
# with the new-style broker setting.

# Source implementation class.
agent_1.sources.weixin_source_from_kafka.type = org.apache.flume.source.kafka.KafkaSource

# Kafka broker list (host:port, comma-separated).
agent_1.sources.weixin_source_from_kafka.kafka.bootstrap.servers = weixin-1:6667,weixin-6:6667,weixin-7:6667

# Topic(s) to consume, comma-separated; must exist in Kafka.
agent_1.sources.weixin_source_from_kafka.kafka.topics = weixin

# Maximum number of messages written to the channel in one batch.
# Must not exceed the channel's transactionCapacity.
agent_1.sources.weixin_source_from_kafka.batchSize = 6000

# Kafka consumer group id.
agent_1.sources.weixin_source_from_kafka.kafka.consumer.group.id = wxf_updown_agent

# Channel that buffers the events produced by this source.
agent_1.sources.weixin_source_from_kafka.channels = weixin_channel1

# Interceptor chain (space-separated names, applied in order).
agent_1.sources.weixin_source_from_kafka.interceptors = i1

# i1: project-specific interceptor, referenced through its nested Builder class.
agent_1.sources.weixin_source_from_kafka.interceptors.i1.type = cn.yivew.weixin.interceptor.hdfs.HdfsBaseInterceptor$Builder

四、channels

# Memory channel that buffers events between the Kafka source and the sinks.

# Channel implementation: in-memory queue (events are lost if the agent dies).
agent_1.channels.weixin_channel1.type = memory

# Maximum number of events held in the channel.
agent_1.channels.weixin_channel1.capacity = 60000

# Maximum number of events per transaction (one put by a source or one take
# by a sink). Must be at least as large as the batchSize of every source and
# sink attached to this channel (6000 in this file).
agent_1.channels.weixin_channel1.transactionCapacity = 12000

# Upper bound, in bytes, on the total size of all events in the channel.
# Only event bodies are counted towards this limit.
agent_1.channels.weixin_channel1.byteCapacity = 131072000

# Percentage of byteCapacity held back as headroom for event headers,
# which are not included in the byteCapacity accounting.
agent_1.channels.weixin_channel1.byteCapacityBufferPercentage = 20

# Timeout, in seconds, when waiting to add an event to a full channel or to
# take one from an empty channel.
agent_1.channels.weixin_channel1.keep-alive = 30

# Kafka consumer setting. It was originally written under the memory channel
# prefix, where no kafka.* properties are defined; kafka.consumer.* properties
# are read by the Kafka source, so the key is scoped to the source here.
# NOTE(review): the Kafka source manages offset commits itself, so this line
# is probably redundant -- confirm against the Flume version in use.
agent_1.sources.weixin_source_from_kafka.kafka.consumer.enable.auto.commit = false

 

五、sinks

# HDFS sink weixin_sinks1: drains weixin_channel1 into plain-text files on HDFS.

# Channel this sink reads from.
agent_1.sinks.weixin_sinks1.channel = weixin_channel1

# Sink type.
agent_1.sinks.weixin_sinks1.type = hdfs

# File name prefix; %H is replaced by the hour of day (00-23).
agent_1.sinks.weixin_sinks1.hdfs.filePrefix = weixin_%H

# File name suffix.
agent_1.sinks.weixin_sinks1.hdfs.fileSuffix = .txt

# Target directory. %{actiontype} is replaced by the value of the event header
# "actiontype" (presumably set by the source interceptor -- verify);
# %Y%m%d is replaced by the date.
agent_1.sinks.weixin_sinks1.hdfs.path = hdfs://sx-wx/application/offline/weixin/wxf/base/%{actiontype}/%Y%m%d

# Resolve the time escapes above from the agent's local clock instead of the
# "timestamp" event header.
agent_1.sinks.weixin_sinks1.hdfs.useLocalTimeStamp = true

# --- File rolling: a file is closed when either enabled trigger fires ---

# Roll when the file reaches this size in bytes (125 MiB).
agent_1.sinks.weixin_sinks1.hdfs.rollSize = 131072000

# 0 disables rolling based on the number of events.
agent_1.sinks.weixin_sinks1.hdfs.rollCount = 0

# Roll after this many seconds (600 s = 10 minutes).
agent_1.sinks.weixin_sinks1.hdfs.rollInterval = 600

# Number of events written to the file before it is flushed to HDFS.
# Must not exceed the channel's transactionCapacity.
agent_1.sinks.weixin_sinks1.hdfs.batchSize = 6000

# Number of threads this sink uses for HDFS I/O operations
# (open, write, flush, close).
agent_1.sinks.weixin_sinks1.hdfs.threadsPoolSize = 10

# Time allowed, in milliseconds, for a single HDFS operation before it is
# treated as timed out (600000 ms = 10 minutes).
# NOTE(review): closing files that have been inactive for a while is
# controlled by hdfs.idleTimeout (in seconds), not by this property --
# confirm which behaviour was intended.
agent_1.sinks.weixin_sinks1.hdfs.callTimeout = 600000

# Write the raw event stream without compression.
agent_1.sinks.weixin_sinks1.hdfs.fileType = DataStream

# Serialize records as text.
agent_1.sinks.weixin_sinks1.hdfs.writeFormat = Text

你可能感兴趣的:(Flume参数配置详解)