# 生产环境下将收集到的日志上传至 S3。采用多个 spoolDir source 和多个 hdfs sink 的方式，是为了提高读取数据和上传数据的吞吐量。
# Component inventory for the Flume agent named "clog".
# Two sources feed a single shared channel, which is drained by six sinks.
clog.channels = channel_log
clog.sources = source_log1 source_log2
clog.sinks = sink_log1 sink_log2 sink_log3 sink_log4 sink_log5 sink_log6
# source_log1: spooling-directory source that ingests files dropped into
# /home/data/log1 and publishes their lines to channel_log.
clog.sources.source_log1.type = spooldir
clog.sources.source_log1.channels = channel_log
clog.sources.source_log1.spoolDir = /home/data/log1

# Skip every file whose name does not start with a digit.
clog.sources.source_log1.ignorePattern = ^[^0-9].*
# Delete each file from the spool directory as soon as it has been consumed.
clog.sources.source_log1.deletePolicy = immediate
# Transfer events to the channel in batches of 1000.
clog.sources.source_log1.batchSize = 1000
# Permit up to 999999 characters per line before the deserializer splits it.
clog.sources.source_log1.deserializer.maxLineLength = 999999
# Silently drop byte sequences that cannot be decoded instead of failing.
clog.sources.source_log1.decodeErrorPolicy = IGNORE
# Store the file's base name in an event header.
# NOTE(review): presumably this header is what interceptor i1 parses - confirm.
clog.sources.source_log1.basenameHeader = true

# Interceptor i1: matches names shaped like
#   <8 digits><2 digits><2 digits>-<f4>-<f5>-<f6>-<f7>-<f8>.log
# and exposes the eight capture groups, in order, as the headers
# day, hour, minute, project, machine, region, module and service.
# NOTE(review): RegexExtractorHeaderInterceptor is not among the interceptors
# described in the Flume User Guide; it looks like a custom class that must be
# on the agent's classpath - confirm.
clog.sources.source_log1.interceptors = i1
clog.sources.source_log1.interceptors.i1.type = org.apache.flume.interceptor.RegexExtractorHeaderInterceptor$Builder
clog.sources.source_log1.interceptors.i1.regex = (\\d{8})(\\d{2})(\\d{2})-(.*)-(.*)-(.*)-(.*)-(.*)\\.log
clog.sources.source_log1.interceptors.i1.serializers = s1 s2 s3 s4 s5 s6 s7 s8
clog.sources.source_log1.interceptors.i1.serializers.s1.name = day
clog.sources.source_log1.interceptors.i1.serializers.s2.name = hour
clog.sources.source_log1.interceptors.i1.serializers.s3.name = minute
clog.sources.source_log1.interceptors.i1.serializers.s4.name = project
clog.sources.source_log1.interceptors.i1.serializers.s5.name = machine
clog.sources.source_log1.interceptors.i1.serializers.s6.name = region
clog.sources.source_log1.interceptors.i1.serializers.s7.name = module
clog.sources.source_log1.interceptors.i1.serializers.s8.name = service
# source_log2: spooling-directory source that ingests files dropped into
# /home/data/log2 and publishes their lines to channel_log.
clog.sources.source_log2.type = spooldir
clog.sources.source_log2.channels = channel_log
clog.sources.source_log2.spoolDir = /home/data/log2

# Skip every file whose name does not start with a digit.
clog.sources.source_log2.ignorePattern = ^[^0-9].*
# Delete each file from the spool directory as soon as it has been consumed.
clog.sources.source_log2.deletePolicy = immediate
# Transfer events to the channel in batches of 1000.
clog.sources.source_log2.batchSize = 1000
# Permit up to 999999 characters per line before the deserializer splits it.
clog.sources.source_log2.deserializer.maxLineLength = 999999
# Silently drop byte sequences that cannot be decoded instead of failing.
clog.sources.source_log2.decodeErrorPolicy = IGNORE
# Store the file's base name in an event header.
# NOTE(review): presumably this header is what interceptor i1 parses - confirm.
clog.sources.source_log2.basenameHeader = true

# Interceptor i1: matches names shaped like
#   <8 digits><2 digits><2 digits>-<f4>-<f5>-<f6>-<f7>-<f8>.log
# and exposes the eight capture groups, in order, as the headers
# day, hour, minute, project, machine, region, module and service.
# NOTE(review): RegexExtractorHeaderInterceptor is not among the interceptors
# described in the Flume User Guide; it looks like a custom class that must be
# on the agent's classpath - confirm.
clog.sources.source_log2.interceptors = i1
clog.sources.source_log2.interceptors.i1.type = org.apache.flume.interceptor.RegexExtractorHeaderInterceptor$Builder
clog.sources.source_log2.interceptors.i1.regex = (\\d{8})(\\d{2})(\\d{2})-(.*)-(.*)-(.*)-(.*)-(.*)\\.log
clog.sources.source_log2.interceptors.i1.serializers = s1 s2 s3 s4 s5 s6 s7 s8
clog.sources.source_log2.interceptors.i1.serializers.s1.name = day
clog.sources.source_log2.interceptors.i1.serializers.s2.name = hour
clog.sources.source_log2.interceptors.i1.serializers.s3.name = minute
clog.sources.source_log2.interceptors.i1.serializers.s4.name = project
clog.sources.source_log2.interceptors.i1.serializers.s5.name = machine
clog.sources.source_log2.interceptors.i1.serializers.s6.name = region
clog.sources.source_log2.interceptors.i1.serializers.s7.name = module
clog.sources.source_log2.interceptors.i1.serializers.s8.name = service
# sink_log1: HDFS sink that takes events from channel_log and writes them to
# S3 as lzop-compressed text.
clog.sinks.sink_log1.type = hdfs
clog.sinks.sink_log1.channel = channel_log

# Destination directory, bucketed by the "service", "day" and "hour" event
# headers. An s3n URI with inline credentials must have the form
# s3n://ID:SECRET@BUCKET/path; the previous placeholder had no "@BUCKET" part
# and repeated the access-key token. Replace the three upper-case placeholders
# before deploying. Alternatively set the Hadoop properties
# fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey and use s3n://BUCKET/path,
# which keeps secrets out of this file and avoids problems when the secret
# key contains a "/".
clog.sinks.sink_log1.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@BUCKET_NAME/%{service}/%{day}/%{hour}

# File names begin with the "minute" header. The ".1" in the suffix is unique
# to this sink (the other sinks use .2 to .6).
# NOTE(review): presumably this keeps the sinks from colliding on file names.
clog.sinks.sink_log1.hdfs.filePrefix = %{minute}
clog.sinks.sink_log1.hdfs.fileSuffix = .1.lzo

# Write a compressed stream using the lzop codec.
clog.sinks.sink_log1.hdfs.fileType = CompressedStream
clog.sinks.sink_log1.hdfs.codeC = lzop
clog.sinks.sink_log1.hdfs.writeFormat = Text

# Size-, count- and time-based rolling are all disabled (0); a file is closed
# only after 180 seconds without any writes.
clog.sinks.sink_log1.hdfs.rollSize = 0
clog.sinks.sink_log1.hdfs.rollCount = 0
clog.sinks.sink_log1.hdfs.rollInterval = 0
clog.sinks.sink_log1.hdfs.idleTimeout = 180

# Flush after every 1000 events; allow each HDFS operation up to 600000 ms
# (10 minutes) before it is treated as timed out.
clog.sinks.sink_log1.hdfs.batchSize = 1000
clog.sinks.sink_log1.hdfs.callTimeout = 600000
# sink_log2: HDFS sink that takes events from channel_log and writes them to
# S3 as lzop-compressed text.
clog.sinks.sink_log2.type = hdfs
clog.sinks.sink_log2.channel = channel_log

# Destination directory, bucketed by the "service", "day" and "hour" event
# headers. An s3n URI with inline credentials must have the form
# s3n://ID:SECRET@BUCKET/path; the previous placeholder had no "@BUCKET" part
# and repeated the access-key token. Replace the three upper-case placeholders
# before deploying. Alternatively set the Hadoop properties
# fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey and use s3n://BUCKET/path,
# which keeps secrets out of this file and avoids problems when the secret
# key contains a "/".
clog.sinks.sink_log2.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@BUCKET_NAME/%{service}/%{day}/%{hour}

# File names begin with the "minute" header. The ".2" in the suffix is unique
# to this sink (the other sinks use .1 and .3 to .6).
# NOTE(review): presumably this keeps the sinks from colliding on file names.
clog.sinks.sink_log2.hdfs.filePrefix = %{minute}
clog.sinks.sink_log2.hdfs.fileSuffix = .2.lzo

# Write a compressed stream using the lzop codec.
clog.sinks.sink_log2.hdfs.fileType = CompressedStream
clog.sinks.sink_log2.hdfs.codeC = lzop
clog.sinks.sink_log2.hdfs.writeFormat = Text

# Size-, count- and time-based rolling are all disabled (0); a file is closed
# only after 180 seconds without any writes.
clog.sinks.sink_log2.hdfs.rollSize = 0
clog.sinks.sink_log2.hdfs.rollCount = 0
clog.sinks.sink_log2.hdfs.rollInterval = 0
clog.sinks.sink_log2.hdfs.idleTimeout = 180

# Flush after every 1000 events; allow each HDFS operation up to 600000 ms
# (10 minutes) before it is treated as timed out.
clog.sinks.sink_log2.hdfs.batchSize = 1000
clog.sinks.sink_log2.hdfs.callTimeout = 600000
# sink_log3: HDFS sink that takes events from channel_log and writes them to
# S3 as lzop-compressed text.
clog.sinks.sink_log3.type = hdfs
clog.sinks.sink_log3.channel = channel_log

# Destination directory, bucketed by the "service", "day" and "hour" event
# headers. An s3n URI with inline credentials must have the form
# s3n://ID:SECRET@BUCKET/path; the previous placeholder had no "@BUCKET" part
# and repeated the access-key token. Replace the three upper-case placeholders
# before deploying. Alternatively set the Hadoop properties
# fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey and use s3n://BUCKET/path,
# which keeps secrets out of this file and avoids problems when the secret
# key contains a "/".
clog.sinks.sink_log3.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@BUCKET_NAME/%{service}/%{day}/%{hour}

# File names begin with the "minute" header. The ".3" in the suffix is unique
# to this sink (the other sinks use .1, .2 and .4 to .6).
# NOTE(review): presumably this keeps the sinks from colliding on file names.
clog.sinks.sink_log3.hdfs.filePrefix = %{minute}
clog.sinks.sink_log3.hdfs.fileSuffix = .3.lzo

# Write a compressed stream using the lzop codec.
clog.sinks.sink_log3.hdfs.fileType = CompressedStream
clog.sinks.sink_log3.hdfs.codeC = lzop
clog.sinks.sink_log3.hdfs.writeFormat = Text

# Size-, count- and time-based rolling are all disabled (0); a file is closed
# only after 180 seconds without any writes.
clog.sinks.sink_log3.hdfs.rollSize = 0
clog.sinks.sink_log3.hdfs.rollCount = 0
clog.sinks.sink_log3.hdfs.rollInterval = 0
clog.sinks.sink_log3.hdfs.idleTimeout = 180

# Flush after every 1000 events; allow each HDFS operation up to 600000 ms
# (10 minutes) before it is treated as timed out.
clog.sinks.sink_log3.hdfs.batchSize = 1000
clog.sinks.sink_log3.hdfs.callTimeout = 600000
# sink_log4: HDFS sink that takes events from channel_log and writes them to
# S3 as lzop-compressed text.
clog.sinks.sink_log4.type = hdfs
clog.sinks.sink_log4.channel = channel_log

# Destination directory, bucketed by the "service", "day" and "hour" event
# headers. An s3n URI with inline credentials must have the form
# s3n://ID:SECRET@BUCKET/path; the previous placeholder had no "@BUCKET" part
# and repeated the access-key token. Replace the three upper-case placeholders
# before deploying. Alternatively set the Hadoop properties
# fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey and use s3n://BUCKET/path,
# which keeps secrets out of this file and avoids problems when the secret
# key contains a "/".
clog.sinks.sink_log4.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@BUCKET_NAME/%{service}/%{day}/%{hour}

# File names begin with the "minute" header. The ".4" in the suffix is unique
# to this sink (the other sinks use .1 to .3, .5 and .6).
# NOTE(review): presumably this keeps the sinks from colliding on file names.
clog.sinks.sink_log4.hdfs.filePrefix = %{minute}
clog.sinks.sink_log4.hdfs.fileSuffix = .4.lzo

# Write a compressed stream using the lzop codec.
clog.sinks.sink_log4.hdfs.fileType = CompressedStream
clog.sinks.sink_log4.hdfs.codeC = lzop
clog.sinks.sink_log4.hdfs.writeFormat = Text

# Size-, count- and time-based rolling are all disabled (0); a file is closed
# only after 180 seconds without any writes.
clog.sinks.sink_log4.hdfs.rollSize = 0
clog.sinks.sink_log4.hdfs.rollCount = 0
clog.sinks.sink_log4.hdfs.rollInterval = 0
clog.sinks.sink_log4.hdfs.idleTimeout = 180

# Flush after every 1000 events; allow each HDFS operation up to 600000 ms
# (10 minutes) before it is treated as timed out.
clog.sinks.sink_log4.hdfs.batchSize = 1000
clog.sinks.sink_log4.hdfs.callTimeout = 600000
# sink_log5: HDFS sink that takes events from channel_log and writes them to
# S3 as lzop-compressed text.
clog.sinks.sink_log5.type = hdfs
clog.sinks.sink_log5.channel = channel_log

# Destination directory, bucketed by the "service", "day" and "hour" event
# headers. An s3n URI with inline credentials must have the form
# s3n://ID:SECRET@BUCKET/path; the previous placeholder had no "@BUCKET" part
# and repeated the access-key token. Replace the three upper-case placeholders
# before deploying. Alternatively set the Hadoop properties
# fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey and use s3n://BUCKET/path,
# which keeps secrets out of this file and avoids problems when the secret
# key contains a "/".
clog.sinks.sink_log5.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@BUCKET_NAME/%{service}/%{day}/%{hour}

# File names begin with the "minute" header. The ".5" in the suffix is unique
# to this sink (the other sinks use .1 to .4 and .6).
# NOTE(review): presumably this keeps the sinks from colliding on file names.
clog.sinks.sink_log5.hdfs.filePrefix = %{minute}
clog.sinks.sink_log5.hdfs.fileSuffix = .5.lzo

# Write a compressed stream using the lzop codec.
clog.sinks.sink_log5.hdfs.fileType = CompressedStream
clog.sinks.sink_log5.hdfs.codeC = lzop
clog.sinks.sink_log5.hdfs.writeFormat = Text

# Size-, count- and time-based rolling are all disabled (0); a file is closed
# only after 180 seconds without any writes.
clog.sinks.sink_log5.hdfs.rollSize = 0
clog.sinks.sink_log5.hdfs.rollCount = 0
clog.sinks.sink_log5.hdfs.rollInterval = 0
clog.sinks.sink_log5.hdfs.idleTimeout = 180

# Flush after every 1000 events; allow each HDFS operation up to 600000 ms
# (10 minutes) before it is treated as timed out.
clog.sinks.sink_log5.hdfs.batchSize = 1000
clog.sinks.sink_log5.hdfs.callTimeout = 600000
# sink_log6: HDFS sink that takes events from channel_log and writes them to
# S3 as lzop-compressed text.
clog.sinks.sink_log6.type = hdfs
clog.sinks.sink_log6.channel = channel_log

# Destination directory, bucketed by the "service", "day" and "hour" event
# headers. An s3n URI with inline credentials must have the form
# s3n://ID:SECRET@BUCKET/path; the previous placeholder had no "@BUCKET" part
# and repeated the access-key token. Replace the three upper-case placeholders
# before deploying. Alternatively set the Hadoop properties
# fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey and use s3n://BUCKET/path,
# which keeps secrets out of this file and avoids problems when the secret
# key contains a "/".
clog.sinks.sink_log6.hdfs.path = s3n://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@BUCKET_NAME/%{service}/%{day}/%{hour}

# File names begin with the "minute" header. The ".6" in the suffix is unique
# to this sink (the other sinks use .1 to .5).
# NOTE(review): presumably this keeps the sinks from colliding on file names.
clog.sinks.sink_log6.hdfs.filePrefix = %{minute}
clog.sinks.sink_log6.hdfs.fileSuffix = .6.lzo

# Write a compressed stream using the lzop codec.
clog.sinks.sink_log6.hdfs.fileType = CompressedStream
clog.sinks.sink_log6.hdfs.codeC = lzop
clog.sinks.sink_log6.hdfs.writeFormat = Text

# Size-, count- and time-based rolling are all disabled (0); a file is closed
# only after 180 seconds without any writes.
clog.sinks.sink_log6.hdfs.rollSize = 0
clog.sinks.sink_log6.hdfs.rollCount = 0
clog.sinks.sink_log6.hdfs.rollInterval = 0
clog.sinks.sink_log6.hdfs.idleTimeout = 180

# Flush after every 1000 events; allow each HDFS operation up to 600000 ms
# (10 minutes) before it is treated as timed out.
clog.sinks.sink_log6.hdfs.batchSize = 1000
clog.sinks.sink_log6.hdfs.callTimeout = 600000
# channel_log: the single in-memory channel that both sources write to and
# all six sinks read from.
# NOTE(review): a memory channel is not persisted, and the sources use
# deletePolicy = immediate, so events still buffered here would be lost if the
# agent stops - confirm this is acceptable for production.
clog.channels.channel_log.type = memory
# At most 10000 events per put/take transaction (the source and sink batch
# sizes in this file are 1000).
clog.channels.channel_log.transactionCapacity = 10000
# At most 100000 events buffered in the channel at any time.
clog.channels.channel_log.capacity = 100000