使用flume从kafka中的topic取得数据,然后存入hbase和es中

接上一篇博客,对数据进行处理。

#HBASE

# Components of agent "tier2". Every source listed here is bound to its own
# channel and sink further down: sinks of type hbase for the AUDIT_* topics,
# ElasticSearchSink for the RUN_* topics.
tier2.sources = HbaseAuditSource HbaseRunSource HdfsAuditSources HdfsRunSources HiveAuditSources HiveRunSources StormWorkerSources StormRunSources YarnAuditSources YarnRunSources
tier2.channels = HbaseAuditChannel HbaseRunChannel HdfsAuditChannel HdfsRunChannel HiveAuditChannel HiveRunChannel StormWorkerChannel StormRunChannel YarnAuditChannel YarnRunChannel
tier2.sinks = HbaseAuditSink HbaseRunSink HdfsAuditSink HdfsRunSink HiveAuditSink HiveRunSink StormWorkerSink StormRunSink YarnAuditSink YarnRunSink


# Kafka source: topic AUDIT_HBASE_WC -> HbaseAuditChannel.
tier2.sources.HbaseAuditSource.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.HbaseAuditSource.channels = HbaseAuditChannel
tier2.sources.HbaseAuditSource.topic = AUDIT_HBASE_WC
tier2.sources.HbaseAuditSource.groupId = flume
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.HbaseAuditSource.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.HbaseAuditSource.batchSize = 1
tier2.sources.HbaseAuditSource.kafka.consumer.timeout.ms = 100


# Kafka source: topic RUN_HBASE_WC -> HbaseRunChannel.
tier2.sources.HbaseRunSource.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.HbaseRunSource.channels = HbaseRunChannel
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.HbaseRunSource.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.HbaseRunSource.topic = RUN_HBASE_WC
tier2.sources.HbaseRunSource.groupId = flume
tier2.sources.HbaseRunSource.batchSize = 1
tier2.sources.HbaseRunSource.kafka.consumer.timeout.ms = 100
# regex_extractor: the three capture groups are stored as the event headers
# serverip, datatype and time (serializers s1..s3, in group order).
# The literal key in the regex is spelled "datatype" (it was "datatye"), in
# line with the header name below and the datatype= sink regexes in this file.
# NOTE(review): confirm against the prefix the upstream agent really emits.
# The third group is 23 characters of digits, '-', whitespace, ':' or ','
# (presumably a "yyyy-MM-dd HH:mm:ss,SSS" timestamp).
tier2.sources.HbaseRunSource.interceptors = i1
tier2.sources.HbaseRunSource.interceptors.i1.type = regex_extractor
tier2.sources.HbaseRunSource.interceptors.i1.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:,]{23}).*
tier2.sources.HbaseRunSource.interceptors.i1.serializers = s1 s2 s3
tier2.sources.HbaseRunSource.interceptors.i1.serializers.s1.name = serverip
tier2.sources.HbaseRunSource.interceptors.i1.serializers.s2.name = datatype
tier2.sources.HbaseRunSource.interceptors.i1.serializers.s3.name = time






# Kafka source: topic AUDIT_HDFS_WC -> HdfsAuditChannel.
tier2.sources.HdfsAuditSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.HdfsAuditSources.channels = HdfsAuditChannel
tier2.sources.HdfsAuditSources.topic = AUDIT_HDFS_WC
tier2.sources.HdfsAuditSources.groupId = flume
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.HdfsAuditSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.HdfsAuditSources.batchSize = 1
tier2.sources.HdfsAuditSources.kafka.consumer.timeout.ms = 100


# Kafka source: topic RUN_HDFS_WC -> HdfsRunChannel.
tier2.sources.HdfsRunSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.HdfsRunSources.channels = HdfsRunChannel
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.HdfsRunSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.HdfsRunSources.topic = RUN_HDFS_WC
tier2.sources.HdfsRunSources.groupId = flume
tier2.sources.HdfsRunSources.batchSize = 1
tier2.sources.HdfsRunSources.kafka.consumer.timeout.ms = 100
# regex_extractor: the three capture groups are stored as the event headers
# serverip, datatype and time (serializers s1..s3, in group order).
# The literal key in the regex is spelled "datatype" (it was "datatye"), in
# line with the header name below and the datatype= sink regexes in this file.
# NOTE(review): confirm against the prefix the upstream agent really emits.
tier2.sources.HdfsRunSources.interceptors = i1
tier2.sources.HdfsRunSources.interceptors.i1.type = regex_extractor
tier2.sources.HdfsRunSources.interceptors.i1.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:,]{23}).*
tier2.sources.HdfsRunSources.interceptors.i1.serializers = s1 s2 s3
tier2.sources.HdfsRunSources.interceptors.i1.serializers.s1.name = serverip
tier2.sources.HdfsRunSources.interceptors.i1.serializers.s2.name = datatype
tier2.sources.HdfsRunSources.interceptors.i1.serializers.s3.name = time






# Kafka source: topic AUDIT_HIVE_WC -> HiveAuditChannel.
tier2.sources.HiveAuditSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.HiveAuditSources.channels = HiveAuditChannel
tier2.sources.HiveAuditSources.topic = AUDIT_HIVE_WC
tier2.sources.HiveAuditSources.groupId = flume
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.HiveAuditSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.HiveAuditSources.batchSize = 1
tier2.sources.HiveAuditSources.kafka.consumer.timeout.ms = 100


# Kafka source: topic RUN_HIVE_WC -> HiveRunChannel.
tier2.sources.HiveRunSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.HiveRunSources.channels = HiveRunChannel
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.HiveRunSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.HiveRunSources.topic = RUN_HIVE_WC
tier2.sources.HiveRunSources.groupId = flume
tier2.sources.HiveRunSources.batchSize = 1
tier2.sources.HiveRunSources.kafka.consumer.timeout.ms = 100
# regex_extractor: the three capture groups are stored as the event headers
# serverip, datatype and time (serializers s1..s3, in group order).
# The literal key in the regex is spelled "datatype" (it was "datatye"), in
# line with the header name below and the datatype= sink regexes in this file.
# NOTE(review): confirm against the prefix the upstream agent really emits.
tier2.sources.HiveRunSources.interceptors = i1
tier2.sources.HiveRunSources.interceptors.i1.type = regex_extractor
tier2.sources.HiveRunSources.interceptors.i1.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:,]{23}).*
tier2.sources.HiveRunSources.interceptors.i1.serializers = s1 s2 s3
tier2.sources.HiveRunSources.interceptors.i1.serializers.s1.name = serverip
tier2.sources.HiveRunSources.interceptors.i1.serializers.s2.name = datatype
tier2.sources.HiveRunSources.interceptors.i1.serializers.s3.name = time






# Kafka source: topic AUDIT_STORM_WC -> StormWorkerChannel.
tier2.sources.StormWorkerSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.StormWorkerSources.channels = StormWorkerChannel
tier2.sources.StormWorkerSources.topic = AUDIT_STORM_WC
tier2.sources.StormWorkerSources.groupId = flume
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.StormWorkerSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.StormWorkerSources.batchSize = 1
tier2.sources.StormWorkerSources.kafka.consumer.timeout.ms = 100


# Kafka source: topic RUN_STORM_WC -> StormRunChannel.
tier2.sources.StormRunSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.StormRunSources.channels = StormRunChannel
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.StormRunSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.StormRunSources.topic = RUN_STORM_WC
tier2.sources.StormRunSources.groupId = flume
tier2.sources.StormRunSources.batchSize = 1
tier2.sources.StormRunSources.kafka.consumer.timeout.ms = 100
# regex_extractor: the three capture groups are stored as the event headers
# serverip, datatype and time (serializers s1..s3, in group order).
# The literal key in the regex is spelled "datatype" (it was "datatye"), in
# line with the header name below and the datatype= sink regexes in this file.
# NOTE(review): confirm against the prefix the upstream agent really emits.
# Unlike the other run sources, the timestamp class accepts '.' rather than
# ',' (same character class as the StormWorkerSink regex).
tier2.sources.StormRunSources.interceptors = i1
tier2.sources.StormRunSources.interceptors.i1.type = regex_extractor
tier2.sources.StormRunSources.interceptors.i1.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:.]{23}).*
tier2.sources.StormRunSources.interceptors.i1.serializers = s1 s2 s3
tier2.sources.StormRunSources.interceptors.i1.serializers.s1.name = serverip
tier2.sources.StormRunSources.interceptors.i1.serializers.s2.name = datatype
tier2.sources.StormRunSources.interceptors.i1.serializers.s3.name = time




# YARN
# Kafka source: topic AUDIT_YARN_WC -> YarnAuditChannel.
tier2.sources.YarnAuditSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.YarnAuditSources.channels = YarnAuditChannel
tier2.sources.YarnAuditSources.topic = AUDIT_YARN_WC
tier2.sources.YarnAuditSources.groupId = flume
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.YarnAuditSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.YarnAuditSources.batchSize = 1
tier2.sources.YarnAuditSources.kafka.consumer.timeout.ms = 100


# Kafka source: topic RUN_YARN_WC -> YarnRunChannel.
tier2.sources.YarnRunSources.type = org.apache.flume.source.kafka.KafkaSource
tier2.sources.YarnRunSources.channels = YarnRunChannel
# The ZooKeeper host is masked as *.*.*.* here; substitute the real address.
tier2.sources.YarnRunSources.zookeeperConnect = *.*.*.*:2181/kafka-test
tier2.sources.YarnRunSources.topic = RUN_YARN_WC
tier2.sources.YarnRunSources.groupId = flume
tier2.sources.YarnRunSources.batchSize = 1
tier2.sources.YarnRunSources.kafka.consumer.timeout.ms = 100
# regex_extractor: the three capture groups are stored as the event headers
# serverip, datatype and time (serializers s1..s3, in group order).
# The literal key in the regex is spelled "datatype" (it was "datatye"), in
# line with the header name below and the datatype= sink regexes in this file.
# NOTE(review): confirm against the prefix the upstream agent really emits.
tier2.sources.YarnRunSources.interceptors = i1
tier2.sources.YarnRunSources.interceptors.i1.type = regex_extractor
tier2.sources.YarnRunSources.interceptors.i1.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:,]{23}).*
tier2.sources.YarnRunSources.interceptors.i1.serializers = s1 s2 s3
tier2.sources.YarnRunSources.interceptors.i1.serializers.s1.name = serverip
tier2.sources.YarnRunSources.interceptors.i1.serializers.s2.name = datatype
tier2.sources.YarnRunSources.interceptors.i1.serializers.s3.name = time






# HBase sink: HbaseAuditChannel -> table audit_hbase_wc, column family f1.
tier2.sinks.HbaseAuditSink.type = hbase
tier2.sinks.HbaseAuditSink.channel = HbaseAuditChannel
tier2.sinks.HbaseAuditSink.table = audit_hbase_wc
tier2.sinks.HbaseAuditSink.columnFamily = f1
tier2.sinks.HbaseAuditSink.batchSize = 1
# RegexHbaseEventSerializer writes capture group N to the N-th name in
# colNames (7 groups, 7 names here). It needs the regex to match the whole
# event body, otherwise nothing is written for that event.
# The literal key is spelled "datatype" (it was "datatye"), in line with the
# column name and the datatype= sink regexes elsewhere in this file.
# NOTE(review): confirm against the prefix the upstream agent really emits.
tier2.sinks.HbaseAuditSink.serializer = org.apache.flume.sink.hbase.RegexHbaseEventSerializer
tier2.sinks.HbaseAuditSink.serializer.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:,]{23}).*/(.*?);\\srequest:(.*?);.*user=(.*?),\\sscope=(.*?),.*
tier2.sinks.HbaseAuditSink.serializer.colNames = serverip,datatype,requestdate,clientip,operation,requestuser,accessdatafile


# Elasticsearch sink: HbaseRunChannel -> index run_hbase_wc.
tier2.sinks.HbaseRunSink.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
tier2.sinks.HbaseRunSink.channel = HbaseRunChannel
# The Elasticsearch host is masked as *.*.*.* here; substitute the real address.
tier2.sinks.HbaseRunSink.hostNames = *.*.*.*:9300
tier2.sinks.HbaseRunSink.clusterName = fe8734cb-8e5d-476a-9aa6-19ee459e15a6
tier2.sinks.HbaseRunSink.indexName = run_hbase_wc
tier2.sinks.HbaseRunSink.batchSize = 1




# HBase sink: HdfsAuditChannel -> table audit_hdfs_wc, column family f1.
tier2.sinks.HdfsAuditSink.type = hbase
tier2.sinks.HdfsAuditSink.channel = HdfsAuditChannel
tier2.sinks.HdfsAuditSink.table = audit_hdfs_wc
tier2.sinks.HdfsAuditSink.columnFamily = f1
tier2.sinks.HdfsAuditSink.batchSize = 1
# RegexHbaseEventSerializer writes capture group N to the N-th name in
# colNames and emits nothing when the number of groups differs from the
# number of names. The regex previously had 11 groups against 10 names, so
# the trailing proto= field is now matched without being captured; the ten
# remaining groups line up with colNames (perm= feeds dataowner).
tier2.sinks.HdfsAuditSink.serializer = org.apache.flume.sink.hbase.RegexHbaseEventSerializer
tier2.sinks.HdfsAuditSink.serializer.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:,]{23})\\s*.*:\\sallowed=(.*?)ugi=(.*?)\\s.*?\\)ip=/(.*?)cmd=(.*?)src=(.*?)dst=(.*?)perm=(.*?)proto=.*
tier2.sinks.HdfsAuditSink.serializer.colNames = serverip,datatype,requestdate,operationresult,requestuser,clientip,operation,src,dst,dataowner




# Elasticsearch sink: HdfsRunChannel -> index run_hdfs_wc.
tier2.sinks.HdfsRunSink.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
tier2.sinks.HdfsRunSink.channel = HdfsRunChannel
# The Elasticsearch host is masked as *.*.*.* here; substitute the real address.
tier2.sinks.HdfsRunSink.hostNames = *.*.*.*:9300
tier2.sinks.HdfsRunSink.clusterName = fe8734cb-8e5d-476a-9aa6-19ee459e15a6
tier2.sinks.HdfsRunSink.indexName = run_hdfs_wc
tier2.sinks.HdfsRunSink.batchSize = 1
# Disabled overrides, kept for reference; the sink's defaults apply.
#tier2.sinks.HdfsRunSink.indexType = message
#tier2.sinks.HdfsRunSink.ttl = 5d






# HBase sink: HiveAuditChannel -> table audit_hive_wc, column family f1.
tier2.sinks.HiveAuditSink.type = hbase
tier2.sinks.HiveAuditSink.channel = HiveAuditChannel
tier2.sinks.HiveAuditSink.table = audit_hive_wc
tier2.sinks.HiveAuditSink.columnFamily = f1
tier2.sinks.HiveAuditSink.batchSize = 1
# RegexHbaseEventSerializer writes capture group N to the N-th name in
# colNames (14 groups, 14 names here).
# Two corrections to the pattern:
#  - "resourcePath" now has the ':' separator that every other field has, so
#    the captured value no longer starts with a stray ':'.
#  - the last group was "(.*?)*" (a quantified lazy group) and is now "(.*)",
#    so objectType receives the remainder of the event body.
tier2.sinks.HiveAuditSink.serializer = org.apache.flume.sink.hbase.RegexHbaseEventSerializer
tier2.sinks.HiveAuditSink.serializer.regex = serverip=(.*?),datatype=(.*?),.*"serviceName":(.*?),\\s"username":(.*?),\\s"impersonator":(.*?),\\s"ipAddress":(.*?),\\s"operation":(.*?),\\s"eventTime":(.*?),\\s"operationText":(.*?),\\s"allowed":(.*?),\\s"databaseName":(.*?),\\s"tableName":(.*?),\\s"resourcePath":(.*?),\\s"objectType":(.*)
tier2.sinks.HiveAuditSink.serializer.colNames = serverip,datatype,requestuser,username,impersonator,clientip,operation,eventtime,operationtext,operationresult,databaseName,tableName,resourcePath,objectType


# Elasticsearch sink: HiveRunChannel -> index run_hive_wc.
tier2.sinks.HiveRunSink.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
tier2.sinks.HiveRunSink.channel = HiveRunChannel
# The Elasticsearch host is masked as *.*.*.* here; substitute the real address.
tier2.sinks.HiveRunSink.hostNames = *.*.*.*:9300
tier2.sinks.HiveRunSink.clusterName = fe8734cb-8e5d-476a-9aa6-19ee459e15a6
tier2.sinks.HiveRunSink.indexName = run_hive_wc
tier2.sinks.HiveRunSink.batchSize = 1




# HBase sink: StormWorkerChannel -> table audit_storm_wc, column family f1.
tier2.sinks.StormWorkerSink.type = hbase
tier2.sinks.StormWorkerSink.channel = StormWorkerChannel
tier2.sinks.StormWorkerSink.table = audit_storm_wc
tier2.sinks.StormWorkerSink.columnFamily = f1
tier2.sinks.StormWorkerSink.batchSize = 1
# RegexHbaseEventSerializer writes capture group N to the N-th name in
# colNames (5 groups, 5 names here). The timestamp class accepts '.' where
# the other audit regexes accept ','.
tier2.sinks.StormWorkerSink.serializer = org.apache.flume.sink.hbase.RegexHbaseEventSerializer
tier2.sinks.StormWorkerSink.serializer.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:.]{23})\\s.*,\\s.*,\\sattempt=(.*?)\\s.*,\\slast exception:(.*?)\\son.*
tier2.sinks.StormWorkerSink.serializer.colNames = serverip,datatype,requestdate,attempt,lastexception




# Elasticsearch sink: StormRunChannel -> index run_storm_wc.
tier2.sinks.StormRunSink.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
tier2.sinks.StormRunSink.channel = StormRunChannel
# The Elasticsearch host is masked as *.*.*.* here; substitute the real address.
tier2.sinks.StormRunSink.hostNames = *.*.*.*:9300
tier2.sinks.StormRunSink.clusterName = fe8734cb-8e5d-476a-9aa6-19ee459e15a6
tier2.sinks.StormRunSink.indexName = run_storm_wc
tier2.sinks.StormRunSink.batchSize = 1






# HBase sink: YarnAuditChannel -> table audit_yarn_wc, column family f1.
tier2.sinks.YarnAuditSink.type = hbase
tier2.sinks.YarnAuditSink.channel = YarnAuditChannel
tier2.sinks.YarnAuditSink.table = audit_yarn_wc
tier2.sinks.YarnAuditSink.columnFamily = f1
tier2.sinks.YarnAuditSink.batchSize = 1
# RegexHbaseEventSerializer writes capture group N to the N-th name in
# colNames (9 groups, 9 names here).
tier2.sinks.YarnAuditSink.serializer = org.apache.flume.sink.hbase.RegexHbaseEventSerializer
tier2.sinks.YarnAuditSink.serializer.regex = serverip=(.*?),datatype=(.*?),([\\d\\-\\s:,]{23})\\s*.*:\\sUSER=(.*?)IP=(.*?)OPERATION=(.*?)TARGET=(.*?)RESULT=(.*?)APPID=(.*)
tier2.sinks.YarnAuditSink.serializer.colNames = serverip,datatype,requestdate,requestuser,clientip,operation,target,operationresult,APPID




# Elasticsearch sink: YarnRunChannel -> index run_yarn_wc.
tier2.sinks.YarnRunSink.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
tier2.sinks.YarnRunSink.channel = YarnRunChannel
# The Elasticsearch host is masked as *.*.*.* here; substitute the real address.
tier2.sinks.YarnRunSink.hostNames = *.*.*.*:9300
tier2.sinks.YarnRunSink.clusterName = fe8734cb-8e5d-476a-9aa6-19ee459e15a6
tier2.sinks.YarnRunSink.indexName = run_yarn_wc
tier2.sinks.YarnRunSink.batchSize = 1
# Disabled overrides, kept for reference; the sink's defaults apply.
#tier2.sinks.YarnRunSink.indexType = message
#tier2.sinks.YarnRunSink.ttl = 5d








# Memory channels for the HBase audit and run flows. Each holds up to 10000
# events and moves at most 1000 events per transaction.
tier2.channels.HbaseAuditChannel.type = memory
tier2.channels.HbaseAuditChannel.capacity = 10000
tier2.channels.HbaseAuditChannel.transactionCapacity = 1000
tier2.channels.HbaseAuditChannel.byteCapacityBufferPercentage = 20

tier2.channels.HbaseRunChannel.type = memory
tier2.channels.HbaseRunChannel.capacity = 10000
tier2.channels.HbaseRunChannel.transactionCapacity = 1000
tier2.channels.HbaseRunChannel.byteCapacityBufferPercentage = 20




# Memory channels for the HDFS audit and run flows. Each holds up to 10000
# events and moves at most 1000 events per transaction.
tier2.channels.HdfsAuditChannel.type = memory
tier2.channels.HdfsAuditChannel.capacity = 10000
tier2.channels.HdfsAuditChannel.transactionCapacity = 1000
tier2.channels.HdfsAuditChannel.byteCapacityBufferPercentage = 20

tier2.channels.HdfsRunChannel.type = memory
tier2.channels.HdfsRunChannel.capacity = 10000
tier2.channels.HdfsRunChannel.transactionCapacity = 1000
tier2.channels.HdfsRunChannel.byteCapacityBufferPercentage = 20




# Memory channels for the Hive audit and run flows. Each holds up to 10000
# events and moves at most 1000 events per transaction.
tier2.channels.HiveAuditChannel.type = memory
tier2.channels.HiveAuditChannel.capacity = 10000
tier2.channels.HiveAuditChannel.transactionCapacity = 1000
tier2.channels.HiveAuditChannel.byteCapacityBufferPercentage = 20

tier2.channels.HiveRunChannel.type = memory
tier2.channels.HiveRunChannel.capacity = 10000
tier2.channels.HiveRunChannel.transactionCapacity = 1000
tier2.channels.HiveRunChannel.byteCapacityBufferPercentage = 20




# Memory channels for the Storm worker and run flows. Each holds up to 10000
# events and moves at most 1000 events per transaction.
tier2.channels.StormWorkerChannel.type = memory
tier2.channels.StormWorkerChannel.capacity = 10000
tier2.channels.StormWorkerChannel.transactionCapacity = 1000
tier2.channels.StormWorkerChannel.byteCapacityBufferPercentage = 20

tier2.channels.StormRunChannel.type = memory
tier2.channels.StormRunChannel.capacity = 10000
tier2.channels.StormRunChannel.transactionCapacity = 1000
tier2.channels.StormRunChannel.byteCapacityBufferPercentage = 20




# Memory channels for the YARN audit and run flows. Each holds up to 10000
# events and moves at most 1000 events per transaction.
tier2.channels.YarnAuditChannel.type = memory
tier2.channels.YarnAuditChannel.capacity = 10000
tier2.channels.YarnAuditChannel.transactionCapacity = 1000
tier2.channels.YarnAuditChannel.byteCapacityBufferPercentage = 20

tier2.channels.YarnRunChannel.type = memory
tier2.channels.YarnRunChannel.capacity = 10000
tier2.channels.YarnRunChannel.transactionCapacity = 1000
tier2.channels.YarnRunChannel.byteCapacityBufferPercentage = 20

你可能感兴趣的:(使用flume从kafka中的topic取得数据,然后存入hbase和es中)