Nginx-side configuration:
# Name the components on this agent
a1.sources = r
a1.sinks = k_kafka
a1.channels = c_mem
# Channels info
a1.channels.c_mem.type = memory
a1.channels.c_mem.capacity = 2000
a1.channels.c_mem.transactionCapacity = 300
a1.channels.c_mem.keep-alive = 60
# Sources info
a1.sources.r.type = spooldir
a1.sources.r.channels = c_mem
a1.sources.r.spoolDir = /home/litao/avro_file/
a1.sources.r.fileHeader = true
a1.sources.r.deserializer = avro
# Sinks info
a1.sinks.k_kafka.type = avro
a1.sinks.k_kafka.hostname = localhost
a1.sinks.k_kafka.port = 55555
a1.sinks.k_kafka.channel = c_mem
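This agent watches the spool directory /home/litao/avro_file/, deserializes each Avro container file into events, and forwards them over Avro RPC to port 55555. A minimal sketch of launching it ($FLUME_HOME and the file name nginx_agent.conf are assumptions, not part of the original setup):

# Start agent "a1" with the configuration above (assumed saved as nginx_agent.conf).
flume-ng agent --conf $FLUME_HOME/conf --conf-file nginx_agent.conf --name a1 -Dflume.root.logger=INFO,console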
Kafka-side configuration:
# Name the components on this agent
a1.sources = r1
a1.channels = c1
a1.sinks = k1
# Sources info
a1.sources.r1.channels = c1
a1.sources.r1.type = avro
a1.sources.r1.bind = localhost
a1.sources.r1.port = 55555
# Channels info
a1.channels.c1.type = memory
a1.channels.c1.capacity = 2000
a1.channels.c1.transactionCapacity = 500
a1.channels.c1.keep-alive = 50
# Sinks info
a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.bootstrap.servers = kafka1:9093,kafka2:9093,kafka3:9093,kafka4:9093,kafka5:9093,kafka6:9093
a1.sinks.k1.kafka.topic = test_2018-03-14
a1.sinks.k1.kafka.flumeBatchSize = 5
a1.sinks.k1.kafka.producer.acks = 1
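This agent listens on port 55555 for the Avro RPC stream from the Nginx-side agent and publishes it to the test_2018-03-14 topic. Delivery can be spot-checked with the stock console consumer (a sketch; the event bodies are Avro-serialized, so the output is binary and only confirms that messages arrive):

# Consume from the beginning of the topic to verify events are flowing.
kafka-console-consumer.sh --bootstrap-server kafka1:9093 --topic test_2018-03-14 --from-beginning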
HDFS-side configuration:
# Name the components on this agent
a1.sources = r1
a1.channels = c1
a1.sinks = k1
# Sources info
a1.sources.r1.channels = c1
a1.sources.r1.type = com.bigo.flume.source.kafka.KafkaSource
a1.sources.r1.kafka.bootstrap.servers = kafka1:9093,kafka2:9093,kafka3:9093,kafka4:9093,kafka5:9093,kafka6:9093
a1.sources.r1.kafka.topics = test_2018-03-14
a1.sources.r1.kafka.consumer.group.id = test_2018-03-14.conf_flume_group
a1.sources.r1.kafka.consumer.timeout.ms = 100
# Inject the schema URL into the event header so the AvroEventSerializer can pick it up
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = static
a1.sources.r1.interceptors.i1.key = flume.avro.schema.url
a1.sources.r1.interceptors.i1.value = hdfs://bigocluster/user/litao/litao.avsc
# Channels info
a1.channels.c1.type = memory
a1.channels.c1.capacity = 5000
a1.channels.c1.transactionCapacity = 1000
a1.channels.c1.keep-alive = 50
# Sinks info
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.serializer = org.apache.flume.serialization.AvroEventSerializer$Builder
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.path = hdfs://bigocluster/flume/bigolive/test_2018-03-14
a1.sinks.k1.hdfs.filePrefix = test.%Y-%m-%d
a1.sinks.k1.hdfs.fileSuffix = .avro
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.idleTimeout = 603
a1.sinks.k1.hdfs.useLocalTimeStamp = false
a1.sinks.k1.hdfs.fileType = DataStream
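The static interceptor stamps every event with flume.avro.schema.url, which the AvroEventSerializer reads when writing the .avro files, so the schema must already exist in HDFS before this agent starts. A sketch of publishing it, assuming litao.avsc holds the same record that the avro.schema.literal variant below spells out:

# Write the schema locally (assumed content, matching the Hive DDL below),
# then upload it to the path the interceptor references.
cat > litao.avsc <<'EOF'
{
  "namespace": "com.howdy",
  "name": "some_schema",
  "type": "record",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"}
  ]
}
EOF
hdfs dfs -put -f litao.avsc hdfs://bigocluster/user/litao/litao.avsc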
Hive configuration:
SET hive.exec.compress.output=true;
SET avro.output.codec=snappy;
CREATE EXTERNAL TABLE tmp.test_hdfs_litao
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.url'='hdfs://bigocluster/user/litao/litao.avsc')
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 'hdfs://bigocluster/flume/bigolive/test_2018-03-14';
Alternatively, the schema can be embedded inline with avro.schema.literal instead of referencing an external .avsc file:
CREATE EXTERNAL TABLE tmp.test_hdfs_litao
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 'hdfs://bigocluster/flume/bigolive/test_2018-03-14'
TBLPROPERTIES (
'avro.schema.literal'='{
  "namespace": "com.howdy",
  "name": "some_schema",
  "type": "record",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"}
  ]
}'
);
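Once the table exists, the Avro files rolled by the HDFS sink are immediately queryable. A quick sanity check (a sketch using the Hive CLI; beeline works just as well):

# Read a few rows back to confirm the end-to-end pipeline.
hive -e 'SELECT name, age FROM tmp.test_hdfs_litao LIMIT 10;'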
3 Conclusion