High-Performance Real-Time Data Warehouse (Part 5): Reading Kafka Data with Flume and Pushing It to HDFS

The Flume agent below (a1) wires a Kafka source (s1) through a memory channel (c1) into an HDFS sink (k1): it consumes the test01 topic via ZooKeeper and writes plain-text files into date-partitioned directories on HDFS.

# Name the components of agent a1 (source s1, channel c1, sink k1)
a1.sources = s1
a1.channels = c1
a1.sinks = k1

# Kafka source (Flume 1.6-style, configured through ZooKeeper)
a1.sources.s1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.s1.zookeeperConnect = 192.168.18.100:2181,192.168.18.101:2181,192.168.18.102:2181
a1.sources.s1.topic = test01
a1.sources.s1.groupId = test01
a1.sources.s1.channels = c1
# The timestamp interceptor adds the timestamp header required by the
# %y-%m-%d escape sequences in the HDFS sink path below
a1.sources.s1.interceptors = i1
a1.sources.s1.interceptors.i1.type = timestamp
# Stop waiting for new Kafka messages after 1 s so partial batches get flushed
a1.sources.s1.kafka.consumer.timeout.ms = 1000

# Memory channel: fast, but buffered events are lost if the agent dies
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 1000

# HDFS sink: DataStream writes events as plain text; rollSize/rollCount = 0
# disables size- and count-based rolling, so a new file is rolled purely on
# time, every 30 seconds (rollInterval)
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = /data/kafka/test01/%y-%m-%d
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.channel = c1
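Save the agent definition (the file name kafka-hdfs.conf below is an assumption; substitute whatever name you saved it under) and start the agent from the Flume home directory. The -Dflume.root.logger option is optional and echoes agent activity to the console for debugging:

[root@note01 flume]# bin/flume-ng agent \
  --conf conf/ \
  --name a1 \
  --conf-file conf/kafka-hdfs.conf \
  -Dflume.root.logger=INFO,console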

Next, edit flume-env.sh so the agent can find the JDK and report metrics to Ganglia:

[root@note01 conf]# vim flume-env.sh
#JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_144

# Give Flume more memory and pre-allocate, enable remote monitoring via JMX
# export JAVA_OPTS="-Xms100m -Xmx2000m -Dcom.sun.management.jmxremote"
export JAVA_OPTS="-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts=192.168.1.102:8649 -Xms100m -Xmx200m"
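# If no Ganglia server is available, the same counters can be exposed as
# JSON over HTTP instead (assumption: port 34545 is free on this host):
# export JAVA_OPTS="-Dflume.monitoring.type=http -Dflume.monitoring.port=34545"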
# Let Flume write raw event data and configuration information to its log files for debugging
# purposes. Enabling these flags is not recommended in production,
# as it may result in logging sensitive user information or encryption secrets.
# export JAVA_OPTS="$JAVA_OPTS -Dorg.apache.flume.log.rawdata=true -Dorg.apache.flume.log.printconfig=true "  
        
# Note that the Flume conf directory is always included in the classpath.
#FLUME_CLASSPATH="" 

#HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
    
#ZK_HOME
export ZK_HOME=/opt/module/zookeeper-3.4.10
export PATH=$PATH:$ZK_HOME/bin
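The HADOOP_HOME and PATH exports above matter: the flume-ng launcher looks for the hadoop command on the PATH to put the Hadoop client classes needed by the HDFS sink onto Flume's classpath. With the environment in place, smoke-test the pipeline end to end: write a few messages into test01 with the Kafka console producer (the 9092 broker port is an assumption; check your Kafka listener config), then confirm the sink has rolled files into the date directory on HDFS. Output files carry Flume's default FlumeData prefix:

[root@note01 kafka]# bin/kafka-console-producer.sh --broker-list 192.168.18.100:9092 --topic test01
>hello flume
>hello hdfs

[root@note01 ~]# hdfs dfs -ls /data/kafka/test01/
[root@note01 ~]# hdfs dfs -cat /data/kafka/test01/*/FlumeData.*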

