flume-kafka-flume-hdfs

将数据建立软链接,并同步到文件夹里,供flume采集

#! /bin/bash
# Create a symlink for the current hour's response log inside the monitor
# directory so Flume's spooldir source can collect it.
#
# Behavior:
#   - If both the link and its ".OK" marker already exist, do nothing.
#   - If neither the link nor the ".OK" marker exists AND the source log
#     file exists, create the symlink.
#
# Paths may be overridden via MONITOR_DIR / RESPONSE_DIR env vars;
# defaults preserve the original hard-coded locations.

set -u

# Current hour, e.g. 2024-01-31-13; matches the log file naming scheme.
mydate=$(date +%Y-%m-%d-%H)

# Alternative: yesterday's date (kept from original for reference).
#mydate=$(date -d "yesterday" +%Y%m%d)

monitorDir="${MONITOR_DIR:-/data/webapp/d5wx-webapp/logs/monitor/}"
filePath="${RESPONSE_DIR:-/data/webapp/d5wx-webapp/logs/response/}"
fileName="response-${mydate}.log"

echo "文件地址:${filePath}"
echo "文件名字:${fileName}"

if [[ -f "${monitorDir}${fileName}" && -f "${monitorDir}${fileName}.OK" ]]; then
	echo "---软连接文件存在---"
elif [[ ! -f "${monitorDir}${fileName}" && ! -f "${monitorDir}${fileName}.OK" && -f "${filePath}${fileName}" ]]; then
	echo "---软链接不存在---"
	# Fail loudly if the link cannot be created (e.g. permissions).
	ln -s "${filePath}${fileName}" "${monitorDir}${fileName}" || exit 1
fi
exit 0

在生产服务器上的flume配置文件,将日志信息发送到kafka上

############################################
# Flume agent configuration (producer side).
# Component naming:
#   a1 - the agent's name
#   r1 - a1's source
#   c1 - a1's channel
#   k1 - a1's sink
############################################
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source: watch a spooling directory for completed log files.
# Files are renamed with the ".OK" suffix once fully ingested.
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir =/data/adver-webapp/adver-webapp/logs/monitor
a1.sources.r1.fileSuffix = .OK
a1.sources.r1.deletePolicy = never
a1.sources.r1.fileHeader = true

# Sink: forward events to Kafka topic "adver".
# NOTE(review): brokerList port 44444 is unusual (Kafka default is 9092) —
# confirm this matches the broker's advertised listener.
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = adver
a1.sinks.k1.brokerList =192.168.152.34:44444
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20

# Channel: in-memory buffer between source and sink.
# Maximum of 1000 events held in the channel at once.
a1.channels.c1.capacity = 1000
a1.channels.c1.type = memory
# Up to 100 events per transaction.
a1.channels.c1.transactionCapacity = 100

# Wire the components together: source and sink share channel c1.
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

从kafka的主题topic采集信息,置于hdfs

# Flume agent configuration (consumer side): read from the Kafka topic
# and write the events to HDFS.
a1.sources = r1
a1.channels = ch1
a1.sinks = s1
 
 
# Source: consume from Kafka topic "adver".
# NOTE(review): zookeeperConnect is the legacy (old consumer) configuration
# style; newer Flume/Kafka versions use kafka.bootstrap.servers /
# kafka.topics instead — confirm against the deployed Flume version.
a1.sources.r1.type=org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.zookeeperConnect=127.0.0.1:2181
a1.sources.r1.topic=adver
#agent.sources.kafkaSource.groupId=flume
a1.sources.r1.kafka.consumer.timeout.ms=100
 
# Channel: in-memory buffer (max 1000 events, 100 per transaction).
a1.channels.ch1.type=memory
a1.channels.ch1.capacity=1000
a1.channels.ch1.transactionCapacity=100
 
 
# Sink: write events to HDFS, partitioned by date (%Y%m%d) using the
# local timestamp; roll files by size (128 MB), never by event count.
a1.sinks.s1.type=hdfs
a1.sinks.s1.hdfs.path=hdfs://192.168.152.34:8020/foriseholdings/Algorithm/applogs/1000/%Y%m%d
a1.sinks.s1.hdfs.writeFormat=Text
a1.sinks.s1.hdfs.fileType=DataStream
# Round timestamps down to the nearest 3 minutes for path escaping.
a1.sinks.s1.hdfs.round = true
a1.sinks.s1.hdfs.roundValue = 3
a1.sinks.s1.hdfs.roundUnit = minute
a1.sinks.s1.hdfs.useLocalTimeStamp = true
# rollCount = 0 disables rolling by number of events.
a1.sinks.s1.hdfs.rollCount = 0
# 134217728 bytes = 128 MB (one HDFS block, typically).
a1.sinks.s1.hdfs.rollSize = 134217728

# Wire the components together: source and sink share channel ch1.
a1.sources.r1.channels = ch1
a1.sinks.s1.channel = ch1

 

你可能感兴趣的:(flume,kafka)