flume实时采集数据+Kafka+shell脚本+定时任务crontab

1.flume实时采集数据

修改 Flume agent config 文件

#cd 到flume的conf目录
[root@localhost conf]# cd /usr/hdp/2.6.4.0-91/flume/conf
# 编辑配置文件
[root@localhost conf]# vim flume-Aerospace.conf

# Agent "a1": one exec source -> one memory channel -> one Kafka sink.
# Every key must start with the agent name that is passed to flume-ng via -n.
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source: follow the access log. tail -F (capital F) re-opens the file by
# name, so collection continues after the log file is rotated.
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /usr/local/dataaccess.log

a1.sources.r1.channels = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
# Kafka topic to publish to
a1.sinks.k1.kafka.topic = AerospaceBigBigData
# Adjust the broker list to match your cluster
a1.sinks.k1.kafka.bootstrap.servers = slave2:6667,slave1:6667,master:6667
a1.sinks.k1.kafka.flumeBatchSize = 10
a1.sinks.k1.kafka.batchSize=10
a1.sinks.k1.kafka.producer.acks = -1
a1.sinks.k1.kafka.producer.linger.ms = 1000

# Channel: the capacity keys use the a1 prefix like every other key here;
# keys written under a different agent name are not applied to agent a1.
a1.channels.c1.type = memory
a1.channels.c1.capacity=100000
a1.channels.c1.transactionCapacity=100

a1.sinks.k1.channel = c1

启动flume命令

#启动flume agent(需在 flume 安装目录 /usr/hdp/2.6.4.0-91/flume 下执行,命令中的 bin/ 和 conf/ 均为相对路径)
bin/flume-ng agent -c conf -f conf/flume-Aerospace.conf -n a1 -Dflume.root.logger=INFO,console
2.创建 kafka
[root@localhost kafka]# cd /usr/hdp/2.6.4.0-91/kafka
#创建topic
./bin/kafka-topics.sh --create --topic AerospaceBigBigData --partitions 1 --replication-factor 1 --zookeeper slave2:2181,slave1:2181,master:2181

#消费者
./bin/kafka-console-consumer.sh --topic AerospaceBigBigData --bootstrap-server slave2:6667,slave1:6667,master:6667 --from-beginning

#生产者
./bin/kafka-console-producer.sh --topic AerospaceBigBigData --broker-list slave2:6667,slave1:6667,master:6667

#删除topic

./bin/kafka-topics.sh --delete --topic AerospaceBigBigData --zookeeper slave2:2181,slave1:2181,master:2181


#查看所有的topic
./bin/kafka-topics.sh  --zookeeper slave2:2181 --list

3.flume脚本文件

用 vim 编辑 AerospaceBigData.sh 脚本文件(保存后需执行 chmod +x AerospaceBigData.sh 赋予可执行权限,定时任务才能直接调用)

[root@localhost ~]#  vim   AerospaceBigData.sh

#!/bin/bash
# Shared settings for the Flume agent control functions below.

# JDK made visible to child processes.
JAVA_HOME=/opt/java/jdk1.8.0_171
export JAVA_HOME

# Flume installation directory; printed once so the operator can see it.
path=/usr/hdp/2.6.4.0-91/flume
printf '%s\n' "$path"

# Text used to pick the agent's java process out of the `ps` listing.
JAR="flume"

# Start the Flume agent as a background process, unless one is already running.
# Globals:
#   path (read) - Flume installation directory
#   JAR  (read) - text used to recognise the agent's java process in `ps` output
# Outputs:
#   Progress messages on stdout. The launched command's console output is
#   appended to $path/logs/flume-console.out.
# Returns:
#   0 once the launch command has been issued (the agent's later health is not
#   checked); 1 if a matching process already exists or the log directory
#   cannot be created.
function start(){
    echo "开始启动 ...."
    local num
    # grep -c prints a bare count. Its non-zero exit status when the count is 0
    # is expected here and is not treated as an error.
    num=$(ps -ef | grep java | grep -c -- "$JAR")
    num=${num:-0}
    echo "进程数:$num"
    if [ "$num" != "0" ] ; then
        echo "进程已经存在,启动失败,请检查....."
        # return instead of exit, so a calling function keeps control.
        return 1
    fi

    if ! mkdir -p "$path/logs" ; then
        echo "无法创建日志目录: $path/logs" >&2
        return 1
    fi

    # Adjust the launch arguments below as needed.
    # The command is detached (&) with stdin/stdout/stderr redirected so that
    # this function returns immediately and no terminal or cron pipe is held
    # open by the agent.
    nohup "$path/bin/flume-ng" agent -c "$path/conf" \
        -f "$path/conf/flume-Aerospace.conf" -n a1 \
        -Dflume.root.logger=INFO \
        </dev/null >>"$path/logs/flume-console.out" 2>&1 &
    echo "启动成功...."
    # NOTE(review): flume.log is the path the script has always announced;
    # where Flume really writes depends on its log4j settings - confirm.
    echo "日志路径: $path/logs/flume.log"
    echo "控制台输出: $path/logs/flume-console.out"
    return 0
}

# Ask every running agent process to shut down.
# Globals:
#   JAR (read) - text used to recognise the agent's java process in `ps` output
# Outputs:
#   Progress messages on stdout, a failure message on stderr.
# Returns:
#   0 if nothing was running or the signal was delivered; 1 if kill failed.
function stop(){
    echo "开始stop ....."
    local -a pids=()
    # Take ONE snapshot of the matching PIDs (column 2 of `ps -ef`), so the
    # "is it running?" decision and the kill act on the same process list.
    mapfile -t pids < <(ps -ef | grep java | grep -- "$JAR" | awk '{print $2;}')
    if [ "${#pids[@]}" -eq 0 ] ; then
        echo "服务未启动,无需停止..."
        return 0
    fi

    # Default signal (TERM) for a graceful stop; kill -9 is deliberately not
    # used here.
    if kill "${pids[@]}" ; then
        echo "进程已经关闭..."
    else
        echo "停止信号发送失败: ${pids[*]}" >&2
        return 1
    fi
}


# Restart the agent: stop it, wait until its process is gone, then start it.
# Globals:
#   JAR (read)          - text used to recognise the agent's java process
#   STOP_TIMEOUT (read) - optional; maximum number of one-second waits for the
#                         old process to disappear (default 60)
# Returns:
#   1 if the old process is still present when the wait limit is reached,
#   otherwise the status of start.
function restart(){
    echo "begin stop process ..."
    stop

    local timeout=${STOP_TIMEOUT:-60}
    # Fall back to the default if the override is not a plain number.
    case "$timeout" in
        ''|*[!0-9]*) timeout=60 ;;
    esac

    # Poll once per second until no matching process is left, but give up
    # after $timeout polls instead of waiting forever.
    local waited=0
    local num
    num=$(ps -ef | grep java | grep -c -- "$JAR")
    while [ "${num:-0}" -gt 0 ]; do
        if [ "$waited" -ge "$timeout" ]; then
            echo "process still running after ${timeout}s, restart aborted" >&2
            return 1
        fi
        sleep 1
        waited=$((waited + 1))
        num=$(ps -ef | grep java | grep -c -- "$JAR")
    done

    echo "process stoped,and starting ..."
    start || return
    echo "started ..."
}

# Command-line entry point: run the action named by the first argument and
# exit with that action's status; anything else prints the usage and exits 1.
case "$1" in
    "start")
        # "$@" is quoted so arguments are forwarded without word splitting.
        start "$@"
        exit $?
        ;;
    "stop")
        stop
        exit $?
        ;;
    "restart")
        restart
        exit $?
        ;;
    *)
        echo "用法: $0 {start|stop|restart}"
        exit 1
        ;;
esac
4.定时任务

用crontab -e进入当前用户的工作表编辑,是常见的vim界面

[root@localhost ~]# crontab -e
# Scheduled jobs. Fields: minute hour day-of-month month day-of-week command.
# The minute field is 0 so each job runs once, at the top of the given hour;
# a "*" there would run the job every minute for that whole hour.
0 9 * * * /usr/local/code/generate_history_v2.py
0 8 * * * /usr/hdp/2.6.4.0-91/flume/AerospaceBigData.sh start
0 23 * * * /usr/hdp/2.6.4.0-91/flume/AerospaceBigData.sh stop

重新启动crontab服务

/sbin/service crond start    # 启动服务
/sbin/service crond stop     # 关闭服务
/sbin/service crond restart  # 重启服务
/sbin/service crond reload   # 重新载入配置

你可能感兴趣的:(flume)