Flink Monitoring Script


First, prepare the startup script for the Flink job in advance.

vim start01.sh

#!/bin/bash
# Look up the most recent checkpoint directory (chk-*) for this job on HDFS.
path=$(hadoop fs -ls -t -r /flink/checkpoints-ae6/* | sort -nr | grep 'chk' | head -n 1 | awk '{print $NF}')
if [ -z "$path" ]; then
  # No checkpoint found: start the job from scratch.
  nohup /home/hadoop/flink-1.13.5/bin/flink run -m yarn-cluster -ys 1 -ynm etl-AE6ss -yjm 4096 -ytm 4096 -yD env.java.opts="-Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8" -d -c com.talkingdata.feilong.app.impl.ETLApplication etl.jar /home/hadoop/nfs/config/feilong/flink-etl/etl-config.properties > ae6log.out 2>&1 &
else
  # Checkpoint found: resume from it via -s.
  nohup /home/hadoop/flink-1.13.5/bin/flink run --allowNonRestoredState -s hdfs://172.23.4.223:8020$path -m yarn-cluster -ys 1 -ynm etl-AE6ss -yjm 4096 -ytm 4096 -yD env.java.opts="-Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8" -d -c com.talkingdata.feilong.app.impl.ETLApplication etl.jar /home/hadoop/nfs/config/feilong/flink-etl/etl-config.properties > ae6log.out 2>&1 &
fi
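
Before relying on start01.sh, you can run the checkpoint lookup by hand and confirm it resolves to the newest chk-* directory. A minimal sketch reusing the same pipeline and checkpoint root as the script above; the path shown in the comment is illustrative only, not real output:

# Print the checkpoint path the startup script would resume from.
hadoop fs -ls -t -r /flink/checkpoints-ae6/* | sort -nr | grep 'chk' | head -n 1 | awk '{print $NF}'
# A result such as /flink/checkpoints-ae6/<job-id>/chk-42 (illustrative) is what
# start01.sh prepends with hdfs://172.23.4.223:8020 and passes to -s.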

The monitoring script:

#!/bin/bash
source ~/.bash_profile
## Job names as used in the Flink startup scripts; comment out any names you do not need.
feilong_name="feilong_flink-streaming-etl"
ae6_name="ae6_flink-streaming-etl"
ae3_name="ae3_flink-streaming-etl"


flink_name="${feilong_name} ${ae6_name} ${ae3_name}" ## Must match the names in the Flink startup scripts; names must be unique when running multiple Flink jobs.
#list=$(yarn application -list | awk 'NR >2{print $1,$2}')
#echo $list
## Names of the Flink jobs currently running on YARN.
flink_running_name=$(yarn application -list | awk 'NR >2{print $2}')
TIME=$(date "+%Y-%m-%d %H:%M:%S")

## Restart the Flink job matching ${file} (set by the caller loops below).
## The branches here must cover the same names, and the same number of names, as flink_name.
restart_flink(){
if [[ ${file} == "${feilong_name}" ]]; then
  echo "Restarting the feilong Flink job"
  nohup sh /home/hadoop/flink-1.13.5/etl/start01.sh > /home/hadoop/flink-1.13.5/etl/feilong_restart_log.out 2>&1
elif [[ ${file} == "${ae6_name}" ]]; then
  echo "Restarting the AE6 Flink job"
  nohup sh /home/hadoop/flink-1.13.5/etl/start02.sh > /home/hadoop/flink-1.13.5/etl/ae6_restart_log.out 2>&1
else
  echo "${file} is not covered by the branches above; please extend the script with a matching case"
fi
}

## Restart all Flink jobs.
restart_all_flink(){
for file in ${flink_name}
do
  restart_flink
done
}


## Restart only the Flink jobs that are missing from the running list.
restart_part_flink(){
for file in ${flink_name}
do
  ## If the job name is not in the running list, restart it.
  if [[ ${flink_running_name} =~ ${file} ]]; then
    echo "${file} is running normally"
  else
    echo "${file} is not in the running list and needs to be restarted"
    restart_flink
  fi
done
}

if [[ -z "$flink_running_name" ]]; then
   echo "[ - The list of running Flink jobs is empty; restarting all Flink jobs: $TIME ]"
   restart_all_flink
else
   echo "[ - The list of running Flink jobs is not empty; some Flink jobs are already running: $TIME ]"
   restart_part_flink
fi
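
To have the monitor run automatically, the script can be scheduled with cron. A minimal sketch, assuming the monitoring script is saved as /home/hadoop/flink-1.13.5/etl/monitor_flink.sh; this filename and the 5-minute interval are assumptions, not part of the original setup:

# crontab -e, then add a line like:
*/5 * * * * sh /home/hadoop/flink-1.13.5/etl/monitor_flink.sh >> /home/hadoop/flink-1.13.5/etl/monitor.log 2>&1

Appending the output to a log file keeps a history of which jobs were found missing and restarted, which helps when a job keeps failing repeatedly.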

Whatever doesn't kill you only makes you stronger!
