dingshi.sh
#!/bin/sh
#Configuration section
#Task time, e.g. 203000 means 20:30:00 and 190000 means 19:00:00
startTime=150100
#The program to execute at the scheduled time, wrapped in a function so the
#redirection and the trailing & are applied when it is actually invoked
run_program() {
    nohup hive -e 'select count(1) from test;' >> "$(date '+%Y%m%d').log" 2>&1 &
}
#Execution section
perDate=$(date "+%Y%m%d")
isNewDay=1
isFirstTime=1
echo "Task schedule time: $startTime, waiting..."
while true ; do
    curTime=$(date "+%H%M%S")
    curDate=$(date "+%Y%m%d")
    #Skip weekends
    week=$(date +%w)
    if [ "$week" -eq 6 ] || [ "$week" -eq 0 ]; then
        isNewDay=0
        sleep 1
        continue
    else
        #Check the time and run the task on workdays
        if [ "$isNewDay" -eq "1" ]; then
            if [ "$curTime" -gt "$startTime" ]; then
                if [ "$isFirstTime" -eq "0" ]; then
                    echo 'Launching the program...'
                    run_program
                    echo 'The program is running in the background...'
                fi
                isNewDay=0
            else
                if [ "$isFirstTime" -eq "1" ]; then
                    echo "New day: $curDate, task schedule time: $startTime, waiting..."
                    isFirstTime=0
                fi
            fi
        else
            #A new day has started
            if [ "$curDate" -gt "$perDate" ]; then
                echo "New day: $curDate, task schedule time: $startTime, waiting..."
                isNewDay=1
                perDate=$curDate
            fi
        fi
        sleep 1
    fi
done
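A usage sketch (the log file name is an assumption): run the scheduler itself in the background so it keeps polling once per second:
nohup ./dingshi.sh >> dingshi_scheduler.log 2>&1 &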
start_offline.sh
#!/bin/sh
DIR=$(cd "$(dirname "$0")";pwd)
mkdir -p log/run
day=`date +%Y%m%d`
#Store the full command in a quoted string and run it with eval so the redirection and the trailing & are honoured
program="nohup spark-submit --master spark://master:7077 --executor-memory 10G --num-executors 10 --executor-cores 10 --total-executor-cores 100 --conf spark.port.maxRetries=128 --driver-memory 10G --jars ojdbc7-12.1.0.2.jar,fastjson-1.2.46.jar --class com.xx.mainclass --driver-java-options -Djava.security.egd=file:/dev/./urandom xxx.jar $1 $2 $3 >> log/run/spark_application_offline_$day.log 2>&1 &"
eval $program
#If this script is invoked from a Java Process, running the job via $program as above returns immediately,
#and the exit status the caller sees is the script's own, not the program's.
#To get the program's exit status, wrap the command in $() as below (the trailing 2>&1 redirection also matters):
$(nohup spark-submit --master spark://master:7077 --executor-memory 10G --num-executors 10 --executor-cores 10 --total-executor-cores 100 --conf spark.port.maxRetries=128 --driver-memory 10G --jars ojdbc7-12.1.0.2.jar,fastjson-1.2.46.jar --class com.xx.mainclass --driver-java-options -Djava.security.egd=file:/dev/./urandom xxx.jar $1 $2 $3 >>log/run/spark_application_offline_$day.log 2>&1 &)
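An alternative sketch (an assumption about the desired behaviour, not part of the original script): if the caller should block until the Spark job finishes and see its exit code, the script can wait on the background pid and exit with the job's status:
eval $program    # launches spark-submit in the background as above
wait $!          # block until the background spark-submit process exits
exit $?          # propagate the job's own exit status to the caller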
stop.sh
#!/bin/bash
timeout=10
param=$1
DIR=$(cd "$(dirname "$0")";pwd)
pid=$DIR/spark-$param.pid
#If the second argument is empty, kill the process; otherwise just log that the offline job has been running for $2 minutes
if [ ! -n "$2" ]; then
    if [ -f $pid ]; then
        TARGET_PID=`cat $pid`
        if kill -0 $TARGET_PID > /dev/null 2>&1; then
            echo "stopping $param"
            kill $TARGET_PID
            sleep $timeout
            if kill -0 $TARGET_PID > /dev/null 2>&1; then
                echo "$param did not stop gracefully after $timeout seconds: killing with kill -9"
                kill -9 $TARGET_PID
            fi
        else
            echo "no $param pid found to stop"
        fi
        rm -f $pid
    else
        echo "no $param pid found to stop"
    fi
else
    echo "the offline job has been running too long: $2 minutes so far" >> log/run/start.log
fi
param=Spark-$1
pid=$DIR/IRM-$param.pid
if [ -f $pid ]; then
    TARGET_PID=`cat $pid`
    if kill -0 $TARGET_PID > /dev/null 2>&1; then
        echo "stopping $param"
        kill $TARGET_PID
        sleep $timeout
        if kill -0 $TARGET_PID > /dev/null 2>&1; then
            echo "$param did not stop gracefully after $timeout seconds: killing with kill -9"
            kill -9 $TARGET_PID
        fi
    else
        echo "no $param pid found to stop"
    fi
    rm -f $pid
else
    echo "no $param pid found to stop"
fi
#Read the HDFS paths configured in path.properties (format: key=hdfs path)
string=`grep 'task=hdfs' path.properties | sed 's/[[:space:]]//g'`
array=(${string//=/ })
string1=`grep 'sparkLoaclLifeCycle=hdfs' path.properties | sed 's/[[:space:]]//g'`
array1=(${string1//=/ })
hdfs dfs -rm -r ${array[1]}
hdfs dfs -rm -r ${array1[1]}
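A usage sketch (all values hypothetical): the first argument selects the pid file, the optional second argument switches the first block into log-only mode, and the last four lines assume path.properties holds key=HDFS-path pairs, for example:
task=hdfs://master:9000/bigdata/task
sparkLoaclLifeCycle=hdfs://master:9000/bigdata/lifecycle
./stop.sh offline        # stop the jobs recorded in spark-offline.pid and IRM-Spark-offline.pid, then remove the two HDFS paths
./stop.sh offline 30     # skip the spark-offline.pid kill and just log that the offline job has run for 30 minutes; the rest of the script still runs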
start.sh
#!/bin/sh
DIR=$(cd "$(dirname "$0")";pwd)
mkdir -p log/run
pid=$DIR/spark-$1.pid
#Store the command in a quoted string and run it with eval so the redirection,
#the trailing & and $! (used below for the pid file) all work as intended
program1="nohup java -Djava.security.egd=file:/dev/./urandom -Djava.ext.dirs=$JAVA_HOME/jre/lib/ext:$SPARK_HOME/jars:.:./lib/ -cp ./lib/ -jar xx.jar $1 $2 $3 >> log/run/start.log 2>&1 &"
eval $program1
echo "starting success"
echo $! > $pid
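A usage sketch (the arguments are hypothetical): the first argument names the pid file, so start.sh and stop.sh are meant to be called with the same value:
./start.sh offline 20180716 full     # writes the background pid to spark-offline.pid
./stop.sh offline                    # reads spark-offline.pid and stops the job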
install.sh
#! /bin/bash
pids=$(ps -ef | grep "xxx.jar consumer" | grep -v grep | awk '{print $2}')
for pid in $pids
do
    echo $pid
    kill -9 $pid
done
pids=$(ps -ef | grep "xxx.jar product" | grep -v grep | awk '{print $2}')
for pid in $pids
do
    echo $pid
    kill -9 $pid
done
#The steps above find the running processes and kill them.
#Next, back up the old program and install the new one into the target directory.
ls_time=`date +%Y%m%d%H%M%S`
path=/app/path
path_bak=/app/path_bak
current_user=`whoami`
if [ "$current_user" != "cuixiaojie" ]
then
echo "current user is ($current_user),is not betadmin,exit"
exit 1
fi
mkdir -p $path_bak
mv $path $path_bak/$ls_time
mkdir -p $path
cd $path
cp -r /app/install/* .
nohup java -Djava.ext.dirs=./lib/:$JAVA_HOME/jre/lib/ext -cp ./lib/ -jar xxx.jar consumer > /dev/null 2>&1 &
c=`ps -ef|grep "xxx.jar consumer"|grep -v grep|wc -l`
if [[ ${c} -ne 1 ]]; then
    echo "Failed to start consumer, please contact the developers!"
    exit 1
fi
nohup java -Djava.ext.dirs=./lib/:$JAVA_HOME/jre/lib/ext -cp ./lib/ -jar xxx.jar product > /dev/null 2>&1 &
p=`ps -ef|grep "xxx.jar product"|grep -v grep|wc -l`
if [[ ${p} -ne 1 ]]; then
    echo "Failed to start product, please contact the developers!"
    exit 1
fi
echo "Install succeeded"
rollback.sh
#! /bin/bash
pids=$(ps -ef | grep "xxx.jar consumer" | grep -v grep | awk '{print $2}')
for pid in $pids
do
    echo $pid
    kill -9 $pid
done
pids=$(ps -ef | grep "xxx.jar product" | grep -v grep | awk '{print $2}')
for pid in $pids
do
    echo $pid
    kill -9 $pid
done
path=/app/path
path_bak=/app/path_bak
current_user=`whoami`
if [ "$current_user" != "cuixiaojie" ]
then
echo "current user is ($current_user),is not betadmin,exit"
exit 1
fi
cd $path_bak
last_bak=`ls -a|sort -r |head -n 1`
if [ ! $1 ]; then
    echo "will roll back to the $last_bak backup"
else
    last_bak=$1
    echo "will roll back to the $last_bak backup"
fi
if [ -d $last_bak ]; then
    echo "$last_bak exists"
else
    echo "$last_bak does not exist, rollback will fail"
    exit 1
fi
rm -rf $path
mv $path_bak/$last_bak $path
cd $path
nohup java -Djava.ext.dirs=./lib/:$JAVA_HOME/jre/lib/ext -cp ./lib/ -jar xxx.jar consumer > /dev/null 2>&1 &
c=`ps -ef|grep "xxx.jar consumer"|grep -v grep|wc -l`
if [[ ${c} -ne 1 ]]; then
    echo "Failed to start consumer, please contact the developers!"
    exit 1
fi
nohup java -Djava.ext.dirs=./lib/:$JAVA_HOME/jre/lib/ext -cp ./lib/ -jar xxx.jar product > /dev/null 2>&1 &
p=`ps -ef|grep "xxx.jar product"|grep -v grep|wc -l`
if [[ ${p} -ne 1 ]]; then
    echo "Failed to start product, please contact the developers!"
    exit 1
fi
echo "Rollback succeeded"
isExsitsHDFS.sh
#!/bin/bash
read file
hadoop fs -test -d "$file"
if [ $? -eq 0 ]; then
    echo "$file is a directory"
else
    echo "$file is not a directory"
fi
hadoop fs -test -f "$file"
if [ $? -eq 0 ]; then
    echo "$file is a file"
else
    echo "$file is not a file"
fi
hadoop fs -test -s "$file"
if [ $? -eq 0 ]; then
    echo "$file is greater than zero bytes in size"
else
    echo "$file is not greater than zero bytes in size"
fi
hadoop fs -test -z "$file"
if [ $? -eq 0 ]; then
    echo "$file is zero bytes in size"
else
    echo "$file is not zero bytes in size"
fi
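A usage sketch (the path is hypothetical): the script reads the path to test from standard input, so it can be driven interactively or through a pipe:
echo /user/hive/warehouse/test | ./isExsitsHDFS.sh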
isExsitsLocal.sh
#!/bin/bash
read file
if [ ! -f "$file" ]; then
    echo "file does not exist"
fi
listener.sh
#!/bin/sh
dir="/data/2018data_20180101-20180716"
sleeptime=3
files=$(ls $dir)
for filename in $files
do
    runningdate=`cat /home/cuixiaojie/log/run/TaskIsRunningDate | tr -cd "[0-9]"`
    count=`hdfs dfs -ls /bigdata/lifecycle | wc -l`
    val=`expr $count - 1`
    business_date=`echo $filename | tr -cd "[0-9]"`
    echo "the next business date to load into Hive is $business_date"
    while [ $val -gt 0 ]
    do
        echo "the big-data job is still processing $runningdate, waiting..."
        count=`hdfs dfs -ls /bigdata/lifecycle | wc -l`
        val=`expr $count - 1`
        sleep $sleeptime
    done
    echo "start loading from here"
    while [ $business_date -gt $runningdate ]
    do
        echo "the big-data job has not loaded the data for $business_date yet"
        sleep $sleeptime
        runningdate=`cat /home/cuixiaojie/log/run/TaskIsRunningDate | tr -cd "[0-9]"`
    done
done
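A sketch of the assumed inputs (all values hypothetical): TaskIsRunningDate is expected to hold the business date the big-data job is currently processing, and the file names under $dir are expected to carry the business date they belong to:
cat /home/cuixiaojie/log/run/TaskIsRunningDate   # e.g. 20180701
ls /data/2018data_20180101-20180716              # e.g. data_20180101.csv  data_20180102.csv  ...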
clear_expired_xml.sh: cleans up expired xml files whose file names are timestamps
#!/bin/sh
current=`date "+%Y-%m-%d %H:%M:%S"`
#accurate to the second
timeStamp=`date -d "$current" +%s`
echo $timeStamp
#convert current to a timestamp in milliseconds
#currentTimeStamp=$((timeStamp*1000+`date "+%N"`/1000000))
#echo $currentTimeStamp
#any file whose timestamp is smaller than this value is expired
expired_timeStamp=`expr $timeStamp - $1`
echo $expired_timeStamp
function getdir(){
    for element in `ls $1`
    do
        dir_or_file=$1"/"$element
        if [ -d $dir_or_file ]
        then
            getdir $dir_or_file
        else
            xml_timestamp=`echo $dir_or_file | tr -cd "[0-9]"`
            if [ $xml_timestamp -lt $expired_timeStamp ]
            then
                #rm -rf "$dir_or_file"
                echo "xml to be deleted: $dir_or_file"
            fi
        fi
    done
}
echo "3秒后将开始清理,清理将持续一段时间,清理结束后会有提示,请勿中途停掉"
sleep 3
root_dir="/data/spider"
getdir $root_dir
echo "清理结束"