Hadoop MapReduce 日志统计分析脚本示例

#! /bin/sh

############################

# Separate this run from the previous one in the log: append ten lines that
# each hold a single space, then a "begin[...]" marker carrying yesterday's
# date.
i=0
while [ "$i" -lt 10 ]; do
  echo " " >> /u1/hadoop-stat/stat.log
  i=$((i + 1))
done

echo "begin[$(date "+%Y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log

############################

#remove file


#######################################
# Delete every entry directly under a directory except the one whose name
# equals the month (YYYY-MM) that yesterday falls in.
# Hidden entries (names starting with ".") are left untouched.
# Arguments: $1 - directory whose children are named by month
# Outputs:   an error message on stderr when $1 is not a directory
# Returns:   non-zero if $1 is empty / not a directory or the month cannot
#            be computed; nothing is deleted in those cases
#######################################
removeFilepathNotCurrentMonth() {
  local base_dir=$1
  local keep_month entry

  # Refuse an empty or bogus path: with an empty $1 the rm below would
  # otherwise target "/<name>".
  if [ -z "$base_dir" ] || [ ! -d "$base_dir" ]; then
    echo "removeFilepathNotCurrentMonth: not a directory: '$base_dir'" >&2
    return 1
  fi

  # If date fails the month would be empty and every entry would look stale,
  # so bail out instead of wiping the directory.
  keep_month=$(date "+%Y-%m" -d "-1 days") || return 1
  [ -n "$keep_month" ] || return 1

  # Glob instead of parsing ls so names containing whitespace stay intact.
  for entry in "$base_dir"/*; do
    # An unmatched glob stays literal; skip it (dangling symlinks still count).
    [ -e "$entry" ] || [ -L "$entry" ] || continue
    if [ "${entry##*/}" != "$keep_month" ]; then
      rm -rf -- "$entry"
    fi
  done
}

# Local directories that receive the fetched log files.
GYLOG_PATH="/u1/hadoop-stat/gylog"
NGINXLOG_PATH="/u1/hadoop-stat/nginxlog"

# Purge stale month directories, logging a marker before each purge.
printf '%s\n' "begin remove gylogpath's files not in current month" >> /u1/hadoop-stat/stat.log
removeFilepathNotCurrentMonth "${GYLOG_PATH}"

printf '%s\n' "begin remove nginxlogpath's files not in current month" >> /u1/hadoop-stat/stat.log
removeFilepathNotCurrentMonth "${NGINXLOG_PATH}"


############################

#scp file between hosts
# Fetch yesterday's application log (from gy02) and nginx access log (from
# gy01) into the per-month local directories, unless the target file is
# already present.

day=$(date "+%Y-%m-%d" -d "-1 days")
month=$(date "+%Y-%m" -d "-1 days")

gyfilename="gylog-${day}.log"
gyfilepath="${GYLOG_PATH}/${month}"

# -p creates missing parents too and is a no-op when the directory exists.
mkdir -p "$gyfilepath"

if [ ! -f "$gyfilepath/$gyfilename" ]; then
  echo "begin scp gylog" >> /u1/hadoop-stat/stat.log
  if ! scp "gy02:/u1/logs/gylog/$gyfilename" "$gyfilepath/"; then
    echo "scp gylog failed" >> /u1/hadoop-stat/stat.log
  fi
fi

# NOTE(review): "ngxinlog" looks like a misspelling of "nginxlog"; it is kept
# unchanged because it determines the stored file name.
nginxfilename="ngxinlog-${day}.log"
nginxfilepath="${NGINXLOG_PATH}/${month}"

mkdir -p "$nginxfilepath"

if [ ! -f "$nginxfilepath/$nginxfilename" ]; then
  echo "begin scp nginxlog" >> /u1/hadoop-stat/stat.log
  # Rename only after a successful copy: a failed or partial download must
  # not end up under the final name, or the -f check above would skip the
  # download on every later run.
  if scp gy01:/u1/logs/lbnginx/gy_access.log.1 "$nginxfilepath/"; then
    mv "$nginxfilepath/gy_access.log.1" "$nginxfilepath/$nginxfilename"
  else
    echo "scp nginxlog failed" >> /u1/hadoop-stat/stat.log
  fi
fi


###########################

#copy file to hadoop
# For each of the two logs: create the dated directory with its input/ and
# output/ children in HDFS when the dated directory is missing, then upload
# the local file when it is not in the input directory yet.

GYLOG_HADOOP_PATH="/logs/gylog"
NGINXLOG_HADOOP_PATH="/logs/nginxlog"

# Despite the name, this holds yesterday's full date (YYYY-MM-DD).
monthhadoop=$(date "+%Y-%m-%d" -d "-1 days")

gyhadoopfilepath="${GYLOG_HADOOP_PATH}/${monthhadoop}"
gyhadoopfilepathinput="${gyhadoopfilepath}/input"
gyhadoopfilepathoutput="${gyhadoopfilepath}/output"

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "${gyhadoopfilepath}"; then
  echo "begin mkdir gyhadoopfilepath in hadoop because of not exist:${gyhadoopfilepath}" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "${gyhadoopfilepath}"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "${gyhadoopfilepathinput}"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "${gyhadoopfilepathoutput}"
fi

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "${gyhadoopfilepathinput}/${gyfilename}"; then
  echo "begin copy gyhadoopfile to hadoop" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -copyFromLocal "${gyfilepath}/${gyfilename}" "${gyhadoopfilepathinput}/"
fi

nginxhadoopfilepath="${NGINXLOG_HADOOP_PATH}/${monthhadoop}"
nginxhadoopfilepathinput="${nginxhadoopfilepath}/input"
nginxhadoopfilepathoutput="${nginxhadoopfilepath}/output"

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "${nginxhadoopfilepath}"; then
  echo "begin mkdir nginxhadoopfilepath in hadoop because of not exist:${nginxhadoopfilepath}" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "${nginxhadoopfilepath}"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "${nginxhadoopfilepathinput}"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "${nginxhadoopfilepathoutput}"
fi

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "${nginxhadoopfilepathinput}/${nginxfilename}"; then
  echo "begin copy nginxhadoopfile to hadoop" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -copyFromLocal "${nginxfilepath}/${nginxfilename}" "${nginxhadoopfilepathinput}/"
fi


##########################

#begin hadoop stat
# Launch the statistics jobs packaged in stat.jar, passing yesterday's date
# ($day) as the only argument and logging a marker before each job.

# Disabled jobs, kept for reference:
#echo "begin hadoop stat RequestTimeCount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.requestTime.RequestTimeCount $day
#echo "begin hadoop stat RequestCount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.request.RequestCount $day

# Each item is "<sub-package>.<class>" below gy.log.mr; order matters
# (NginxCount runs first, then GylogCount).
for stat_job in nginx.NginxCount gylog.GylogCount; do
  # ${stat_job#*.} drops the sub-package, leaving the class name for the log.
  echo "begin hadoop stat ${stat_job#*.}" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar "gy.log.mr.${stat_job}" "$day"
done

##########################
#end for all
# Closing marker for this run, again stamped with yesterday's date.
echo "end[$(date "+%Y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log



注:

/u1/hadoop-stat/stat.jar gy.log.mr.request.RequestCount

/u1/hadoop-stat/stat.jar gy.log.mr.nginx.NginxCount

/u1/hadoop-stat/stat.jar gy.log.mr.gylog.GylogCount

上面列出的 MR 类(RequestCount、NginxCount、GylogCount)实现的是自定义的统计规则,可以根据自己的需求自行开发。


脚本的其余部分主要使用的是 Hadoop 的基本命令(如 dfs -test、dfs -mkdir、dfs -copyFromLocal、jar),了解 Hadoop 的读者应该很容易看懂。

你可能感兴趣的:(mapreduce,hadoop,脚本)