检测hdfs 文件是否存在并获取文件列表、大小、创建时间的shell开发

命名 check_hdfs_dir.sh

运行:

bash check_hdfs_dir.sh /usr/data/000/

代码如下:

#!/bin/bash

# Check an HDFS directory: report its total size, dir/file/block counts,
# creation time, and per-file details (size, timestamp, line count).
# Usage: bash check_hdfs_dir.sh /usr/data/000/

lujing=$1   # HDFS path to inspect (first positional argument)

echo "$lujing"

# Run fsck on the path and capture its summary report.
# Direct redirection replaces the original string-building + eval,
# which was both unnecessary and unsafe for paths with special chars.
hadoop fsck "$lujing" > hadoop_fsck.txt

# Total byte count from the "Total size: N B" summary line.
val=$(awk '/Total size/ {print $3}' hadoop_fsck.txt)

 

# Debug override for the total size value:
#val=97987999

# Unit sizes in bytes ($(( )) replaces the deprecated $[ ] form).
val_k=$((1024))
val_m=$((1024*1024))
val_g=$((1024*1024*1024))
val_t=$((1024*1024*1024*1024))

# Print the total size in the largest unit it reaches.
# The original compared with `-gt 1`, which reported sizes between 1x
# and 2x a unit in the next-smaller unit and printed nothing at all for
# values under 2 KB; `-ge 1` plus a byte fallback fixes both.
if [ $((val / val_t)) -ge 1 ]
then
    echo "文件大小$((val / val_t))t"
elif [ $((val / val_g)) -ge 1 ]
then
    echo "文件大小$((val / val_g))g"
elif [ $((val / val_m)) -ge 1 ]
then
    echo "文件大小$((val / val_m))m"
elif [ $((val / val_k)) -ge 1 ]
then
    echo "文件大小$((val / val_k))k"
else
    echo "文件大小${val}b"
fi

 

# Directory count from the "Total dirs: N" summary line.
val1=$(awk '/Total dirs/ {print $3}' hadoop_fsck.txt)
echo "文件夹数aa:$val1"

# File count from the "Total files: N" summary line.
val2=$(awk '/Total files/ {print $3}' hadoop_fsck.txt)
echo "文件数bb:$val2"

# Block count.  The parentheses must be escaped: in an awk regex,
# "(validated)" is a grouping construct, so the original unescaped
# pattern matched "Total blocks validated" and never the real fsck
# output line "Total blocks (validated): N" -- val3 was always empty.
val3=$(awk '/Total blocks \(validated\)/ {print $3}' hadoop_fsck.txt)
echo "总块数cc:$val3"





 

# NOTE: the duplicate val_k/val_m/val_g/val_t definitions that used to
# sit here were removed -- the same constants are already defined above.

# List the directory once; the listing file is reused by the loop below.
hadoop fs -ls "$lujing" > hadoop_ls.txt

# Creation time: the date column ($6) of the first listing entry that
# matches the path.  Runs the pipeline directly instead of building a
# command string and eval-ing it; `--` protects paths starting with '-'.
# (The "----****" / command-echo debug tracing was removed.)
chuangjianshijian=$(grep -- "$lujing" hadoop_ls.txt | head -1 | awk '{print $6}')
echo "创建时间ddd=${chuangjianshijian}"

 

# Walk every entry of the ls listing: print each file's size, date,
# time, name, line count, and a human-readable size.
while read -r line
do
    # "hadoop fs -ls" prints a "Found N items" header line -- skip it.
    # (quoted expansion so an odd line cannot break the test syntax)
    if [ "${line:0:5}" == "Found" ]
    then
        echo "首行略过"
    else
        # ls columns: 5 = size, 6 = date, 7 = time, 8 = full path.
        # NOTE: the inner loop runs in a pipeline subshell, so its
        # variables are not visible after `done` -- intentional here.
        echo "$line" | awk -F ' ' '{print $5,$6,$7,$8}' | while read -r daxiao day time mulu
        do
            echo daxiao=$daxiao day=$day time=$time mulu=$mulu

            # File name = full path with the leading directory stripped.
            length=${#lujing}
            file_name=${mulu:length}
            echo 文件名=$file_name

            # Line count of the file (wc -l already emits one line, so
            # the original's trailing `| sed -n '1p'` was redundant).
            hangshu=$(hadoop fs -cat "$mulu" | wc -l)
            echo "行数:$hangshu"
            echo length=$length

            val=$daxiao

            # Report the size in the largest unit it reaches (`-ge 1`
            # instead of the original `-gt 1`, which under-reported
            # sizes between 1x and 2x a unit).
            if [ $((val / val_t)) -ge 1 ]
            then
                echo "文件大小$((val / val_t))t"
            elif [ $((val / val_g)) -ge 1 ]
            then
                echo "文件大小$((val / val_g))g"
            elif [ $((val / val_m)) -ge 1 ]
            then
                echo "文件大小$((val / val_m))m"
            elif [ $((val / val_k)) -ge 1 ]
            then
                echo "文件大小$((val / val_k))k"
            fi
            echo "上传时间:day:$day,time:$time"
        done
    fi
done < hadoop_ls.txt

你可能感兴趣的:(检测hdfs 文件是否存在并获取文件列表、大小、创建时间的shell开发)