Linux下Shell编程实现基于Hadoop的ETL(流程篇)

加群:397706991,共同学习

具体Shell代码下载:http://download.csdn.net/detail/luo849278597/9490920

conf/flow.sh



	
	../conf/import.xml
	../conf/hive.xml
	../conf/export.xml


bin/flow.sh
#! /bin/bash
##########################################
# Prelude of the flow script: loads the user's shell settings, validates
# the FILENAME argument and reads that file into `content` in a
# normalised form.
#
# Usage: COMMAND FILENAME
#
# sed is used to filter and rewrite the text of the file.
# tr -d '\r' deletes carriage returns (e.g. from CRLF line endings),
# not newlines.
##########################################

# Load the user's shell settings when a ~/.bashrc is present.
if [[ -f ~/.bashrc ]]; then
  . ~/.bashrc
fi

# A file name argument is mandatory.
# NOTE(review): the usage path prints to stdout and exits with status 0,
# kept as-is for backward compatibility; a non-zero status would let
# callers detect the misuse -- confirm nobody depends on it before changing.
if [[ -z "$1" ]]; then
  echo 'USAGE:COMMAND FILENAME'
  exit 0
fi

# Fail early when the file cannot be read; otherwise sed would only print
# an error and the script would carry on with an empty `content`.
if [[ ! -r "$1" ]]; then
  echo "ERROR: cannot read file: $1" >&2
  exit 1
fi

# Absolute path of the directory that contains this script. `&&` keeps a
# failed cd from silently yielding the current working directory instead.
bin=$(dirname -- "$0")
bin=$(cd -- "$bin" && pwd)

# j counts the entries stored in the parallel arrays commands/files.
declare -i j=0
declare -a commands
declare -a files

# Normalise the file text:
#   1. strip leading whitespace from every line;
#   2. strip trailing whitespace (the former single expression
#      's/\s*\(.*\)\s*$/\1/' could not do this: the greedy '.*' swallowed
#      the trailing blanks);
#   3. remove whitespace around every '=';
#   4. drop lines that are empty after trimming.
# "$1" is quoted so paths containing spaces or glob characters stay intact.
content=$(sed \
  -e 's/^[[:space:]]*//' \
  -e 's/[[:space:]]*$//' \
  -e 's/[[:space:]]*=[[:space:]]*/=/g' \
  -e '/^$/d' \
  -- "$1")

# Walks the input line by line. For a line accepted by the grep test, a
# file string is extracted with sed (carriage returns removed by tr); when
# that string is non-empty, the pair command/file is stored at index j of
# the parallel arrays commands and files, and j is incremented.
# NOTE(review): this copy of the loop looks damaged and will not parse:
# the if has no matching then, the sed expressions are empty (s///), the
# variable command is never assigned here, and the input redirection after
# done is cut off at a bare <. The markup inside the patterns was
# presumably lost when the script was published -- restore the loop from
# the original script before use; the description above is the apparent
# intent only.
while read line
do \
	if echo ${line}|grep -qE "^.*<\/task>//' -e 's///p'|tr -d '\r'`
		file=`echo ${line}|sed -n -e 's///' -e 's/<\/task>//' -e 's///p'|tr -d '\r'`
		if [ ${#file} -ne 0 ] ; then
			commands[$j]="$command"
			files[$j]="$file"
			let j++
		fi
	fi
done \
<

你可能感兴趣的:(Hadoop,Linux服务器)