#!/bin/bash
# distribute-file: fetch a file from HDFS and copy it into the same local
# directory on every host listed in ./hosts.
# Must be run as root on a node with passwordless ssh to all target hosts.
#
# Usage: ./distribute-file <hdfs_file> <dest_dir>
#   hdfs_file - full path of a file on HDFS
#   dest_dir  - local directory (existing on every host) to receive the file

if [ $# != 2 ]; then
  echo "usage: $0 <hdfs_file> <dest_dir>" >&2
  exit 1
fi

# Directory containing this script; the hosts file must live next to it.
CURR_PATH="$(cd "$(dirname "$0")" && pwd)"
hosts_file="$CURR_PATH/hosts"

if [ ! -f "$hosts_file" ]; then
  echo "$hosts_file not exist" >&2
  exit 1
fi

source_file=$1
dest_dir=$2

# Bare file name of the HDFS path (strip everything up to the last '/').
filename=${source_file##*/}
destfilename="$dest_dir/$filename"

echo "$filename"
echo "$destfilename"

# Pull the file from HDFS to the local destination first; abort on failure
# so we never fan out a missing file.
hadoop fs -get "$source_file" "$dest_dir" || exit 1

# Push the local copy to every host listed in the hosts file
# (one hostname per line; blank lines are skipped).
while IFS= read -r h; do
  [ -n "$h" ] || continue
  scp "$destfilename" "root@$h:$destfilename"
done < "$hosts_file"
原理就是在主节点上(能够免密钥ssh集群其他所有节点)执行该脚本(取名为distribute-file),hdfs_file为hdfs上的一个文件的完整路径,dest_dir为节点本地目录用来存放分发文件的目录。
必须以root用户执行,与该脚本同级目录下面必须放置一个名叫hosts的文件,里面写上你要分发的所有的节点的hostname。
eg.:
./distribute-file /user/hdfs/tmp/fairscheduler.xml /etc/spark
#!/bin/bash if [ $# != 1 ]; then echo "useage: $0 <cmd>" exit 1 fi CURR_PATH="$(cd "`dirname "$0"`"/; pwd)" hosts_file=$CURR_PATH"/hosts" if [ ! -f "$hosts_file" ]; then echo "$hosts_file not exist" exit 1 fi remote_cmd=$1 for h in $(cat $hosts_file) do ssh root@$h "$remote_cmd" done远程分发执行命令脚本(取名叫distribute-exec),通过免秘钥ssh到远程主机上执行命令,用法大体和上面那个脚本相似,也是要当前目录下面放置hosts文件,举例:
./distribute-exec "cd /opt/cloudera/parcels/CDH/lib/spark/lib; rm -f spark-assembly.jar; ln -s spark-assembly-with-hive-cdh5.3.2.jar spark-assembly.jar;"
多个命令之间用分号分隔