Create a Hadoop 2.4.1 Cluster on Ubuntu (host network mode)

1. Prepare three Ubuntu hosts for the PoC

192.168.1.4 testdocker01
192.168.1.5 testdocker02
192.168.1.6 testdocker03

root@testdocker01:~# cat /etc/os-release
NAME="Ubuntu"
VERSION="14.04, Trusty Tahr"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 14.04 LTS"
VERSION_ID="14.04"
HOME_URL="http://www.ubuntu.com/"
SUPPORT_URL="http://help.ubuntu.com/"
BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/"

root@testdocker01:~# docker version
Client version: 1.1.2
Client API version: 1.13
Go version (client): go1.2.1
Git commit (client): d84a070
Server version: 1.1.2
Server API version: 1.13
Go version (server): go1.2.1


2. Get the Hadoop base image on each node

2.1 Pull the image on each node

root@testdocker01:~# docker pull sequenceiq/hadoop-docker

root@testdocker02:~# docker pull sequenceiq/hadoop-docker

root@testdocker03:~# docker pull sequenceiq/hadoop-docker
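
If passwordless ssh from one admin machine to the three hosts happens to be available (an assumption; this guide does not set it up), the three pulls above can be driven from a single shell, for example:

# hypothetical helper; assumes root ssh access to all three hosts
for h in testdocker01 testdocker02 testdocker03; do
    ssh root@$h "docker pull sequenceiq/hadoop-docker"
done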

2.2 Check the image on each node

The Dockerfile used in step 4 builds FROM the sequenceiq/hadoop-docker:2.4.1 tag, so pull that tag explicitly:

root@testdocker01:~# docker pull sequenceiq/hadoop-docker:2.4.1

root@testdocker01:~# docker images |grep sequenceiq/hadoop-docker |grep 2.4.1

sequenceiq/hadoop-docker           2.4.1               8040f2b27b10        4 weeks ago         854.1 MB


3. Get the docker-scripts source code on each node

root@testdocker01:/# git clone https://github.com/jay-lau/hadoop-docker-master-cluster.git
Cloning into 'hadoop-docker-master-cluster'...
remote: Counting objects: 16, done.
remote: Compressing objects: 100% (13/13), done.
remote: Total 16 (delta 1), reused 16 (delta 1)
Unpacking objects: 100% (16/16), done.
Checking connectivity... done.

root@testdocker02:/# git clone https://github.com/jay-lau/hadoop-docker-master-cluster.git

root@testdocker03:/# git clone https://github.com/jay-lau/hadoop-docker-master-cluster.git

Notes: By default, if the node type is N, bootstrap.sh starts both the namenode and the datanode. If we want to start only the namenode, remove (or comment out) the datanode start logic in bootstrap.sh, as in the snippet below.

# $3 is the node type passed to bootstrap.sh: N = namenode (master), D = datanode
if [ "$3" = "N" ] ; then
    echo "starting Hadoop Namenode,resourcemanager,datanode,nodemanager"

    #rm -rf  /tmp/hadoop-root
    #$HADOOP_PREFIX/bin/hdfs namenode -format> /dev/null 2>&1
    $HADOOP_PREFIX/sbin/hadoop-daemon.sh  start namenode > /dev/null 2>&1
    echo "Succeed to start namenode"

    $HADOOP_PREFIX/sbin/yarn-daemon.sh  start resourcemanager > /dev/null 2>&1
    echo "Succeed to start resourcemanager"


    #$HADOOP_PREFIX/sbin/hadoop-daemon.sh  start datanode > /dev/null 2>&1
    #echo "Succeed to start datanode"

    #$HADOOP_PREFIX/sbin/yarn-daemon.sh  start nodemanager > /dev/null 2>&1
    #echo "Succeed to start nodemanager"

    $HADOOP_PREFIX/bin/hadoop dfsadmin -safemode leave
else
    echo "starting Hadoop Datanode,nodemanager"

    rm -rf  /tmp/hadoop-root
    $HADOOP_PREFIX/sbin/hadoop-daemon.sh  start datanode > /dev/null 2>&1
    echo "Succeed to start datanode"

    $HADOOP_PREFIX/sbin/yarn-daemon.sh  start nodemanager > /dev/null 2>&1
    echo "Succeed to start nodemanager"
fi
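
The part of bootstrap.sh not shown above also has to inject the runtime parameters (HDFS port, datanode port, replication factor, master IP) into the Hadoop XML config files before any daemon starts. The exact mechanism lives in the repository; the lines below are only a sketch of how such substitution is commonly done with sed, with hypothetical placeholder names:

# Sketch only -- the real bootstrap.sh may use different placeholder names.
# $1 = HDFS port, $2 = datanode port, $4 = replication factor, $6 = master IP
sed -i "s/MASTER_IP/$6/g;s/HDFS_PORT/$1/g"       $HADOOP_PREFIX/etc/hadoop/core-site.xml
sed -i "s/DATANODE_PORT/$2/g;s/REPLICATION/$4/g" $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml
sed -i "s/MASTER_IP/$6/g"                        $HADOOP_PREFIX/etc/hadoop/yarn-site.xml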

4. Build the Hadoop Docker image on each node

# Enter the folder containing the hadoop-docker-master-cluster scripts.

4.1 Build the image on testdocker01

root@testdocker01:~# cd /root/hadoop-docker-master-cluster

root@testdocker01:~/hadoop-docker-master-cluster# docker build -t="sequenceiq/hadoop-cluster-docker:2.4.1" .

Sending build context to Docker daemon   149 kB
Sending build context to Docker daemon
Step 0 : FROM sequenceiq/hadoop-docker:2.4.1
 ---> 8040f2b27b10
Step 1 : MAINTAINER SequenceIQ
 ---> Using cache
 ---> 882cff7182a4
Step 2 : USER root
 ---> Using cache
 ---> 408f0a434373
Step 3 : ADD core-site.xml $HADOOP_PREFIX/etc/hadoop/core-site.xml
 ---> 927521fd85ae
Removing intermediate container 7df7dba3d730
Step 4 : ADD hdfs-site.xml $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml
 ---> 949460061b1e
Removing intermediate container e4cb6829fdb9
Step 5 : ADD mapred-site.xml $HADOOP_PREFIX/etc/hadoop/mapred-site.xml
 ---> e268a15c1d3f
Removing intermediate container 5c901152fb30
Step 6 : ADD yarn-site.xml $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
 ---> 284ca37d9857
Removing intermediate container 9c780fc17aa7
Step 7 : ADD slaves $HADOOP_PREFIX/etc/hadoop/slaves
 ---> 1e3a4ffa5632
Removing intermediate container 2094c6c5622f
Step 8 : ADD bootstrap.sh /etc/bootstrap.sh
 ---> b8c32c42b655
Removing intermediate container 0d9616f32157
Step 9 : RUN chown root:root /etc/bootstrap.sh
 ---> Running in 103d2f89a580
 ---> 765f1e58c184
Removing intermediate container 103d2f89a580
Step 10 : RUN chmod 700 /etc/bootstrap.sh
 ---> Running in 5cc86e285299
 ---> 1a4b1dfb615c
Removing intermediate container 5cc86e285299
Step 11 : ENV BOOTSTRAP /etc/bootstrap.sh
 ---> Running in 57e323c93b5f
 ---> a1082f764127
Removing intermediate container 57e323c93b5f
Step 12 : RUN rm -f /etc/ssh/ssh_host_dsa_key
 ---> Running in 6be294648cc9
 ---> a9c5d835c39c
Removing intermediate container 6be294648cc9
Step 13 : RUN rm -f /etc/ssh/ssh_host_rsa_key
 ---> Running in 80f727977a76
 ---> ae19d6e5171d
Removing intermediate container 80f727977a76
Step 14 : RUN rm -f /root/.ssh/id_rsa
 ---> Running in 3fbebc17ee38
 ---> c473e2ed5f6f
Removing intermediate container 3fbebc17ee38
Step 15 : RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
 ---> Running in 72b62e9a0656
 ---> f7444a1eb624
Removing intermediate container 72b62e9a0656
Step 16 : RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
 ---> Running in 550b8fb8809d
 ---> 3338f146799a
Removing intermediate container 550b8fb8809d
Step 17 : RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
 ---> Running in 99d28e7ead76
 ---> d4befb3f8898
Removing intermediate container 99d28e7ead76
Step 18 : RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
 ---> Running in 74be0823aad2
 ---> cf03143c566f
Removing intermediate container 74be0823aad2
Step 19 : EXPOSE 50020 50021 50090 50070 50010 50011 50075 50076 8031 8032 8033 8040 8042 49707 22 8088 8030
 ---> Running in 46c625d45f0d
 ---> 62fce6617879
Removing intermediate container 46c625d45f0d
Step 20 : CMD ["-h"]
 ---> Running in 4677defeb509
 ---> 268426cafb54
Removing intermediate container 4677defeb509
Step 21 : ENTRYPOINT ["/etc/bootstrap.sh"]
 ---> Running in d5d4c1a34868
 ---> b51d46a23ae3
Removing intermediate container d5d4c1a34868
Successfully built b51d46a23ae3

root@testdocker01:~/hadoop-docker-master-cluster# docker images|grep sequenceiq/hadoop-cluster-docker
sequenceiq/hadoop-cluster-docker   2.4.1               b51d46a23ae3        6 minutes ago       854.1 MB

4.2 Build the image on testdocker02

root@testdocker02:~# cd /root/hadoop-docker-master-cluster

root@testdocker02:~/hadoop-docker-master-cluster# docker build -t="sequenceiq/hadoop-cluster-docker:2.4.1" .

4.3 Build the image on testdocker03

root@testdocker03:~# cd /root/hadoop-docker-master-cluster

root@testdocker03:~/hadoop-docker-master-cluster# docker build -t="sequenceiq/hadoop-cluster-docker:2.4.1" .


5. Configure the /etc/hosts file on each node

Add the following entries to /etc/hosts on every node (one way to do this is shown after the list):
192.168.1.4 testdocker01
192.168.1.5 testdocker02
192.168.1.6 testdocker03
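
For example, the entries can be appended on each host with a small heredoc (skip any entries the file already contains):

cat >> /etc/hosts <<'EOF'
192.168.1.4 testdocker01
192.168.1.5 testdocker02
192.168.1.6 testdocker03
EOF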

6. Create the Hadoop Cluster

# Start a container

docker run --net=host  sequenceiq/hadoop-cluster-docker:2.4.1 $1 $2 $3 $4 $5 $6

The parameters are defined as follows:
$1: HDFS port, e.g. 9000
$2: HDFS DataNode port, e.g. 50010
$3: Node type, N (namenode) or D (datanode)
$4: HDFS replication factor, default 1 (this parameter still needs improvement)
$5: Default command: "-d" to run as a service, "-bash" to run interactively
$6: Master node IP address, e.g. 192.168.1.4

# To run interactively, add the "-i -t" options.

# On Ubuntu, "--privileged" must be added for --net="host" mode; otherwise the following error occurs:

root@testdocker01:~# docker run  -i -t --net="host"  sequenceiq/hadoop-cluster-docker:2.4.1 9001 50010 N 1 -bash 192.168.1.4
2014/08/04 14:48:42 Error response from daemon: Cannot start container 38ee915fef8206c8ae35105258ef63b407d70394ef1e3577dce29cdad832e7f1: unable to remount sys readonly: unable to mount sys as readonly max retries reached
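
With those caveats in mind, the six positional parameters can be wrapped in a small helper script so they do not have to be retyped on every node. The sketch below is hypothetical (the script name and defaults are not part of the repository):

#!/bin/bash
# start-hadoop-node.sh -- hypothetical wrapper around the docker run call above
# Usage: ./start-hadoop-node.sh <N|D> <master-ip>
NODETYPE=${1:-N}            # N = namenode (master), D = datanode
MASTER_IP=${2:-192.168.1.4}

docker run -i -t --net="host" --privileged \
    sequenceiq/hadoop-cluster-docker:2.4.1 \
    9001 50010 "$NODETYPE" 1 -bash "$MASTER_IP"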

6.1 Create the NameNode and DataNode (interactive, using -bash; add -i -t) on testdocker01:

First stop and remove any existing containers on this host:
root@testdocker01# docker stop $(docker ps -a -q)
root@testdocker01# docker rm $(docker ps -a -q)

root@testdocker01:~/hadoop-docker-master-cluster# docker run  -i -t --net="host"  --privileged  sequenceiq/hadoop-cluster-docker:2.4.1 9001   50010 N 1 -bash 192.168.1.4
BOOTSTRAP=/etc/bootstrap.sh
HOSTNAME=testdocker01
TERM=xterm
HADOOP_PREFIX=/usr/local/hadoop
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/java/default/bin
PWD=/
JAVA_HOME=/usr/java/default
SHLVL=1
HOME=/
_=/usr/bin/env
/
Hdfs port:9001
Hdfs DataNode port:50010
Namenode or datanode:N
Number of hdfs replication:1
Default command:-bash
Master ip:192.168.1.4
starting Hadoop Namenode,resourcemanager,datanode,nodemanager
Succeed to start namenode
Succeed to start resourcemanager
Succeed to start datanode
Succeed to start nodemanager
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.

Safe mode is OFF
bash-4.1# jps
485 NodeManager
635 Jps
162 ResourceManager
79 NameNode
209 DataNode
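
With all daemons up, a quick HDFS smoke test can be run from this same shell (a generic check, not part of the original walkthrough):

bash-4.1# $HADOOP_PREFIX/bin/hdfs dfs -mkdir -p /tmp/smoke
bash-4.1# $HADOOP_PREFIX/bin/hdfs dfs -ls /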

6.2 Create a DataNode (background service, using -d) on testdocker02:

First stop and remove any existing containers on this host:
root@testdocker02# docker stop $(docker ps -a -q)
root@testdocker02# docker rm $(docker ps -a -q)

root@testdocker02:~/hadoop-docker-master-cluster# docker run   --net="host"  --privileged sequenceiq/hadoop-cluster-docker:2.4.1 9001   50010 D 1 -d 192.168.1.4

BOOTSTRAP=/etc/bootstrap.sh
HOSTNAME=testdocker02
TERM=xterm
HADOOP_PREFIX=/usr/local/hadoop
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/java/default/bin
PWD=/
JAVA_HOME=/usr/java/default
SHLVL=1
HOME=/
_=/usr/bin/env
/
Hdfs port:9001
Hdfs DataNode port:50010
Namenode or datanode:D
Number of hdfs replication:1
Default command:-bash
Master ip:192.168.1.4
starting Hadoop Datanode,nodemanager
Succeed to start datanode
Succeed to start nodemanager
bash-4.1#
bash-4.1# jps
127 NodeManager
43 DataNode
228 Jps

6.3 Create a DataNode (background service, using -d) on testdocker03:

First stop and remove any existing containers on this host:
root@testdocker03# docker stop $(docker ps -a -q)
root@testdocker03# docker rm $(docker ps -a -q)

root@testdocker03:~/hadoop-docker-master-cluster# docker run   --net="host"  --privileged  sequenceiq/hadoop-cluster-docker:2.4.1 9001    50010 D 1 -d 192.168.1.4
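
Since this run is not interactive, the container and its startup output can be checked from another shell on testdocker03 (a generic check, not from the original guide):

root@testdocker03:~# docker ps
root@testdocker03:~# docker logs $(docker ps -q -l)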

7. Check the cluster status

7.1 Access the web GUI

Access http://9.111.156.231:50070/dfshealth.html#tab-datanode

or

Access http://192.168.1.4:50070/dfshealth.html#tab-datanode
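
If no browser is available on the test hosts, the number of live datanodes can also be read from the NameNode's JMX servlet with curl (a generic check; the metric name comes from standard Hadoop 2.x, not from the original guide):

curl -s http://192.168.1.4:50070/jmx | grep NumLiveDataNodes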


7.2 Check the status from the command line

bash-4.1# $HADOOP_PREFIX/bin/hdfs dfsadmin -report
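
The YARN side can be checked in the same way; the following standard Hadoop command (not shown in the original guide) lists the NodeManagers registered with the ResourceManager:

bash-4.1# $HADOOP_PREFIX/bin/yarn node -list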


8. Run a sample Hadoop job

bash-4.1# $HADOOP_PREFIX/bin/hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.4.1.jar grep input output 'dfs[a-z.]+'

14/07/30 23:59:40 INFO client.RMProxy: Connecting to ResourceManager at /192.168.1.4:8032
14/07/30 23:59:40 WARN mapreduce.JobSubmitter: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
14/07/30 23:59:40 INFO input.FileInputFormat: Total input paths to process : 26
14/07/30 23:59:41 INFO mapreduce.JobSubmitter: number of splits:26
14/07/30 23:59:41 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1406778261600_0003
14/07/30 23:59:41 INFO mapred.YARNRunner: Job jar is not present. Not adding any jar to the list of resources.
14/07/30 23:59:42 INFO impl.YarnClientImpl: Submitted application application_1406778261600_0003
14/07/30 23:59:42 INFO mapreduce.Job: The url to track the job: http://testdocker01:8088/proxy/application_1406778261600_0003/
14/07/30 23:59:42 INFO mapreduce.Job: Running job: job_1406778261600_0003

14/07/30 23:59:49 INFO mapreduce.Job: Job job_1406778261600_0003 running in uber mode : false
.........

14/07/31 00:00:17 INFO client.RMProxy: Connecting to ResourceManager at /192.168.1.4:8032
14/07/31 00:00:17 WARN mapreduce.JobSubmitter: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
14/07/31 00:00:17 INFO input.FileInputFormat: Total input paths to process : 1
14/07/31 00:00:18 INFO mapreduce.JobSubmitter: number of splits:1
14/07/31 00:00:18 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1406778261600_0004
14/07/31 00:00:18 INFO mapred.YARNRunner: Job jar is not present. Not adding any jar to the list of resources.
14/07/31 00:00:18 INFO impl.YarnClientImpl: Submitted application application_1406778261600_0004
14/07/31 00:00:18 INFO mapreduce.Job: The url to track the job: http://testdocker01:8088/proxy/application_1406778261600_0004/
14/07/31 00:00:18 INFO mapreduce.Job: Running job: job_1406778261600_0004
14/07/31 00:00:23 INFO mapreduce.Job: Job job_1406778261600_0004 running in uber mode : false
14/07/31 00:00:23 INFO mapreduce.Job:  map 0% reduce 0%
14/07/31 00:00:28 INFO mapreduce.Job:  map 100% reduce 0%
14/07/31 00:00:33 INFO mapreduce.Job:  map 100% reduce 100%
14/07/31 00:00:34 INFO mapreduce.Job: Job job_1406778261600_0004 completed successfully
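
Once the job has completed, the result of the grep example can be read back from HDFS (this step follows the standard sequenceiq/hadoop-docker usage and is not shown in the transcript above):

bash-4.1# $HADOOP_PREFIX/bin/hdfs dfs -cat output/*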

