Building a Hadoop Cluster on Raspberry Pi

Software versions:

hadoop-2.6.4; hbase-0.98.20-hadoop2; zookeeper-3.4.6

APT sources used:

deb  http://mirrors.ustc.edu.cn/raspbian/raspbian/ jessie main contrib non-free rpi
deb-src  http://mirrors.ustc.edu.cn/raspbian/raspbian/ jessie main contrib non-free rpi

Cluster layout:

Hostname    IP               Installed software                 Running processes
nna         192.168.11.81    jdk, hadoop                        NameNode, DFSZKFailoverController (zkfc)
nns         192.168.11.82    jdk, hadoop                        NameNode, DFSZKFailoverController (zkfc)
rma         192.168.11.83    jdk, hadoop                        ResourceManager
rms         192.168.11.84    jdk, hadoop                        ResourceManager
hba         192.168.11.85    jdk, hadoop, hbase                 HMaster
hbs         192.168.11.86    jdk, hadoop, hbase                 HMaster
dn1         192.168.11.91    jdk, hadoop, zookeeper, hbase      DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer
dn2         192.168.11.92    jdk, hadoop, zookeeper, hbase      DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer
dn3         192.168.11.93    jdk, hadoop, zookeeper, hbase      DataNode, NodeManager, JournalNode, QuorumPeerMain, HRegionServer


1. Create the hadoop user (run as root)

adduser hadoop
chmod +w /etc/sudoers
# add the following line to /etc/sudoers, then remove write permission again
hadoop ALL=(root) NOPASSWD:ALL
chmod -w /etc/sudoers
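
Alternatively (optional, not required by the rest of this guide), a drop-in file under /etc/sudoers.d avoids editing /etc/sudoers directly:

echo 'hadoop ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/hadoop
chmod 440 /etc/sudoers.d/hadoop
visudo -c    # check the sudoers syntax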

2. Set the time zone

sudo cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
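
Copying the zoneinfo file only sets the time zone. Because the Raspberry Pi has no battery-backed clock and HBase is sensitive to clock skew, it is also worth keeping the clocks in sync with NTP on every node (package name as in Raspbian jessie):

sudo apt-get install -y ntp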

3. Auto-mount the USB drive at boot

The USB drive is formatted as FAT32 (vfat).

uid is the user ID and gid is the group ID of the hadoop user; check them with the id command.

Edit /etc/fstab and append:

/dev/sda1       /hadoop        vfat    suid,exec,dev,noatime,user,utf8,rw,auto,async,uid=1001,gid=1001    0   0
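
To confirm the uid/gid values and test the entry without rebooting (this assumes the hadoop user was created with uid/gid 1001; adjust the fstab line if id reports different values):

id hadoop
sudo mkdir -p /hadoop
sudo mount -a
df -h /hadoop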

4. Configure hosts

Edit /etc/hosts and add all cluster nodes:

192.168.11.81 nna
192.168.11.82 nns
192.168.11.83 rma
192.168.11.84 rms
192.168.11.85 hba
192.168.11.86 hbs
192.168.11.91 dn1
192.168.11.92 dn2
192.168.11.93 dn3

Edit /etc/hostname and set it to this node's own hostname, e.g. on the first NameNode:

nna

5. Install the JDK


Install either OpenJDK or the Oracle JDK:

sudo apt-cache search jdk

sudo apt-get install openjdk-8-jdk
# or
sudo apt-get install oracle-java8-jdk
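
Afterwards, verify the JDK and note its installation path; it is needed for JAVA_HOME in the next step (the Oracle JDK on Raspbian installs under /usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt):

java -version
ls /usr/lib/jvm/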

6. Configure environment variables

Edit /etc/profile and append:

# set java environment
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/
export JRE_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin

# set hadoop environment
export HADOOP_HOME=/home/hadoop/hadoop-2.6.4
export PATH=$PATH:$HADOOP_HOME/bin

# set zookeeper environment
export ZK_HOME=/home/hadoop/zookeeper-3.4.6
export PATH=$PATH:$ZK_HOME/bin

# set hbase environment
export HBASE_HOME=/home/hadoop/hbase-0.98.20-hadoop2
export PATH=$PATH:$HBASE_HOME/bin
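
Apply the variables in the current shell and check that they are picked up (the hadoop command only works once the Hadoop tarball has been unpacked to /home/hadoop/hadoop-2.6.4 in step 9):

source /etc/profile
echo $JAVA_HOME
hadoop version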

7. Create directories


mkdir -p /hadoop/tmp
mkdir -p /hadoop/data/tmp/journal
mkdir -p /hadoop/data/dfs/name
mkdir -p /hadoop/data/dfs/data
mkdir -p /hadoop/data/yarn/local
mkdir -p /hadoop/data/zookeeper
mkdir -p /hadoop/log/yarn

8. Install ZooKeeper


Edit ~/zookeeper-3.4.6/conf/zoo.cfg:
# The number of milliseconds of each tick
# basic time unit (ms) used for heartbeats between servers and clients
tickTime=2000

# The number of ticks that the initial
# synchronization phase can take
# maximum number of ticks a follower may take to connect and sync with the leader at startup
initLimit=10

# The number of ticks that can pass between
# sending a request and getting an acknowledgement
# maximum number of ticks allowed between a request and an acknowledgement when followers sync with the leader
syncLimit=5

# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
# directory where ZooKeeper stores its data and logs
dataDir=/hadoop/data/zookeeper

# the port at which the clients will connect
# port on which clients connect to ZooKeeper

clientPort=2181
server.1=dn1:2888:3888
server.2=dn2:2888:3888
server.3=dn3:2888:3888

# server.A=B:C:D
# A is the server's id number; B is the server's hostname or IP address;
# C is the port used to exchange data with the cluster leader; D is the port used for leader election when the leader fails.


# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60


#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir

#autopurge.snapRetainCount=3

# Purge task interval in hours
# Set to "0" to disable auto purge feature

#autopurge.purgeInterval=1

Next, in the dataDir directory on each dn node, create a file named myid containing that node's server number (an integer between 1 and 255).
The number must match the server.N entry for that node in zoo.cfg;
for example, for server.1=dn1:2888:3888 the myid file on dn1 must contain 1.
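
For example, with the zoo.cfg above:

echo 1 > /hadoop/data/zookeeper/myid    # on dn1
echo 2 > /hadoop/data/zookeeper/myid    # on dn2
echo 3 > /hadoop/data/zookeeper/myid    # on dn3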


9. Install Hadoop

Edit $HADOOP_HOME/etc/hadoop/slaves (the list of DataNode/NodeManager hosts):

dn1
dn2
dn3

Edit $HADOOP_HOME/etc/hadoop/hadoop-env.sh:

# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/

Edit $HADOOP_HOME/etc/hadoop/yarn-env.sh:

# some Java parameters
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/

Edit $HADOOP_HOME/etc/hadoop/core-site.xml:


<configuration>
    <!-- default filesystem: the logical HA nameservice defined in hdfs-site.xml -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://cluster</value>
    </property>

    <property>
        <name>io.file.buffer.size</name>
        <value>65535</value>
    </property>

    <!-- hadoop temporary directory (on the USB drive) -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/hadoop/tmp</value>
    </property>

    <property>
        <name>hadoop.proxyuser.hduser.hosts</name>
        <value>*</value>
    </property>

    <property>
        <name>hadoop.proxyuser.hduser.groups</name>
        <value>*</value>
    </property>

    <!-- ZooKeeper quorum used for NameNode HA -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>dn1:2181,dn2:2181,dn3:2181</value>
    </property>
</configuration>

Edit $HADOOP_HOME/etc/hadoop/hdfs-site.xml:


<configuration>
    <!-- logical name of the HDFS nameservice -->
    <property>
        <name>dfs.nameservices</name>
        <value>cluster</value>
    </property>

    <!-- the two NameNodes of the nameservice -->
    <property>
        <name>dfs.ha.namenodes.cluster</name>
        <value>nna,nns</value>
    </property>

    <!-- RPC address of NameNode nna -->
    <property>
        <name>dfs.namenode.rpc-address.cluster.nna</name>
        <value>nna:9000</value>
    </property>

    <!-- RPC address of NameNode nns -->
    <property>
        <name>dfs.namenode.rpc-address.cluster.nns</name>
        <value>nns:9000</value>
    </property>

    <!-- web UI address of NameNode nna -->
    <property>
        <name>dfs.namenode.http-address.cluster.nna</name>
        <value>nna:50070</value>
    </property>

    <!-- web UI address of NameNode nns -->
    <property>
        <name>dfs.namenode.http-address.cluster.nns</name>
        <value>nns:50070</value>
    </property>

    <!-- JournalNodes that hold the shared edit log -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://dn1:8485;dn2:8485;dn3:8485/cluster</value>
    </property>

    <!-- local directory for JournalNode edits -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/hadoop/data/tmp/journal</value>
    </property>

    <!-- enable automatic failover -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!-- client-side proxy provider that locates the active NameNode -->
    <property>
        <name>dfs.client.failover.proxy.provider.cluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <!-- fencing method used during failover -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>

    <!-- SSH private key used by sshfence -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property>

    <!-- sshfence connect timeout (ms) -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/hadoop/data/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/hadoop/data/dfs/data</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>dfs.journalnode.http-address</name>
        <value>0.0.0.0:8480</value>
    </property>
    <property>
        <name>dfs.journalnode.rpc-address</name>
        <value>0.0.0.0:8485</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>dn1:2181,dn2:2181,dn3:2181</value>
    </property>
</configuration>

Edit $HADOOP_HOME/etc/hadoop/mapred-site.xml:


<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>nna:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>nna:19888</value>
    </property>
</configuration>

Edit $HADOOP_HOME/etc/hadoop/yarn-site.xml. The ResourceManagers run on rma and rms (see the cluster layout), so rm1 maps to rma and rm2 to rms; yarn.resourcemanager.ha.id is rm1 on rma and must be changed to rm2 on rms:


<configuration>
    <property>
        <name>yarn.resourcemanager.connect.retry-interval.ms</name>
        <value>2000</value>
    </property>

    <!-- enable ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>

    <!-- logical ids of the two ResourceManagers -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>dn1:2181,dn2:2181,dn3:2181</value>
    </property>

    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!-- rm1 runs on rma -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>rma</value>
    </property>

    <!-- rm2 runs on rms -->
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>rms</value>
    </property>

    <!-- id of this ResourceManager: rm1 on rma, rm2 on rms -->
    <property>
        <name>yarn.resourcemanager.ha.id</name>
        <value>rm1</value>
    </property>

    <!-- enable RM state recovery -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>yarn.resourcemanager.zk-state-store.address</name>
        <value>dn1:2181,dn2:2181,dn3:2181</value>
    </property>

    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>

    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>dn1:2181,dn2:2181,dn3:2181</value>
    </property>

    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster1-yarn</value>
    </property>

    <property>
        <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
        <value>5000</value>
    </property>

    <!-- rm1 addresses -->
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>rma:8132</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>rma:8130</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>rma:8188</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>rma:8131</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>rma:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm1</name>
        <value>rma:23142</value>
    </property>

    <!-- rm2 addresses -->
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>rms:8132</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>rms:8130</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>rms:8188</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>rms:8131</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>rms:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm2</name>
        <value>rms:23142</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <!-- NodeManager dirs on the USB drive created in step 7 -->
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/hadoop/data/yarn/local</value>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/hadoop/log/yarn</value>
    </property>
    <property>
        <name>mapreduce.shuffle.port</name>
        <value>23080</value>
    </property>

    <!-- client-side failover proxy for the ResourceManagers -->
    <property>
        <name>yarn.client.failover-proxy-provider</name>
        <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
        <value>/yarn-leader-election</value>
    </property>
</configuration>


10. Install HBase


Replace the zookeeper*.jar in HBase's lib directory with the jar from the ZooKeeper installation:

rm -rf hbase-0.98.20-hadoop2/lib/zookeeper*.jar
find zookeeper-3.4.6/ -name "zookeeper*.jar" | xargs -i cp {} hbase-0.98.20-hadoop2/lib/

Replace the hadoop*.jar files in HBase's lib directory with the jars from the Hadoop installation:

rm -rf hbase-0.98.20-hadoop2/lib/hadoop*.jar
find hadoop-2.6.4/share/hadoop -name "hadoop*.jar" | xargs -i cp {} hbase-0.98.20-hadoop2/lib/

Edit $HBASE_HOME/conf/hbase-env.sh:

export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/
export HBASE_MANAGES_ZK=false   # whether HBase manages its own ZooKeeper; false because we run our own quorum

Edit $HBASE_HOME/conf/regionservers:
dn1
dn2
dn3


Edit $HBASE_HOME/conf/hbase-site.xml.
The host and port in hbase.rootdir must match the HDFS address used by Hadoop. Note that core-site.xml above defines the HA nameservice hdfs://cluster; to use hdfs://cluster/hbase as the rootdir, Hadoop's core-site.xml and hdfs-site.xml would also have to be copied into $HBASE_HOME/conf. The configuration below points directly at the first NameNode, nna:9000, instead.

<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://nna:9000/hbase</value>
    </property>

    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
        <description>The mode the cluster will be in. Possible values are
            false: standalone and pseudo-distributed setups with managed
            Zookeeper
            true: fully-distributed with unmanaged Zookeeper Quorum (see
            hbase-env.sh)
        </description>
    </property>

    <property>
        <name>hbase.master</name>
        <value>nna:60000</value>
    </property>

    <property>
        <name>hbase.master.port</name>
        <value>60000</value>
        <description>The port master should bind to.</description>
    </property>

    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>dn1:2181,dn2:2181,dn3:2181</value>
    </property>

    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
    </property>

    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/hadoop/data/zookeeper</value>
        <description>Property from ZooKeeper config zoo.cfg.
            The directory where the snapshot is stored.
        </description>
    </property>
</configuration>

11. Back up the SD card image and flash it to the other nodes


On each dn node, set the myid file in the ZooKeeper dataDir to that node's own number. On every node, also set /etc/hostname to the node's own hostname, and on rms change yarn.resourcemanager.ha.id to rm2.
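
A sketch of the per-node adjustments after flashing, shown here for dn2 (adapt the hostname and myid for each node; the 127.0.1.1 line only exists on stock Raspbian images):

sudo sh -c 'echo dn2 > /etc/hostname'
sudo sed -i 's/^127\.0\.1\.1.*/127.0.1.1\tdn2/' /etc/hosts
echo 2 > /hadoop/data/zookeeper/myid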

12. Configure passwordless SSH login


As the hadoop user, generate a key pair and copy the public key to every node (do this at least on the nodes that start daemons remotely: nna, nns, rma, rms, hba and hbs):

ssh-keygen -t rsa

ssh-copy-id -i ~/.ssh/id_rsa.pub nna
ssh-copy-id -i ~/.ssh/id_rsa.pub nns
ssh-copy-id -i ~/.ssh/id_rsa.pub rma
ssh-copy-id -i ~/.ssh/id_rsa.pub rms
ssh-copy-id -i ~/.ssh/id_rsa.pub hba
ssh-copy-id -i ~/.ssh/id_rsa.pub hbs
ssh-copy-id -i ~/.ssh/id_rsa.pub dn1
ssh-copy-id -i ~/.ssh/id_rsa.pub dn2
ssh-copy-id -i ~/.ssh/id_rsa.pub dn3
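
A quick check from each master node; the remote hostname should be printed without a password prompt:

ssh dn1 hostname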



13. Initialize and start each component


//------------------------------------------------------------------------
Option 1

Start ZooKeeper

On dn1, dn2 and dn3 (use restart if ZooKeeper is already running):

#./zookeeper-3.4.6/bin/zkServer.sh start
#./zookeeper-3.4.6/bin/zkServer.sh restart

Check the status on dn1, dn2 and dn3: there should be one leader and two followers
#./zookeeper-3.4.6/bin/zkServer.sh status

Start the JournalNodes on dn1, dn2 and dn3
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start journalnode

Format HDFS on nna
hdfs namenode -format

Formatting creates the NameNode metadata under dfs.namenode.name.dir (/hadoop/data/dfs/name).
Copy it to the standby NameNode nns:
scp -r /hadoop/data/dfs/name/current hadoop@nns:/hadoop/data/dfs/name/current

Format the ZKFC state in ZooKeeper (run once, on nna)
#hdfs zkfc -formatZK

Start HDFS on nna
#./hadoop-2.6.4/sbin/start-dfs.sh

Start YARN on rma
#./hadoop-2.6.4/sbin/start-yarn.sh

Start the standby ResourceManager on rms
#./hadoop-2.6.4/sbin/yarn-daemon.sh start resourcemanager

Start HBase

On hba, start HBase
start-hbase.sh

On hbs, start the backup HMaster
hbase-daemon.sh start master
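
Once the cluster is up, jps on each node should roughly match the process table at the top of this document; for example on a dn node (sketch; jps also prints a process id before each name):

$ jps
QuorumPeerMain
JournalNode
DataNode
NodeManager
HRegionServer
Jps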


//------------------------------------------------------------------------
Option 2

Start ZooKeeper

On dn1, dn2 and dn3 (use restart if ZooKeeper is already running):
#./zookeeper-3.4.6/bin/zkServer.sh start
#./zookeeper-3.4.6/bin/zkServer.sh restart

Check the status on dn1, dn2 and dn3: there should be one leader and two followers
#./zookeeper-3.4.6/bin/zkServer.sh status

Start the JournalNodes on dn1, dn2 and dn3
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start journalnode

Format the NameNode on nna
hdfs namenode -format

Start the NameNode on nna
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start namenode

Bootstrap the standby NameNode on nns
hdfs namenode -bootstrapStandby

Start the NameNode on nns
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start namenode

Make nna the active NameNode
hdfs haadmin -transitionToActive nna

Start the DataNodes from nna
#./hadoop-2.6.4/sbin/hadoop-daemons.sh start datanode

Swap the nna/nns roles (manual failover)
hdfs haadmin -failover --forceactive nna nns

Start YARN on rma
#./hadoop-2.6.4/sbin/start-yarn.sh

Start the standby ResourceManager on rms
#./hadoop-2.6.4/sbin/yarn-daemon.sh start resourcemanager

Start HBase

On hba, start HBase
start-hbase.sh

On hbs, start the backup HMaster
hbase-daemon.sh start master

//------------------------------------------------------------------------

14. Shut down the cluster


On hbs, stop the backup HMaster

hbase-daemon.sh stop master

On hba, stop HBase
stop-hbase.sh

Stop the standby ResourceManager on rms
#./hadoop-2.6.4/sbin/yarn-daemon.sh stop resourcemanager

Stop YARN on rma
#./hadoop-2.6.4/sbin/stop-yarn.sh

Stop HDFS on nna
#./hadoop-2.6.4/sbin/stop-dfs.sh

Stop ZooKeeper on dn1, dn2 and dn3

#./zookeeper-3.4.6/bin/zkServer.sh stop

15. Starting the cluster again


Start ZooKeeper on dn1, dn2 and dn3

#./zookeeper-3.4.6/bin/zkServer.sh start

Check the status on dn1, dn2 and dn3: there should be one leader and two followers
#./zookeeper-3.4.6/bin/zkServer.sh status

Start the JournalNodes on dn1, dn2 and dn3
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start journalnode

Start HDFS on nna
#./hadoop-2.6.4/sbin/start-dfs.sh

Start YARN on rma
#./hadoop-2.6.4/sbin/start-yarn.sh

Start the standby ResourceManager on rms
#./hadoop-2.6.4/sbin/yarn-daemon.sh start resourcemanager

On hba, start HBase
start-hbase.sh

On hbs, start the backup HMaster
hbase-daemon.sh start master

16. Verification


http://nna:50070            (active NameNode web UI)
http://nns:50070            (standby NameNode web UI)

http://192.168.11.83:8188   (ResourceManager rm1 on rma)
http://192.168.11.84:8188   (ResourceManager rm2 on rms)

http://hba:60010            (HBase master web UI)
http://hbs:60010            (backup HBase master web UI)

http://nna:19888            (MapReduce JobHistory server, if started)
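
The HA state can also be checked from the command line; each pair should report one active and one standby:

hdfs haadmin -getServiceState nna
hdfs haadmin -getServiceState nns
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2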

17. Adding nodes




