【大数据】HDFS,YARN,HADOOP集群部署

 【大数据】HDFS,YARN,HADOOP集群部署_第1张图片

1:准备好环境包
[root@hadoop001 data]# ll
总用量 532404
-rw-r--r-- 1 root root  42610549 3月  31 09:22 hadoop-2.6.0-cdh5.7.0-src.tar.gz
-rw-r--r-- 1 root root 311585484 3月  31 09:24 hadoop-2.6.0-cdh5.7.0.tar.gz
-rw-r--r-- 1 root root 173271626 3月  31 09:23 jdk-8u45-linux-x64.gz
-rw-r--r-- 1 root root  17699306 3月  31 09:23 zookeeper-3.4.6.tar.gz
1:创建hadoop用户
[root@hadoop001 ~]# which useradd
/usr/sbin/useradd
[root@hadoop001 ~]# useradd hadoop
[root@hadoop001 ~]# su - hadoop
[hadoop@hadoop001 ~]$ mkdir app

[root@hadoop001 data]# ll
总用量 532404
-rw-r--r-- 1 root root  42610549 3月  31 09:22 hadoop-2.6.0-cdh5.7.0-src.tar.gz
-rw-r--r-- 1 root root 311585484 3月  31 09:24 hadoop-2.6.0-cdh5.7.0.tar.gz
-rw-r--r-- 1 root root 173271626 3月  31 09:23 jdk-8u45-linux-x64.gz
-rw-r--r-- 1 root root  17699306 3月  31 09:23 zookeeper-3.4.6.tar.gz
[root@hadoop001 data]# mv * /home/hadoop/app/
1:配置多用户ssh信任关系
[hadoop@hadoop001 ~]$ rm -rf .ssh
[hadoop@hadoop001 ~]$ ssh-keygen
(所有提示均直接按回车,使用默认值)
[hadoop@hadoop001 ~]$ cd .ssh
[hadoop@hadoop001 .ssh]$ ll
total 8
-rw------- 1 hadoop hadoop 1675 Mar 31 10:43 id_rsa
-rw-r--r-- 1 hadoop hadoop  398 Mar 31 10:43 id_rsa.pub

[hadoop@hadoop001 .ssh]$ cat id_rsa.pub >> authorized_keys

[hadoop@hadoop002 ~]$ scp .ssh/id_rsa.pub root@172.16.121.228:/home/hadoop/.ssh/id_rsa2

[hadoop@hadoop001 .ssh]$ ll
total 20
-rw-rw-r-- 1 hadoop hadoop  398 Mar 31 10:48 authorized_keys
-rw------- 1 hadoop hadoop 1675 Mar 31 10:43 id_rsa
-rw-r--r-- 1 root   root    398 Mar 31 10:54 id_rsa2
-rw-r--r-- 1 root   root    398 Mar 31 10:56 id_rsa3
-rw-r--r-- 1 hadoop hadoop  398 Mar 31 10:43 id_rsa.pub

[hadoop@hadoop001 .ssh]$ cat id_rsa2 >> authorized_keys 
[hadoop@hadoop001 .ssh]$ cat id_rsa3 >> authorized_keys

[hadoop@hadoop001 .ssh]$ exit 

[root@hadoop001 ~]# vi /etc/hosts

172.16.121.228  hadoop001       hadoop001
172.16.121.227  hadoop002       hadoop002
172.16.121.229  hadoop003       hadoop003

[root@hadoop001 ~]# su - hadoop

[hadoop@hadoop001 ~]$ scp .ssh/authorized_keys root@hadoop002:/home/hadoop/.ssh/
[hadoop@hadoop001 ~]$ scp .ssh/authorized_keys root@hadoop003:/home/hadoop/.ssh/

验证免密登录(authorized_keys 的权限需为 600,否则免密登录可能失败)
[hadoop@hadoop001 ~]$ ssh hadoop001 date
[hadoop@hadoop001 .ssh]$ chmod 600 authorized_keys 

[hadoop@hadoop002 .ssh]$ ssh hadoop001 date
Sun Mar 31 11:15:18 CST 2019
[hadoop@hadoop002 .ssh]$ ssh hadoop002 date
Sun Mar 31 11:15:21 CST 2019
[hadoop@hadoop002 .ssh]$ ssh hadoop003 date
Sun Mar 31 11:15:25 CST 2019
1:jdk部署

[root@hadoop001 ~]# mkdir /usr/java

[root@hadoop001 ~]# tar -zxvf /home/hadoop/app/jdk-8u45-linux-x64.gz -C /usr/java/
[root@hadoop001 usr]# chown -R root:root /usr/java/

[hadoop@hadoop001 zookeeper]$ vi ~/.bash_profile 

export JAVA_HOME=/usr/java/jdk1.8.0_45/
PATH=$JAVA_HOME/bin:$PATH:$HOME/bin

export PATH
1:防火墙
[root@hadoop001 usr]# service iptables status
iptables: Firewall is not running.

[root@hadoop001 usr]# service iptables status
iptables: Firewall is not running.
[root@hadoop001 usr]# iptables -L
Chain INPUT (policy ACCEPT)
target     prot opt source               destination         

Chain FORWARD (policy ACCEPT)
target     prot opt source               destination         

Chain OUTPUT (policy ACCEPT)
target     prot opt source               destination         

[root@hadoop001 usr]# iptables -F
1:部署zookeeper
[root@hadoop001 ~]# su - hadoop
[hadoop@hadoop001 ~]$ cd app/

[hadoop@hadoop001 app]$ tar -zxvf zookeeper-3.4.6.tar.gz 

创建软链接
[hadoop@hadoop001 app]$ ln -s /home/hadoop/app/zookeeper-3.4.6 /home/hadoop/app/zookeeper

[hadoop@hadoop001 app]$ cd zookeeper
[hadoop@hadoop001 zookeeper]$ cd conf/
[hadoop@hadoop001 conf]$ cp zoo_sample.cfg zoo.cfg 

[hadoop@hadoop001 zookeeper]$ mkdir data

[hadoop@hadoop001 conf]$ vi zoo.cfg 
dataDir=/home/hadoop/app/zookeeper/data

server.1=hadoop001:2888:3888
server.2=hadoop002:2888:3888
server.3=hadoop003:2888:3888

[hadoop@hadoop001 zookeeper]$ touch data/myid
[hadoop@hadoop001 zookeeper]$ echo 1 > data/myid 
[hadoop@hadoop001 zookeeper]$ scp conf/zoo.cfg hadoop002:/home/hadoop/app/zookeeper/conf/
[hadoop@hadoop001 zookeeper]$ scp conf/zoo.cfg hadoop003:/home/hadoop/app/zookeeper/conf/

[hadoop@hadoop001 zookeeper]$ scp -r data/ hadoop002:/home/hadoop/app/zookeeper
[hadoop@hadoop001 zookeeper]$ scp -r data/ hadoop003:/home/hadoop/app/zookeeper

[hadoop@hadoop002 data]$ echo 2 > myid
[hadoop@hadoop003 data]$ echo 3 > myid

启动zookeeper
[hadoop@hadoop003 zookeeper]$ cd bin/

[hadoop@hadoop001 zookeeper]$ vi ~/.bash_profile 

export JAVA_HOME=/usr/java/jdk1.8.0_45
export ZOOKEEPER_HOME=/home/hadoop/app/zookeeper
PATH=$JAVA_HOME/bin:$ZOOKEEPER_HOME/bin:$PATH:$HOME/bin

export PATH


[hadoop@hadoop001 bin]$ ./zkServer.sh start


效果
[hadoop@hadoop001 bin]$ zkServer.sh status
JMX enabled by default
Using config: /home/hadoop/app/zookeeper/bin/../conf/zoo.cfg
Mode: follower

[hadoop@hadoop002 bin]$ zkServer.sh status
JMX enabled by default
Using config: /home/hadoop/app/zookeeper/bin/../conf/zoo.cfg
Mode: leader

[hadoop@hadoop003 bin]$ zkServer.sh status
JMX enabled by default
Using config: /home/hadoop/app/zookeeper/bin/../conf/zoo.cfg
Mode: follower

 

hadoop配置文件之core-site.xml




	
        
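<configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://ruozeclusterg6</value>
        </property>

        <property>
                <name>fs.trash.checkpoint.interval</name>
                <value>0</value>
        </property>
        <property>
                <name>fs.trash.interval</name>
                <value>1440</value>
        </property>

        <property>
                <name>hadoop.tmp.dir</name>
                <value>/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/tmp</value>
        </property>

        <property>
                <name>ha.zookeeper.quorum</name>
                <value>hadoop001:2181,hadoop002:2181,hadoop003:2181</value>
        </property>
        <property>
                <name>ha.zookeeper.session-timeout.ms</name>
                <value>2000</value>
        </property>

        <property>
                <name>hadoop.proxyuser.hadoop.hosts</name>
                <value>*</value>
        </property>
        <property>
                <name>hadoop.proxyuser.hadoop.groups</name>
                <value>*</value>
        </property>

        <property>
                <name>io.compression.codecs</name>
                <value>org.apache.hadoop.io.compress.GzipCodec,
                        org.apache.hadoop.io.compress.DefaultCodec,
                        org.apache.hadoop.io.compress.BZip2Codec,
                        org.apache.hadoop.io.compress.SnappyCodec
                </value>
        </property>
</configuration>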
		  
      

 

hadoop配置文件之hdfs-site.xml




	
	
		dfs.permissions.superusergroup
		hadoop
	

	
	
		dfs.webhdfs.enabled
		true
	
	
		dfs.namenode.name.dir
		/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/name
		 namenode 存放name table(fsimage)本地目录(需要修改)
	
	
		dfs.namenode.edits.dir
		${dfs.namenode.name.dir}
		namenode存放 transaction file(edits)本地目录(需要修改)
	
	
		dfs.datanode.data.dir
		/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/data
		datanode存放block本地目录(需要修改)
	
	
		dfs.replication
		3
	
	
	
		dfs.blocksize
		268435456
	
	
	
	
	
		dfs.nameservices
		ruozeclusterg6
	
	
		
		dfs.ha.namenodes.ruozeclusterg6
		nn1,nn2
	

	
	
		dfs.namenode.rpc-address.ruozeclusterg6.nn1
		hadoop001:8020
	
	
		dfs.namenode.rpc-address.ruozeclusterg6.nn2
		hadoop002:8020
	

	
	
		dfs.namenode.http-address.ruozeclusterg6.nn1
		hadoop001:50070
	
	
		dfs.namenode.http-address.ruozeclusterg6.nn2
		hadoop002:50070
	

	
	
	
		dfs.journalnode.http-address
		0.0.0.0:8480
	
	
		dfs.journalnode.rpc-address
		0.0.0.0:8485
	
	
		
		
		dfs.namenode.shared.edits.dir
		qjournal://hadoop001:8485;hadoop002:8485;hadoop003:8485/ruozeclusterg6
	

	
		
		dfs.journalnode.edits.dir
		/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/jn
	
	
	
		
                             
		dfs.client.failover.proxy.provider.ruozeclusterg6
		org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
	
	
	
	
		dfs.ha.fencing.methods
		sshfence
	
	
		dfs.ha.fencing.ssh.private-key-files
		/home/hadoop/.ssh/id_rsa
	
	
		
		dfs.ha.fencing.ssh.connect-timeout
		30000
	

	
	
	
		dfs.ha.automatic-failover.enabled
		true
	
	
	 
	   dfs.hosts
	   /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/etc/hadoop/slaves
	 

hadoop配置文件之mapred-site.xml




	
	
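<configuration>
	<property>
		<name>mapreduce.framework.name</name>
		<value>yarn</value>
	</property>

	<property>
		<name>mapreduce.jobhistory.address</name>
		<value>hadoop001:10020</value>
	</property>
	<property>
		<name>mapreduce.jobhistory.webapp.address</name>
		<value>hadoop001:19888</value>
	</property>

	<property>
		<name>mapreduce.map.output.compress</name>
		<value>true</value>
	</property>
	<property>
		<name>mapreduce.map.output.compress.codec</name>
		<value>org.apache.hadoop.io.compress.SnappyCodec</value>
	</property>
</configuration>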
   


 

hadoop配置文件之yarn-site.xml



	
	
		yarn.nodemanager.aux-services
		mapreduce_shuffle
	
	
		yarn.nodemanager.aux-services.mapreduce.shuffle.class
		org.apache.hadoop.mapred.ShuffleHandler
	
	
		yarn.nodemanager.localizer.address
		0.0.0.0:23344
		Address where the localizer IPC is.
	
	
		yarn.nodemanager.webapp.address
		0.0.0.0:23999
		NM Webapp address.
	

	
	
	
		yarn.resourcemanager.connect.retry-interval.ms
		2000
	
	
		yarn.resourcemanager.ha.enabled
		true
	
	
		yarn.resourcemanager.ha.automatic-failover.enabled
		true
	
	
	
		yarn.resourcemanager.ha.automatic-failover.embedded
		true
	
	
	
		yarn.resourcemanager.cluster-id
		yarn-cluster
	
	
		yarn.resourcemanager.ha.rm-ids
		rm1,rm2
	


    

	
		yarn.resourcemanager.scheduler.class
		org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler
	
	
		yarn.resourcemanager.recovery.enabled
		true
	
	
		yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms
		5000
	
	
	
		yarn.resourcemanager.store.class
		org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore
	
	
		yarn.resourcemanager.zk-address
		hadoop001:2181,hadoop002:2181,hadoop003:2181
	
	
		yarn.resourcemanager.zk.state-store.address
		hadoop001:2181,hadoop002:2181,hadoop003:2181
	
	
	
		yarn.resourcemanager.address.rm1
		hadoop001:23140
	
	
		yarn.resourcemanager.address.rm2
		hadoop002:23140
	
	
	
		yarn.resourcemanager.scheduler.address.rm1
		hadoop001:23130
	
	
		yarn.resourcemanager.scheduler.address.rm2
		hadoop002:23130
	
	
	
		yarn.resourcemanager.admin.address.rm1
		hadoop001:23141
	
	
		yarn.resourcemanager.admin.address.rm2
		hadoop002:23141
	
	
	
		yarn.resourcemanager.resource-tracker.address.rm1
		hadoop001:23125
	
	
		yarn.resourcemanager.resource-tracker.address.rm2
		hadoop002:23125
	
	
	
		yarn.resourcemanager.webapp.address.rm1
		hadoop001:8088
	
	
		yarn.resourcemanager.webapp.address.rm2
		hadoop002:8088
	
	
		yarn.resourcemanager.webapp.https.address.rm1
		hadoop001:23189
	
	
		yarn.resourcemanager.webapp.https.address.rm2
		hadoop002:23189
	

	
	   yarn.log-aggregation-enable
	   true
	
	
		 yarn.log.server.url
		 http://hadoop001:19888/jobhistory/logs
	


	
		yarn.nodemanager.resource.memory-mb
		2048
	
	
		yarn.scheduler.minimum-allocation-mb
		1024
		单个任务可申请最少内存,默认1024MB
	 

  
  
	yarn.scheduler.maximum-allocation-mb
	2048
	单个任务可申请最大内存,默认8192MB
  

   
       yarn.nodemanager.resource.cpu-vcores
       2
    



hadoop配置文件之slaves
hadoop001
hadoop002
hadoop003
1:hadoop集群
[hadoop@hadoop001 app]$ tar -zxvf hadoop-2.6.0-cdh5.7.0.tar.gz 
[hadoop@hadoop001 app]$ ln -s /home/hadoop/app/hadoop-2.6.0-cdh5.7.0 /home/hadoop/app/hadoop

[hadoop@hadoop001 app]$ cd hadoop

[hadoop@hadoop001 hadoop]$ cd etc/hadoop
[hadoop@hadoop001 hadoop]$ mkdir -p /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/tmp
[hadoop@hadoop001 hadoop]$ mkdir -p /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/name
[hadoop@hadoop001 hadoop]$ mkdir -p /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/jn

[hadoop@hadoop001 hadoop]$ rm -f core-site.xml hdfs-site.xml yarn-site.xml 
[hadoop@hadoop001 hadoop]$ rm -f slaves
(删除默认文件后,将上文准备好的 core-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml、slaves 放到当前目录,再分发到其他节点)

[hadoop@hadoop001 hadoop]$ scp -r core-site.xml hdfs-site.xml mapred-site.xml slaves yarn-site.xml hadoop002:/home/hadoop/app/hadoop/etc/hadoop

[hadoop@hadoop001 hadoop]$ scp -r core-site.xml hdfs-site.xml mapred-site.xml slaves yarn-site.xml hadoop003:/home/hadoop/app/hadoop/etc/hadoop

[hadoop@hadoop001 hadoop]$ vi hadoop-env.sh 
export JAVA_HOME=/usr/java/jdk1.8.0_45

[hadoop@hadoop001 sbin]$ ./hadoop-daemon.sh start journalnode
[hadoop@hadoop002 sbin]$ ./hadoop-daemon.sh start journalnode
[hadoop@hadoop003 sbin]$ ./hadoop-daemon.sh start journalnode

[hadoop@hadoop002 sbin]$ jps
1734 JournalNode
1591 QuorumPeerMain
1785 Jps

格式化hadoop
[hadoop@hadoop001 hadoop]$ hadoop namenode -format

19/04/02 08:29:08 INFO common.Storage: Storage directory /home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/name has been successfully formatted.

或者:格式化一台后,将 data 目录复制到其他节点
[hadoop@hadoop001 hadoop]$ scp -r data/ hadoop002:/home/hadoop/app/hadoop
[hadoop@hadoop001 hadoop]$ scp -r data/ hadoop003:/home/hadoop/app/hadoop
[hadoop@hadoop001 hadoop]$ hdfs zkfc -formatZK
[hadoop@hadoop001 sbin]$ ./start-dfs.sh 
1:集群启动成功信息
[hadoop@hadoop001 sbin]$ jps
1856 NameNode
1635 QuorumPeerMain
2277 DFSZKFailoverController
1688 JournalNode
1962 DataNode
2347 Jps
[hadoop@hadoop001 sbin]$


[hadoop@hadoop002 hadoop]$ jps
1680 JournalNode
2035 DFSZKFailoverController
2115 Jps
1607 QuorumPeerMain
1883 DataNode
1791 NameNode
[hadoop@hadoop002 hadoop]$

[hadoop@hadoop003 sbin]$ jps
1609 QuorumPeerMain
1772 DataNode
1663 JournalNode
1887 Jps
[hadoop@hadoop003 sbin]$

1:启动yarn.sh
[hadoop@hadoop001 sbin]$ ./start-yarn.sh
[hadoop@hadoop002 sbin]$ ./yarn-daemon.sh start resourcemanager

[hadoop@hadoop001 sbin]$ jps
2596 NodeManager
2501 ResourceManager
2070 DataNode
3047 Jps
1591 QuorumPeerMain
1656 JournalNode
2364 DFSZKFailoverController
1934 NameNode


[hadoop@hadoop002 sbin]$ jps
1680 JournalNode
2035 DFSZKFailoverController
2197 NodeManager
1607 QuorumPeerMain
2535 ResourceManager
1883 DataNode
2638 Jps
1791 NameNode
1:访问控制面板
http://116.62.198.161:50070/dfshealth.html#tab-overview
http://118.31.41.202:50070/dfshealth.html#tab-overview
1:启动jobhistory服务
[hadoop@hadoop001 sbin]$ ./mr-jobhistory-daemon.sh start historyserver

[hadoop@hadoop001 sbin]$ netstat -nlp|grep 3256
(Not all processes could be identified, non-owned process info
 will not be shown, you would have to be root to see it all.)
tcp        0      0 172.16.121.228:10020        0.0.0.0:*                   LISTEN      3256/java
tcp        0      0 172.16.121.228:19888        0.0.0.0:*                   LISTEN      3256/java
tcp        0      0 0.0.0.0:10033               0.0.0.0:*                   LISTEN      3256/java

http://116.62.198.161:19888/jobhistory
1:启动hadoop集群
[hadoop@hadoop001 ~]$ zkServer.sh start
[hadoop@hadoop002 ~]$ zkServer.sh start
[hadoop@hadoop003 ~]$ zkServer.sh start

[hadoop@hadoop001 sbin]$ ./start-dfs.sh

 

你可能感兴趣的:(大数据集群,hadoop,集群,zookeeper集群,ha集群)