I. Typical problems with Hadoop 1.x clusters
(Figure: Hadoop cluster)
II. Setting up Hadoop 2.x (HA Hadoop)
1. Network setup
Every host must be reachable by ping from every other host, by hostname as well as by IP address. This requires editing a few configuration files on each host, as follows:
For details see: http://blog.csdn.net/qq_38776653/article/details/73550714
```
vi /etc/hostname                                 # set each host's hostname
vi /etc/sysconfig/network-scripts/ifcfg-ens33    # static IP address and default gateway
vi /etc/hosts                                    # hostname-to-IP mappings for ALL machines, master and slaves
```
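For example, with the three hosts used throughout this guide (namenode, datanode1, datanode2), /etc/hosts should be identical on every machine; the 192.168.1.x addresses below are placeholders, substitute your own:

```
# /etc/hosts -- same on all machines (example IPs)
192.168.1.101   namenode
192.168.1.102   datanode1
192.168.1.103   datanode2
```

and the static-IP portion of ifcfg-ens33 would look roughly like this (values again placeholders):

```
BOOTPROTO=static
ONBOOT=yes
IPADDR=192.168.1.101
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
```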
At the same time, create a hadoop user: to keep each user's role clearly separated, create a dedicated user for operating and managing the cluster, and grant it temporary root privileges via sudo.
Commands: as root, run visudo, find the line reading root ALL=(ALL) ALL (around line 90 of the file), and add on the next line: username ALL=(ALL) ALL, with a tab between the fields; see the sketch below.
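Assuming the dedicated user is named hadoop, the relevant sudoers lines end up looking like this:

```
root    ALL=(ALL)       ALL
hadoop  ALL=(ALL)       ALL
```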
2. On the master and slaves, install the JDK, unpack the Hadoop archive, and configure the current user's environment variables
Install the JDK and unpack Hadoop; a sketch of both is shown below.
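A minimal sketch, assuming the JDK 8u121 RPM (which installs to /usr/java/jdk1.8.0_121, matching JAVA_HOME below) and the Hadoop 2.7.3 binary tarball; the archive file names are assumptions:

```
# install the JDK system-wide (the RPM defaults to /usr/java/jdk1.8.0_121)
sudo rpm -ivh jdk-8u121-linux-x64.rpm
# unpack Hadoop into the hadoop user's home, the path used throughout this guide
tar -zxvf hadoop-2.7.3.tar.gz -C /home/hadoop/
```

Then append the following to the current user's shell profile (e.g. ~/.bash_profile):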
```
# Java Environment Variables
export JAVA_HOME=/usr/java/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin
# Hadoop Environment Variables
export HADOOP_HOME=/home/hadoop/hadoop-2.7.3
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
# Zookeeper Environment Variables
export ZOOKEEPER_HOME=/home/hadoop/zookeeper-3.4.9
export PATH=$PATH:$ZOOKEEPER_HOME/bin
# HBase Environment Variables
export HBASE_HOME=/home/hadoop/hbase-1.2.6
export PATH=$PATH:$HBASE_HOME/bin
```
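After editing the profile, reload it and check that both tools resolve:

```
source ~/.bash_profile   # reload the environment in the current shell
java -version            # should report 1.8.0_121
hadoop version           # should report 2.7.3
```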
Hadoop configuration files, under /home/hadoop/hadoop-2.7.3/etc/hadoop:
core-site.xml
```
<configuration>
  <!-- URI of the default file system for HDFS -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://namenode:9000</value>
  </property>
  <!-- File storage location for the Hadoop cluster -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/hadoop-2.7.3/tmp</value>
  </property>
  <!-- Addresses of the ZooKeeper daemons, used for HA automatic failover -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>datanode1:2181,datanode2:2181</value>
  </property>
</configuration>
```
hdfs-site.xml
```
<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>namenode:50090</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/hadoop/hadoop-2.7.3/tmp/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/hadoop/hadoop-2.7.3/tmp/dfs/data</value>
  </property>
  <!-- Shared edits directory on the JournalNode quorum -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://datanode1:8485;datanode2:8485/cluster</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/home/hadoop/hadoop-2.7.3/journaldata</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.cluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Fence a failed NameNode over SSH, falling back to a no-op -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/bin/true)
    </value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>600000</value>
  </property>
</configuration>
```
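The sshfence method and the private key configured above assume that the hadoop user can SSH between the cluster machines without a password. A typical way to set that up (run as hadoop; hostnames follow this guide):

```
ssh-keygen -t rsa              # accept the default key path ~/.ssh/id_rsa
ssh-copy-id hadoop@namenode    # repeat for every node, including the local one
ssh-copy-id hadoop@datanode1
ssh-copy-id hadoop@datanode2
```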
yarn-site.xml
```
<configuration>
  <!-- Persist ResourceManager state in ZooKeeper so it can recover after a restart -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>namenode</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>datanode1:2181,datanode2:2181</value>
  </property>
  <!-- Shuffle service required by MapReduce -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
```
mapred-site.xml
```
<configuration>
  <!-- Run MapReduce jobs on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
```
slaves
```
datanode1
datanode2
```
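With the configuration finished on one machine, the whole Hadoop directory can be copied to the slaves so that every node runs identical settings; a sketch using scp (paths as in this guide):

```
scp -r /home/hadoop/hadoop-2.7.3 hadoop@datanode1:/home/hadoop/
scp -r /home/hadoop/hadoop-2.7.3 hadoop@datanode2:/home/hadoop/
```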
ZooKeeper configuration file, /home/hadoop/zookeeper-3.4.9/conf/zoo.cfg:
```
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
# dataDir=/tmp/zookeeper (create the corresponding directory on your host)
dataDir=/home/hadoop/zookeeper-3.4.9/data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=datanode1:2888:3888
server.2=datanode2:2888:3888
```
On datanode1 and datanode2, create a myid file under /home/hadoop/zookeeper-3.4.9/data containing 1 and 2 respectively, as sketched below.
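A minimal sketch (run the echo 1 line on datanode1 and the echo 2 line on datanode2):

```
mkdir -p /home/hadoop/zookeeper-3.4.9/data
echo 1 > /home/hadoop/zookeeper-3.4.9/data/myid   # on datanode1
echo 2 > /home/hadoop/zookeeper-3.4.9/data/myid   # on datanode2
```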
Permanently disable SELinux in /etc/selinux/config (the firewall is disabled separately; see the sketch after the file):
```
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
#     enforcing - SELinux security policy is enforced.
#     permissive - SELinux prints warnings instead of enforcing.
#     disabled - No SELinux policy is loaded.
SELINUX=disabled
# SELINUXTYPE= can take one of three values:
#     targeted - Targeted processes are protected,
#     minimum - Modification of targeted policy. Only selected processes are protected.
#     mls - Multi Level Security protection.
SELINUXTYPE=targeted
```
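The SELINUX=disabled setting takes effect after a reboot; setenforce 0 turns enforcement off for the current session. The firewall itself is disabled separately; assuming CentOS 7 (suggested by the ifcfg-ens33 naming), that would be:

```
sudo setenforce 0                  # stop enforcing SELinux until next reboot
sudo systemctl stop firewalld      # stop the firewall now
sudo systemctl disable firewalld   # keep it off across reboots
```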