1. Installation Overview
- Software versions
hadoop-2.7.2
spark-2.0.1-bin-hadoop2.7
scala-2.11.8
sqoop-1.4.6
mysql-5.6.15
hbase-0.98.24-hadoop2
apache-hive-2.1.1
apache-kylin-2.0.0
zookeeper-3.3.6
apache-flume-1.7.0-bin
- Installation plan
IP | Hostname | Components | User | Password |
10.112.171.47 | vm-10-112-171-47 | NameNode (nn1), ResourceManager, Spark, Scala | hadoop | hadoop |
10.112.171.48 | vm-10-112-171-48 | Standby NameNode (nn2), Spark, Scala | hadoop | hadoop |
10.112.171.49 | vm-10-112-171-49 | DataNode, Hive, MySQL, HBase, ZooKeeper, Kylin, Spark, Scala | hadoop | hadoop |
10.112.171.50 | vm-10-112-171-50 | DataNode, Hive, HBase, ZooKeeper, Sqoop, Flume, Spark, Scala | hadoop | hadoop |
10.112.171.51 | vm-10-112-171-51 | DataNode, Hive, HBase, ZooKeeper, Spark, Scala | hadoop | hadoop |
- Goals
Use Flume to collect log data into HDFS, then analyze the logs with Spark and other tools (see the Flume sketch after this list).
Use Sqoop to import data from Oracle and MySQL databases into HDFS, then run statistical analysis with Kylin (see the Sqoop sketch below).
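To make the first goal concrete, below is a minimal Flume agent sketch for shipping a log file into HDFS. The agent name a1, the tailed log path, and the HDFS target directory are illustrative assumptions, not part of this plan; only the CashierHadoop nameservice comes from the HDFS configuration in section 2.

# Hypothetical agent "a1": tail an example log file and write the events to HDFS
a1.sources = r1
a1.channels = c1
a1.sinks = k1
# exec source tails a log file (the path is an assumption)
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /var/log/app.log
a1.sources.r1.channels = c1
# buffer events in memory
a1.channels.c1.type = memory
# HDFS sink writes plain text into the CashierHadoop nameservice, partitioned by day
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs://CashierHadoop/flume/logs/%Y%m%d
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.useLocalTimeStamp = true

An agent like this would be started with bin/flume-ng agent -n a1 -c conf -f conf/a1.conf on 10.112.171.50, where Flume is installed per the plan.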
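Likewise, a hedged sketch of the second goal: importing one MySQL table into HDFS with Sqoop. The database testdb, table orders, and target directory are placeholders; the MySQL host and the hadoop user come from the plan, and the MySQL JDBC driver jar is assumed to be in Sqoop's lib directory.

# Import a single MySQL table into HDFS (database/table/target-dir are placeholders)
sqoop import \
  --connect jdbc:mysql://10.112.171.49:3306/testdb \
  --username hadoop -P \
  --table orders \
  --target-dir /user/hadoop/orders \
  -m 1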
2. Hadoop Installation
- Create the hadoop user on every machine
# Create the user and specify its home directory
useradd -d /home/hadoop -m hadoop
# Set the login password
passwd hadoop
# Open up permissions on the hadoop home directory and its files
chmod 777 /home/hadoop
# Change the owner and group of the hadoop directory
chown -R hadoop:hadoop /home/hadoop
# Set up the hadoop login environment
cp -a /etc/skel/. /home/hadoop/
# Log in to each machine as the hadoop user and create the following directories
cd /home/hadoop/
mkdir tmp
mkdir journal
- Set environment variables on every machine
vi /etc/profile
# Append the HADOOP environment variables at the end, similar to the JAVA_HOME setup
export HADOOP_HOME=/home/hadoop/hadoop-2.7.2
export PATH=$PATH:$HADOOP_HOME/bin
# Reload the configuration file
source /etc/profile
- Configure hosts on every machine
vi /etc/hosts
# Append the following entries
10.112.171.47 vm-10-112-171-47
10.112.171.48 vm-10-112-171-48
10.112.171.49 vm-10-112-171-49
10.112.171.50 vm-10-112-171-50
10.112.171.51 vm-10-112-171-51
- Install hadoop-2.7.2 on every machine
First install hadoop-2.7.2 on the 10.112.171.47 node under /home/hadoop/hadoop-2.7.2, then go to /home/hadoop/hadoop-2.7.2/etc/hadoop and edit hdfs-site.xml:
<property>
  <name>dfs.nameservices</name>
  <value>CashierHadoop</value>
</property>
<property>
  <name>dfs.ha.namenodes.CashierHadoop</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.CashierHadoop.nn1</name>
  <value>vm-10-112-171-47:9000</value>
</property>
<property>
  <name>dfs.namenode.http-address.CashierHadoop.nn1</name>
  <value>vm-10-112-171-47:50070</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.CashierHadoop.nn2</name>
  <value>vm-10-112-171-48:9000</value>
</property>
<property>
  <name>dfs.namenode.http-address.CashierHadoop.nn2</name>
  <value>vm-10-112-171-48:50070</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://vm-10-112-171-49:8485;vm-10-112-171-50:8485;vm-10-112-171-51:8485/CashierHadoop</value>
</property>
<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/app/hadoop/journal</value>
</property>
<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.CashierHadoop</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
  <name>dfs.ha.fencing.methods</name>
  <value>sshfence</value>
</property>
<property>
  <name>dfs.ha.fencing.ssh.private-key-files</name>
  <value>/app/hadoop/.ssh/id_rsa</value>
</property>
<property>
  <name>nfs.dump.dir</name>
  <value>/tmp/.hdfs-nfs</value>
</property>
<property>
  <name>nfs.rtmax</name>
  <value>1048576</value>
  <description>This is the maximum size in bytes of a READ request supported by the NFS gateway. If you change this, make sure you also update the nfs mount's rsize (add rsize=# of bytes to the mount directive).</description>
</property>
<property>
  <name>nfs.wtmax</name>
  <value>65536</value>
  <description>This is the maximum size in bytes of a WRITE request supported by the NFS gateway. If you change this, make sure you also update the nfs mount's wsize (add wsize=# of bytes to the mount directive).</description>
</property>
<property>
  <name>nfs.exports.allowed.hosts</name>
  <value>* rw</value>
  <description>Allow all hosts read and write access.</description>
</property>
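As the nfs.rtmax/nfs.wtmax descriptions above note, the client mount's rsize/wsize must match those values. A mount sketch for reference; the gateway host and local mount point are assumptions, since the plan does not say where the NFS gateway runs:

# Assumed: NFS gateway running on vm-10-112-171-47, local mount point /mnt/hdfs
mount -t nfs -o vers=3,proto=tcp,nolock,rsize=1048576,wsize=65536 vm-10-112-171-47:/ /mnt/hdfs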
- Edit core-site.xml (see the overall plan above for the ZooKeeper machines)
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://CashierHadoop</value>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/app/hadoop/tmp</value>
</property>
<property>
  <name>ha.zookeeper.quorum</name>
  <value>10.112.171.49:2181,10.112.171.50:2181,10.112.171.51:2181</value>
</property>
<property>
  <name>nfs.superuser</name>
  <value>hadoop</value>
</property>
<property>
  <name>hadoop.proxyuser.root.groups</name>
  <value>*</value>
  <description>Allow the root superuser to impersonate members of any group.</description>
</property>
<property>
  <name>hadoop.proxyuser.root.hosts</name>
  <value>*</value>
  <description>The root superuser can connect from any host to impersonate a user.</description>
</property>
<property>
  <name>hadoop.proxyuser.hadoop.groups</name>
  <value>hadoop</value>
  <description>Allow the hadoop superuser to impersonate members of the hadoop group.</description>
</property>
<property>
  <name>hadoop.proxyuser.hadoop.hosts</name>
  <value>10.112.171.49</value>
  <description>The hadoop superuser can connect only from 10.112.171.49 to impersonate a user.</description>
</property>
- Edit mapred-site.xml
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
- Edit yarn-site.xml
<property>
  <name>yarn.resourcemanager.hostname</name>
  <value>vm-10-112-171-47</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
- Edit the slaves file
vm-10-112-171-49
vm-10-112-171-50
vm-10-112-171-51
- Set up mutual SSH trust between the management nodes and all other machines, including each machine with itself (very important)
# On 10.112.171.47, generate the public/private key pair
ssh-keygen -t rsa
# Copy the public key to every node; run the following commands one at a time
ssh-copy-id -i vm-10-112-171-47
ssh-copy-id -i vm-10-112-171-48
ssh-copy-id -i vm-10-112-171-49
ssh-copy-id -i vm-10-112-171-50
ssh-copy-id -i vm-10-112-171-51
# On 10.112.171.48, generate the public/private key pair
ssh-keygen -t rsa
# Copy the public key to every node; run the following commands one at a time
ssh-copy-id -i vm-10-112-171-47
ssh-copy-id -i vm-10-112-171-48
ssh-copy-id -i vm-10-112-171-49
ssh-copy-id -i vm-10-112-171-50
ssh-copy-id -i vm-10-112-171-51
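To confirm the trust is in place, a quick check from each of the two management nodes; it should print the remote hostname without prompting for a password:

ssh vm-10-112-171-49 hostname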
- Copy the hadoop-2.7.2 installation from 10.112.171.47 to the same directory, /home/hadoop/hadoop-2.7.2, on every other machine
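A minimal sketch of that copy using scp over the SSH trust just configured; run as the hadoop user on 10.112.171.47 and repeat for each remaining host:

scp -r /home/hadoop/hadoop-2.7.2 hadoop@vm-10-112-171-48:/home/hadoop/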
3. Starting Hadoop
- Start the JournalNodes (the NameNode edit-log synchronization nodes)
cd /home/hadoop/hadoop-2.7.2/sbin
./hadoop-daemons.sh start journalnode
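As a sanity check, running jps on each of vm-10-112-171-49/50/51 should now show the JournalNode daemon:

jps   # each JournalNode host should list a JournalNode process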
- Format the NameNode before the first startup
cd /home/hadoop/hadoop-2.7.2/bin
hadoop namenode -format
- Format ZooKeeper before the first startup. Run this on the NameNode; once it completes, copy the files under the tmp directory to the standby NameNode (a sketch follows the command).
hdfs zkfc -formatZK
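A hedged sketch of the copy step above, assuming the tmp directory refers to the hadoop.tmp.dir from core-site.xml (/app/hadoop/tmp) and the standby NameNode is vm-10-112-171-48:

# Copy the freshly formatted NameNode metadata to the standby NameNode
scp -r /app/hadoop/tmp hadoop@vm-10-112-171-48:/app/hadoop/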