CentOS6.9+Hadoop2.7.3+Hive1.2.1+Hbase1.3.1+Spark2.1.1

Building a big-data learning environment (CentOS 6.9 + Hadoop 2.7.3 + Hive 1.2.1 + HBase 1.3.1 + Spark 2.1.1)

Process                          Component   www.ljt.cosa    www.ljt.cos02   www.ljt.cos03   Notes
                                             192.168.1.11    192.168.1.12    192.168.1.13
NameNode                         Hadoop      Y               Y                               HA
DataNode                                     Y               Y               Y
ResourceManager                              Y               Y                               HA
NodeManager                                  Y               Y               Y
JournalNode                                  Y               Y               Y               odd count, at least 3 nodes
ZKFC (DFSZKFailoverController)               Y               Y                               runs wherever a NameNode runs
QuorumPeerMain                   ZooKeeper   Y               Y               Y
MySQL                            Hive        Y                                               Hive metastore database
Metastore (RunJar)                           Y
HIVE (RunJar)                                Y
HMaster                          HBase       Y               Y                               HA
HRegionServer                                Y               Y               Y
Spark (Master)                   Spark       Y               Y                               HA
Spark (Worker)                               Y               Y               Y

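The table implies an /etc/hosts mapping like the following on all three machines (a sketch; the IPs are read off the header row above):

192.168.1.11 www.ljt.cosa
192.168.1.12 www.ljt.cos02
192.168.1.13 www.ljt.cos03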
I previously built a setup with Federation, which needs at least four machines; it was overly complex and too heavy for a laptop. To study the Spark 2.x line I have dropped Federation and simplified the environment, while keeping it fully distributed.

All software packages:
apache-ant-1.9.9-bin.tar.gz
apache-hive-1.2.1-bin.tar.gz
apache-maven-3.3.9-bin.tar.gz
apache-tomcat-6.0.44.tar.gz
CentOS-6.9-x86_64-minimal.iso
findbugs-3.0.1.tar.gz
hadoop-2.7.3-src.tar.gz
hadoop-2.7.3.tar.gz
hadoop-2.7.3 (self-compiled for CentOS 6.9).tar.gz
hbase-1.3.1-bin (self-compiled).tar.gz
hbase-1.3.1-src.tar.gz
jdk-8u121-linux-x64.tar.gz
mysql-connector-java-5.6-bin.jar
protobuf-2.5.0.tar.gz
scala-2.11.11.tgz
snappy-1.1.3.tar.gz
spark-2.1.1-bin-hadoop2.7.tgz
Disable the firewall (on all three nodes)
[root@www.ljt.cosa ~]# service iptables stop
[root@www.ljt.cosa ~]# chkconfig iptables off
ZooKeeper
[root@www.ljt.cosa ~]# wget -O /root/zookeeper-3.4.9.tar.gz https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.4.9/zookeeper-3.4.9.tar.gz
[root@www.ljt.cosa ~]# tar -zxvf /root/zookeeper-3.4.9.tar.gz -C /root
[root@www.ljt.cosa ~]# cp /root/zookeeper-3.4.9/conf/zoo_sample.cfg /root/zookeeper-3.4.9/conf/zoo.cfg
[root@www.ljt.cosa ~]# vi /root/zookeeper-3.4.9/conf/zoo.cfg

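The original post does not show the zoo.cfg edits. A minimal sketch consistent with the directories created below and the three-node quorum (ports and timeouts are the stock defaults; the server list is an assumption based on this cluster):

# /root/zookeeper-3.4.9/conf/zoo.cfg (sketch)
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/root/zookeeper-3.4.9/zkData
clientPort=2181
server.1=www.ljt.cosa:2888:3888
server.2=www.ljt.cos02:2888:3888
server.3=www.ljt.cos03:2888:3888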
[root@www.ljt.cosa ~]# vi /root/zookeeper-3.4.9/bin/zkEnv.sh
[root@www.ljt.cosa ~]# mkdir /root/zookeeper-3.4.9/logs

[root@www.ljt.cosa ~]# vi /root/zookeeper-3.4.9/conf/log4j.properties

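The edits themselves are not shown; the usual change is to point ZooKeeper's logging at the new logs directory. A sketch of the relevant lines (assumed, based on the stock zkEnv.sh and log4j.properties):

# in /root/zookeeper-3.4.9/bin/zkEnv.sh (sketch)
ZOO_LOG_DIR=/root/zookeeper-3.4.9/logs
ZOO_LOG4J_PROP="INFO,ROLLINGFILE"

# in /root/zookeeper-3.4.9/conf/log4j.properties (sketch)
zookeeper.root.logger=INFO,ROLLINGFILE
zookeeper.log.dir=/root/zookeeper-3.4.9/logs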
[root@www.ljt.cosa ~]# mkdir /root/zookeeper-3.4.9/zkData
[root@www.ljt.cosa ~]# scp -r /root/zookeeper-3.4.9 www.ljt.cos02:/root
[root@www.ljt.cosa ~]# scp -r /root/zookeeper-3.4.9 www.ljt.cos03:/root

Each node gets its own myid, matching its server.N entry in zoo.cfg:
[root@www.ljt.cosa ~]# touch /root/zookeeper-3.4.9/zkData/myid
[root@www.ljt.cosa ~]# echo 1 > /root/zookeeper-3.4.9/zkData/myid
[root@www.ljt.cos02 ~]# touch /root/zookeeper-3.4.9/zkData/myid
[root@www.ljt.cos02 ~]# echo 2 > /root/zookeeper-3.4.9/zkData/myid
[root@www.ljt.cos03 ~]# touch /root/zookeeper-3.4.9/zkData/myid
[root@www.ljt.cos03 ~]# echo 3 > /root/zookeeper-3.4.9/zkData/myid
Environment variables
[root@www.ljt.cosa ~]# vi /etc/profile
export JAVA_HOME=/root/jdk1.8.0_121
export SCALA_HOME=/root/scala-2.11.11
export HADOOP_HOME=/root/hadoop-2.7.3
export HIVE_HOME=/root/apache-hive-1.2.1-bin
export HBASE_HOME=/root/hbase-1.3.1
export SPARK_HOME=/root/spark-2.1.1-bin-hadoop2.7
export PATH=.:$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:/root:$HIVE_HOME/bin:$HBASE_HOME/bin:$SPARK_HOME/bin
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
[root@www.ljt.cosa ~]# source /etc/profile
[root@www.ljt.cosa ~]# scp /etc/profile www.ljt.cos02:/etc
[root@www.ljt.cos02 ~]# source /etc/profile
[root@www.ljt.cosa ~]# scp /etc/profile www.ljt.cos03:/etc
[root@www.ljt.cos03 ~]# source /etc/profile
Hadoop
[root@www.ljt.cosa ~]# wget -O /root/hadoop-2.7.3.tar.gz http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
[root@www.ljt.cosa ~]# tar -zxvf /root/hadoop-2.7.3.tar.gz -C /root
[root@www.ljt.cosa ~]# vi /root/hadoop-2.7.3/etc/hadoop/hadoop-env.sh

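The hadoop-env.sh edit is not shown; the usual change is just to hard-code JAVA_HOME (value assumed from /etc/profile above):

# in /root/hadoop-2.7.3/etc/hadoop/hadoop-env.sh (sketch)
export JAVA_HOME=/root/jdk1.8.0_121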
[root@www.ljt.cosa ~]# vi /root/hadoop-2.7.3/etc/hadoop/hdfs-site.xml

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>64m</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>www.ljt.cosa:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>www.ljt.cos02:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>www.ljt.cosa:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>www.ljt.cos02:50070</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://www.ljt.cosa:8485;www.ljt.cos02:8485;www.ljt.cos03:8485/mycluster</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/root/hadoop-2.7.3/tmp/journal</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled.mycluster</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
</configuration>

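Note that sshfence assumes passwordless root SSH between the NameNodes (the same trust the scp/ssh commands in this post rely on). If it is not set up yet, a sketch:

[root@www.ljt.cosa ~]# ssh-keygen -t rsa -P '' -f /root/.ssh/id_rsa
[root@www.ljt.cosa ~]# ssh-copy-id root@www.ljt.cosa
[root@www.ljt.cosa ~]# ssh-copy-id root@www.ljt.cos02
[root@www.ljt.cosa ~]# ssh-copy-id root@www.ljt.cos03

Repeat from www.ljt.cos02 so failover can fence in both directions.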
[root@www.ljt.cosa ~]# vi /root/hadoop-2.7.3/etc/hadoop/core-site.xml

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/root/hadoop-2.7.3/tmp</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>www.ljt.cosa:2181,www.ljt.cos02:2181,www.ljt.cos03:2181</value>
  </property>
</configuration>

[root@www.ljt.cosa ~]# vi /root/hadoop-2.7.3/etc/hadoop/slaves
www.ljt.cosa
www.ljt.cos02
www.ljt.cos03
[root@www.ljt.cosa ~]# vi /root/hadoop-2.7.3/etc/hadoop/yarn-env.sh

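As with hadoop-env.sh, the edit is not shown; setting JAVA_HOME is usually all that is needed (assumed):

# in /root/hadoop-2.7.3/etc/hadoop/yarn-env.sh (sketch)
export JAVA_HOME=/root/jdk1.8.0_121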
[root@www.ljt.cosa ~]# vi /root/hadoop-2.7.3/etc/hadoop/mapred-site.xml

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>www.ljt.cosa:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>www.ljt.cosa:19888</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.max-age-ms</name>
    <value>6048000000</value>
  </property>
</configuration>


[root@www.ljt.cosa ~]# vi /root/hadoop-2.7.3/etc/hadoop/yarn-site.xml

<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-cluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>www.ljt.cosa</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>www.ljt.cos02</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>www.ljt.cosa:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>www.ljt.cos02:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>www.ljt.cosa:2181,www.ljt.cos02:2181,www.ljt.cos03:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://www.ljt.cosa:19888/jobhistory/logs</value>
  </property>
</configuration>

Create the JournalNode directory on each node:
[root@www.ljt.cosa ~]# mkdir -p /root/hadoop-2.7.3/tmp/journal
[root@www.ljt.cos02 ~]# mkdir -p /root/hadoop-2.7.3/tmp/journal
[root@www.ljt.cos03 ~]# mkdir -p /root/hadoop-2.7.3/tmp/journal

Replace /root/hadoop-2.7.3/lib/native with the native libraries from the self-compiled CentOS 6.9 build.

[root@www.ljt.cosa ~]# scp -r /root/hadoop-2.7.3/ www.ljt.cos02:/root
[root@www.ljt.cosa ~]# scp -r /root/hadoop-2.7.3/ www.ljt.cos03:/root

Check whether your Hadoop native library is 32-bit or 64-bit:
[root@www.ljt.cosa native]# file libhadoop.so.1.0.0
libhadoop.so.1.0.0: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, not stripped
[root@www.ljt.cosa native]# pwd
/root/hadoop-2.7.3/lib/native

Start ZooKeeper
[root@www.ljt.cosa ~]# /root/zookeeper-3.4.9/bin/zkServer.sh start
[root@www.ljt.cos02 ~]# /root/zookeeper-3.4.9/bin/zkServer.sh start
[root@www.ljt.cos03 ~]# /root/zookeeper-3.4.9/bin/zkServer.sh start

Format ZKFC state in ZooKeeper
[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/bin/hdfs zkfc -formatZK
[root@www.ljt.cosa ~]# /root/zookeeper-3.4.9/bin/zkCli.sh

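From the zkCli.sh prompt you can confirm the format worked: ls /hadoop-ha should list mycluster (a verification step, not in the original):

ls /hadoop-ha
quit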
Start the JournalNodes
[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start journalnode
[root@www.ljt.cos02 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start journalnode
[root@www.ljt.cos03 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start journalnode
Format and start the NameNodes
[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/bin/hdfs namenode -format
[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start namenode
[root@www.ljt.cos02 ~]# /root/hadoop-2.7.3/bin/hdfs namenode -bootstrapStandby
[root@www.ljt.cos02 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start namenode
Start ZKFC (on both NameNode hosts)
[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc
[root@www.ljt.cos02 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc
Start the DataNodes
[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start datanode
[root@www.ljt.cos02 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start datanode
[root@www.ljt.cos03 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start datanode
Start YARN
[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start resourcemanager
[root@www.ljt.cos02 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start resourcemanager

[root@www.ljt.cosa ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start nodemanager
[root@www.ljt.cos02 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start nodemanager
[root@www.ljt.cos03 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start nodemanager

[root@www.ljt.cosa ~]# hdfs dfs -chmod -R 777 /
Install MySQL
[root@www.ljt.cosa ~]# yum remove -y mysql-libs

[root@www.ljt.cosa ~]# yum install mysql-server
[root@www.ljt.cosa ~]# service mysqld start
[root@www.ljt.cosa ~]# chkconfig mysqld on
[root@www.ljt.cosa ~]# mysqladmin -u root password 'AAAaaa111'
[root@www.ljt.cosa ~]# mysqladmin -u root -h www.ljt.cosa password 'AAAaaa111'
[root@www.ljt.cosa ~]# mysql -h localhost -u root -p
Enter password: AAAaaa111
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'AAAaaa111' WITH GRANT OPTION;
mysql> flush privileges;

[root@www.ljt.cosa ~]# vi /etc/my.cnf
[client]
default-character-set=utf8
[mysql]
default-character-set=utf8
[mysqld]
character-set-server=utf8
lower_case_table_names = 1

[root@www.ljt.cosa ~]# service mysqld restart
Install Hive

The official spark-2.1.1-bin-hadoop2.7.tgz distribution bundles Hive 1.2.1, so Hive 1.2.1 is the matching version to install.

[root@www.ljt.cosa ~]# wget http://archive.apache.org/dist/hive/hive-1.2.1/apache-hive-1.2.1-bin.tar.gz
[root@www.ljt.cosa ~]# tar -xvf apache-hive-1.2.1-bin.tar.gz

Place the mysql-connector-java-5.6-bin.jar JDBC driver under /root/apache-hive-1.2.1-bin/lib/.

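For example, assuming the jar was downloaded to /root:

[root@www.ljt.cosa ~]# cp /root/mysql-connector-java-5.6-bin.jar /root/apache-hive-1.2.1-bin/lib/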
[root@www.ljt.cosa ~]# cp /root/apache-hive-1.2.1-bin/conf/hive-env.sh.template /root/apache-hive-1.2.1-bin/conf/hive-env.sh

[root@www.ljt.cosa ~]# vi /root/apache-hive-1.2.1-bin/conf/hive-env.sh

     export HADOOP_HOME=/root/hadoop-2.7.3

[root@www.ljt.cosa ~]# cp /root/apache-hive-1.2.1-bin/conf/hive-log4j.properties.template /root/apache-hive-1.2.1-bin/conf/hive-log4j.properties
[root@www.ljt.cosa ~]# vi /root/apache-hive-1.2.1-bin/conf/hive-site.xml

<configuration>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://www.ljt.cosa:9083</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://www.ljt.cosa:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>AAAaaa111</value>
  </property>
</configuration>

[root@www.ljt.cosa ~]# vi /etc/init.d/hive-metastore
#!/bin/sh
/root/apache-hive-1.2.1-bin/bin/hive --service metastore >/dev/null 2>&1 &
[root@www.ljt.cosa ~]# chmod 777 /etc/init.d/hive-metastore
[root@www.ljt.cosa ~]# ln -s /etc/init.d/hive-metastore /etc/rc.d/rc3.d/S65hive-metastore
[root@www.ljt.cosa ~]# hive
[root@www.ljt.cosa ~]# mysql -h localhost -u root -p
mysql> alter database hive character set latin1;
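The latin1 switch works around MySQL's 767-byte key-length limit, which the Hive 1.x metastore schema can hit under utf8. To verify the metastore wiring end to end, a quick smoke test (the table name is illustrative):

[root@www.ljt.cosa ~]# hive -e 'create table smoke_test(id int); show tables; drop table smoke_test;'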
Build and install HBase
http://archive.apache.org/dist/hbase/1.3.1/hbase-1.3.1-src.tar.gz

The official HBase 1.3.1 binary is built against Hadoop 2.5.1, so it has to be rebuilt against Hadoop 2.7.3.

Change the Hadoop version that pom.xml depends on, then rebuild as sketched below:

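A rebuild consistent with that pom change (a sketch; hadoop-two.version is the property HBase's pom.xml uses for the Hadoop 2 line, and the rebuilt tarball should land under hbase-assembly/target/):

# run from the unpacked hbase-1.3.1 source tree
mvn -DskipTests -Dhadoop-two.version=2.7.3 clean install
mvn -DskipTests -Dhadoop-two.version=2.7.3 package assembly:single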
In conf/hbase-env.sh, configure PermSize (only needed on JDK 7; it can safely be removed on JDK 8+):

export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"

export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"

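The post does not show hbase-site.xml or conf/regionservers; a minimal HA sketch consistent with the cluster above (all values are assumptions derived from the earlier configs):

<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://mycluster/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>www.ljt.cosa,www.ljt.cos02,www.ljt.cos03</value>
  </property>
</configuration>

conf/regionservers would then list all three hostnames, matching the HRegionServer row in the table at the top.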
Copy /etc/profile and the hbase-1.3.1 directory to the other two nodes.

[root@www.ljt.cosa ~]# start-hbase.sh

The backup HMaster has to be started by hand:

[root@www.ljt.cos02 ~]# hbase-daemon.sh start master

[root@www.ljt.cosa ~]# hbase shell
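A quick smoke test from the shell (table and column-family names are illustrative):

hbase(main):001:0> create 't_smoke', 'cf'
hbase(main):002:0> put 't_smoke', 'r1', 'cf:c1', 'v1'
hbase(main):003:0> scan 't_smoke'
hbase(main):004:0> disable 't_smoke'
hbase(main):005:0> drop 't_smoke'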
Spark
https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz

[root@www.ljt.cosa ~]# cp /root/spark-2.1.1-bin-hadoop2.7/conf/spark-env.sh.template /root/spark-2.1.1-bin-hadoop2.7/conf/spark-env.sh
[root@www.ljt.cosa ~]# vi /root/spark-2.1.1-bin-hadoop2.7/conf/spark-env.sh
export SCALA_HOME=/root/scala-2.11.11
export JAVA_HOME=/root/jdk1.8.0_121
export HADOOP_HOME=/root/hadoop-2.7.3
export HADOOP_CONF_DIR=/root/hadoop-2.7.3/etc/hadoop
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=www.ljt.cosa:2181,www.ljt.cos02:2181,www.ljt.cos03:2181 -Dspark.deploy.zookeeper.dir=/spark"
[root@www.ljt.cosa ~]# cp /root/spark-2.1.1-bin-hadoop2.7/conf/slaves.template /root/spark-2.1.1-bin-hadoop2.7/conf/slaves
[root@www.ljt.cosa ~]# vi /root/spark-2.1.1-bin-hadoop2.7/conf/slaves
www.ljt.cosa
www.ljt.cos02
www.ljt.cos03
[root@www.ljt.cosa ~]# scp -r /root/spark-2.1.1-bin-hadoop2.7 www.ljt.cos02:/root
[root@www.ljt.cosa ~]# scp -r /root/spark-2.1.1-bin-hadoop2.7 www.ljt.cos03:/root

[root@www.ljt.cosa ~]# /root/spark-2.1.1-bin-hadoop2.7/sbin/start-all.sh
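To verify the standalone master and workers, a quick job submission (the master URL is assumed from the node layout above; 7077 is the standalone default port):

[root@www.ljt.cosa ~]# /root/spark-2.1.1-bin-hadoop2.7/bin/run-example --master spark://www.ljt.cosa:7077 SparkPi 10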
./start.sh
zkServer.sh start
ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh start'
ssh root@www.ljt.cos03 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh start'

/root/hadoop-2.7.3/sbin/start-dfs.sh
/root/hadoop-2.7.3/sbin/start-yarn.sh

If YARN runs in HA mode, enable the following:

ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/yarn-daemon.sh start resourcemanager'

/root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc
ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc'
ssh root@www.ljt.cos03 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc'

/root/hadoop-2.7.3/bin/hdfs haadmin -ns mycluster -failover nn2 nn1
echo 'Y' | ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/bin/yarn rmadmin -transitionToActive --forcemanual rm1'
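To confirm which instance is currently active (stock HDFS/YARN admin commands):

/root/hadoop-2.7.3/bin/hdfs haadmin -getServiceState nn1
/root/hadoop-2.7.3/bin/yarn rmadmin -getServiceState rm1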

/root/hbase-1.3.1/bin/start-hbase.sh

If HBase runs in HA mode, enable the following:

ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/hbase-1.3.1/bin/hbase-daemon.sh start master'

/root/spark-2.1.1-bin-hadoop2.7/sbin/start-all.sh

If Spark runs in HA mode, enable the following:

ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/spark-2.1.1-bin-hadoop2.7/sbin/start-master.sh'

/root/hadoop-2.7.3/sbin/mr-jobhistory-daemon.sh start historyserver

echo '--------------www.ljt.cosa---------------'
jps | grep -v Jps | sort -k 2 -t ' '
echo '--------------www.ljt.cos02---------------'
ssh root@www.ljt.cos02 "export PATH=/usr/bin:$PATH;jps | grep -v Jps | sort -k 2 -t ' '"
echo '--------------www.ljt.cos03---------------'
ssh root@www.ljt.cos03 "export PATH=/usr/bin:$PATH;jps | grep -v Jps | sort -k 2 -t ' '"

./stop.sh
/root/spark-2.1.1-bin-hadoop2.7/sbin/stop-all.sh

/root/hbase-1.3.1/bin/stop-hbase.sh

If YARN runs in HA mode, uncomment the following:

ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/yarn-daemon.sh stop resourcemanager'

/root/hadoop-2.7.3/sbin/stop-yarn.sh
/root/hadoop-2.7.3/sbin/stop-dfs.sh

/root/hadoop-2.7.3/sbin/hadoop-daemon.sh stop zkfc
ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/hadoop-daemon.sh stop zkfc'

/root/zookeeper-3.4.9/bin/zkServer.sh stop
ssh root@www.ljt.cos02 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh stop'
ssh root@www.ljt.cos03 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh stop'

/root/hadoop-2.7.3/sbin/mr-jobhistory-daemon.sh stop historyserver
./shutdown.sh
ssh root@www.ljt.cos02 "export PATH=/usr/bin:$PATH;shutdown -h now"
ssh root@www.ljt.cos03 "export PATH=/usr/bin:$PATH;shutdown -h now"
shutdown -h now
./reboot.sh
ssh root@www.ljt.cos02 "export PATH=/usr/bin:$PATH;reboot"
ssh root@www.ljt.cos03 "export PATH=/usr/bin:$PATH;reboot"
reboot
