虚拟机准备3台,分别是hadoop01 hadoop02 hadoop03,所装系统为centos7
1.修改主机名
vim /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=hadoop01
NETWORKING_IPV6=no
PEERNTP=no
vim /etc/hostname
hadoop01
2.主机名映射
vim /etc/hosts(3台机子都改)
192.168.133.xxx hadoop01
192.168.133.xxx hadoop02
192.168.133.xxx hadoop03
修改C:\Windows\System32\drivers\etc\hosts 文件(便于后期本机用主机名访问集群服务)
192.168.133.xxx hadoop01
192.168.133.xxx hadoop02
192.168.133.xxx hadoop03
3.设置静态ip(3台机子都改)
vim /etc/sysconfig/network-scripts/ifcfg-ens33
TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="bfaae4ba-2275-4a2c-85db-c94585096a42"
DEVICE="ens33"
ONBOOT="yes"
IPADDR="192.168.133.xxx"
NETMASK="255.255.255.0"
GATEWAY="192.168.133.x"
DNS1="192.168.133.x"
service network restart
4.关闭防火墙(设置开机禁用)
查看状态:systemctl status firewalld
开启: systemctl start firewalld.service
重启:systemctl restart firewalld.service
关闭:systemctl stop firewalld.service
开机禁用:systemctl disable firewalld.service
5.关闭selinux
vim /etc/sysconfig/selinux
修改内容SELINUX=disabled
6.ssh无密码访问
ssh-keygen -t rsa(主节点上输入回车到结束)
ssh-copy-id hadoop01(根据提示输入密码)
ssh-copy-id hadoop02(根据提示输入密码)
ssh-copy-id hadoop03(根据提示输入密码)
7.Linux系统最大打开文件数量设置
查看命令
ulimit -a ## 查看所有
ulimit -n ##查看同时打开的文件数量
ulimit -u ##查看同时的进程数量
修改命令
vim /etc/security/limits.conf(添加下面的内容)
* soft nofile 32768
* hard nofile 1048576
* soft nproc 65536
* hard nproc 65536
* soft memlock unlimited
* hard memlock unlimited
vim /etc/security/limits.d/20-nproc.conf(添加下面的内容;注意:CentOS 7 中该文件名为 20-nproc.conf,90-nproc.conf 是 CentOS 6 的文件名)
* soft nproc 65536
8.时钟同步
选择一台机器作为时间服务器: hadoop01
hadoop01进行操作:
修改ntpd服务的配置参数:
vim /etc/ntp.conf (添加下面的内容)
server 127.127.0.1
fudge 127.127.0.1 stratum 8
启动ntpd服务:
service ntpd restart
systemctl enable ntpd.service ## 开机启动服务
创建同步脚本:
vim /opt/date_sync.sh
service ntpd stop
/usr/sbin/ntpdate -u hadoop01
service ntpd start
修改权限:
chmod u+x /opt/date_sync.sh
运行shell脚本:
cd /opt
./date_sync.sh
同步到其他机器:
scp date_sync.sh hadoop02:/opt
scp date_sync.sh hadoop03:/opt
启动定时任务(所有机器)
crontab -e
0-59/5 * * * * /opt/date_sync.sh
9.重启机器
10.集群搭建准备工作(所有机器)
在 /opt 下创建两个目录 softwares 和 modules
softwares 中放所有的安装包
modules 中放解压后的文件
11.安装jdk(所有机器)
需要卸载系统中已有的jdk,然后重新安装对应版本的jdk
查看已有的jdk
rpm -qa | grep java ## 查看到包含java的服务
java-1.7.0-openjdk-1.7.0.45-2.4.3.3.el6.x86_64
java-1.6.0-openjdk-1.6.0.0-1.66.1.13.0.el6.x86_64
tzdata-java-2013g-1.el6.noarch
卸载jdk
rpm -e --nodeps python-javapackages-3.4.1-11.el7.noarch java-1.8.0-openjdk-1.8.0.161-2.b14.el7.x86_64
javassist-3.16.1-10.el7.noarch javamail-1.4.6-8.el7.noarch java-1.8.0-openjdk-headless-1.8.0.161-2.b14.el7.x86_64
tzdata-java-2018c-1.el7.noarch javapackages-tools-3.4.1-11.el7.noarch
安装jdk
cd /opt/softwares/
rpm -ivh jdk-8u11-linux-x64.rpm
配置JAVA_HOME环境变量
vim /etc/profile
export PATH=$PATH:/usr/java/jdk1.8.0_11/bin(不配置则 jps 命令不可用)
source /etc/profile
12.ZooKeeper-3.4.10集群安装
(注:本节安装 ZooKeeper,使用其自带的快照与事务日志存储,无需额外数据库;原文此句系第14节 MySQL 部分误复制而来)
解压:
tar -zxvf /opt/softwares/zookeeper-3.4.10.tar.gz -C /opt/modules/
cd /opt/modules/zookeeper-3.4.10/conf
cp zoo_sample.cfg zoo.cfg
修改zoo.cfg
vim zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/opt/modules/zookeeper-3.4.10/data
# the port at which the clients will connect
clientPort=2181
server.1=hadoop01:2888:3888
server.2=hadoop02:2888:3888
server.3=hadoop03:2888:3888
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
设置myid
在/opt/modules/zookeeper-3.4.10/data 目录下创建一个 myid 的文件
touch myid
添加 myid 文件,注意一定要在 linux 里面创建,在 notepad++里面很可能乱码
在文件中添加与 server 对应的编号:如 1
各节点分发:将配置好的文件拷贝到其他机器
scp -r /opt/modules/zookeeper-3.4.10/ root@hadoop02:/opt/modules/
scp -r /opt/modules/zookeeper-3.4.10/ root@hadoop03:/opt/modules/
并分别修改 myid 文件中内容为 2,3
配置环境变量
export ZOOKEEPER_HOME=/opt/modules/zookeeper-3.4.10
export PATH=$PATH:$ZOOKEEPER_HOME/bin
修改日志输出路径为指定目录:
修改zkEnv.sh中的
if [ "x${ZOO_LOG_DIR}" = "x" ]
then
ZOO_LOG_DIR="/opt/modules/zookeeper-3.4.10/log"
fi
if [ "x${ZOO_LOG4J_PROP}" = "x" ]
then
ZOO_LOG4J_PROP="INFO,ROLLINGFILE"
fi
修改log4j.properties中的
zookeeper.root.logger=INFO,ROLLINGFILE
常用操作命令
(1)启动 zookeeper
zkServer.sh start
(2)查看状态
zkServer.sh status
(3)停止zookeeper
zkServer.sh stop
启动若报错:java.net.NoRouteToHostException: No route to host
一般是防火墙没关闭
13.搭建hadoop集群的HA
解压:
tar -zxvf /opt/softwares/hadoop-2.7.7.tar.gz -C /opt/modules/
修改hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_11
修改core-site.xml
fs.defaultFS
hdfs://ns1
hadoop.tmp.dir
/opt/modules/hadoop-2.7.7/data/tmp
ha.zookeeper.quorum
hadoop01:2181,hadoop02:2181,hadoop03:2181
io.file.buffer.size
131072
hadoop.proxyuser.root.hosts
*
hadoop.proxyuser.root.groups
*
修改hdfs-site.xml
dfs.nameservices
ns1
dfs.ha.namenodes.ns1
nn1,nn2
dfs.namenode.rpc-address.ns1.nn1
hadoop01:8020
dfs.namenode.http-address.ns1.nn1
hadoop01:50070
dfs.namenode.rpc-address.ns1.nn2
hadoop02:8020
dfs.namenode.http-address.ns1.nn2
hadoop02:50070
dfs.namenode.shared.edits.dir
qjournal://hadoop01:8485;hadoop02:8485;hadoop03:8485/ns1
dfs.journalnode.edits.dir
/opt/modules/hadoop-2.7.7/journal
dfs.ha.automatic-failover.enabled
true
dfs.client.failover.proxy.provider.ns1
org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
dfs.ha.fencing.methods
sshfence
shell(/bin/true)
dfs.ha.fencing.ssh.private-key-files
/root/.ssh/id_rsa
dfs.ha.fencing.ssh.connect-timeout
30000
dfs.namenode.name.dir
file:/opt/modules/hadoop-2.7.7/data/tmp/dfs/name
dfs.datanode.data.dir
file:/opt/modules/hadoop-2.7.7/data/tmp/dfs/data
dfs.replication
3
dfs.webhdfs.enabled
true
dfs.journalnode.http-address
0.0.0.0:8480
dfs.journalnode.rpc-address
0.0.0.0:8485
ha.zookeeper.quorum
hadoop01:2181,hadoop02:2181,hadoop03:2181
修改mapred-site.xml
mapreduce.framework.name
yarn
mapreduce.jobhistory.address
0.0.0.0:10020
mapreduce.jobhistory.webapp.address
0.0.0.0:19888
修改yarn-site.xml
yarn.resourcemanager.connect.retry-interval.ms
2000
yarn.resourcemanager.ha.enabled
true
yarn.resourcemanager.cluster-id
yrc
yarn.resourcemanager.ha.rm-ids
rm1,rm2
ha.zookeeper.quorum
hadoop01:2181,hadoop02:2181,hadoop03:2181
yarn.resourcemanager.ha.automatic-failover.enabled
true
yarn.resourcemanager.hostname.rm1
hadoop01
yarn.resourcemanager.hostname.rm2
hadoop02
yarn.resourcemanager.recovery.enabled
true
yarn.resourcemanager.zk-address
hadoop01:2181,hadoop02:2181,hadoop03:2181
yarn.resourcemanager.zk-state-store.address
hadoop01:2181,hadoop02:2181,hadoop03:2181
yarn.resourcemanager.store.class
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore
yarn.resourcemanager.cluster-id
ns1-yarn
yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms
5000
yarn.resourcemanager.address.rm1
hadoop01:8132
yarn.resourcemanager.scheduler.address.rm1
hadoop01:8130
yarn.resourcemanager.webapp.address.rm1
hadoop01:23188
yarn.resourcemanager.resource-tracker.address.rm1
hadoop01:8131
yarn.resourcemanager.admin.address.rm1
hadoop01:8033
yarn.resourcemanager.ha.admin.address.rm1
hadoop01:23142
yarn.resourcemanager.address.rm2
hadoop02:8132
yarn.resourcemanager.scheduler.address.rm2
hadoop02:8130
yarn.resourcemanager.webapp.address.rm2
hadoop02:23188
yarn.resourcemanager.resource-tracker.address.rm2
hadoop02:8131
yarn.resourcemanager.admin.address.rm2
hadoop02:8033
yarn.resourcemanager.ha.admin.address.rm2
hadoop02:23142
yarn.nodemanager.aux-services
mapreduce_shuffle
yarn.nodemanager.aux-services.mapreduce.shuffle.class
org.apache.hadoop.mapred.ShuffleHandler
yarn.nodemanager.local-dirs
/opt/modules/hadoop-2.7.7/yarn
yarn.nodemanager.log-dirs
/opt/modules/hadoop-2.7.7/logs
mapreduce.shuffle.port
23080
yarn.client.failover-proxy-provider
org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider
yarn.resourcemanager.ha.automatic-failover.zk-base-path
/yarn-leader-election
Optional setting. The default value is /yarn-leader-election
xml配置文件中不能有汉字
修改slaves
hadoop01
hadoop02
hadoop03
将配置好的hadoop拷贝到其他节点
scp -r /opt/modules/hadoop-2.7.7/ root@hadoop02:/opt/modules/hadoop-2.7.7
scp -r /opt/modules/hadoop-2.7.7/ root@hadoop03:/opt/modules/hadoop-2.7.7
配置hadoop环境变量:
export HADOOP_HOME=/opt/modules/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
启动测试集群
启动Zookeeper集群
分别在hadoop01,hadoop02,hadoop03上执行
zkServer.sh start 启动zookeeper
然后查看状态 zkServer.sh status
(一个leader,两个follower)zookeeper正常启动
格式化HDFS的Zookeeper存储目录
在 hadoop01上执行(只需在一个 NameNode 节点执行一次即可):hdfs zkfc -formatZK
启动 JournalNode 集群
所有 journalnode 节点上分别执行:hadoop-daemon.sh start journalnode
格式化并启动第一个 NameNode
选择 hadoop01
格式化当前节点的 namenode 数据:hdfs namenode -format
格式化 journalnode 的数据,这个是 ha 需要做的:hdfs namenode -initializeSharedEdits
启动当前节点的 namenode 服务:hadoop-daemon.sh start namenode
格式化并启动第二个 NameNode
在 hadoop02执行:
hadoop01已经格式化过,将hadoop01上data目录下的内容同步至hadoop02
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
启动所有DataNode
每个 datanode 上执行:hadoop-daemon.sh start datanode
启动 ZooKeeperFailoverController
所有 namenode 节点分别执行:hadoop-daemon.sh start zkfc
登陆 namenode 服务器 web 端查看服务器状态
此时登陆 http://hadoop01:50070 与 http://hadoop02:50070(windows中没配置主机名映射则用ip访问)
其中一个为 active 另一个为 standby 状态。
启动YARN
在hadoop01上执行:start-yarn.sh
启动resourcemanager
hadoop02 上启动 resourcemanager:yarn-daemon.sh start resourcemanager
登陆 resourcemanager 服务器 web 端查看服务器状态
此时登陆 http://hadoop01:23188 与 http://hadoop02:23188
其中一个为 active 另一个为 standby 状态。活跃节点可以正常访问,备用节点会自动跳转至活跃节点的 web 地址。
http://resourcemanager_ipaddress:23188
启动Hadoop集群也可用(start-all.sh)
停止Hadoop集群也可用(stop-all.sh)
14.安装mysql(hadoop01上)
选择MySQL作为数据存在的容器,默认使用postgresql
mysql的安装采用源码编译的方式
http://dev.mysql.com/doc/refman/5.6/en/linux-installation.html
mysql安装:
选择mysql安装的机器: hadoop01
实际环境中是一个高配的机器,而且数据磁盘做过冗余
上传mysql安装需要的文件
采用源码安装(15-20分钟左右)
解压:
cd /opt/modules
tar -zxvf /opt/softwares/mysql-5.6.26.tar.gz
安装必要的服务:
yum -y install gcc gcc-c++ gdb cmake ncurses-devel bison bison-devel
进行编译:
cd /opt/modules/mysql-5.6.26/
命令如下;
cmake \
-DCMAKE_INSTALL_PREFIX=/usr/local/mysql \
-DMYSQL_DATADIR=/usr/local/mysql/data \
-DSYSCONFDIR=/etc \
-DWITH_INNOBASE_STORAGE_ENGINE=1 \
-DWITH_PARTITION_STORAGE_ENGINE=1 \
-DMYSQL_UNIX_ADDR=/tmp/mysql.sock \
-DMYSQL_TCP_PORT=3306 \
-DDEFAULT_CHARSET=utf8 \
-DDEFAULT_COLLATION=utf8_general_ci
参数含义:
CMAKE_INSTALL_PREFIX: mysql服务的安装路径,也就是最终mysql位于的地方
MYSQL_DATADIR: mysql数据存储目录,同时一些日志文件也会存储在这儿
MYSQL_TCP_PORT: 端口号
DEFAULT_CHARSET/DEFAULT_COLLATION: 字符集
编译
make 需要20分钟
make install
mysql配置:
主要配置开机启动mysql服务,需要配置一些常用的配置项
添加mysql用户组和用户
groupadd mysql
useradd -r -g mysql mysql
id mysql ## 查看
mysql初始化
cd /usr/local/mysql/scripts/
./mysql_install_db --basedir=/usr/local/mysql --datadir=/usr/local/mysql/data --user=mysql
期望使用service命令管理mysql
cp /opt/modules/mysql-5.6.26/support-files/mysql.server /etc/init.d/mysql
开机启动mysql
chkconfig mysql on
把文件内容改成和mysql根目录下的my.cnf文件内容一致
启动服务
service mysql start(可能会报/etc/init.d/mysql没权限,赋执行权限即可)
配置环境变量:
为了方便操作,把mysql的命令添加到PATH中去
vim /etc/profile
export MYSQL_HOME=/usr/local/mysql
export PATH=$PATH:$MYSQL_HOME/bin
source /etc/profile
设置密码:
mysql
mysql> set password=password("123456"); ## 设置密码
Query OK, 0 rows affected (0.01 sec)
mysql> flush privileges; ## 刷新
Query OK, 0 rows affected (0.00 sec)
mysql> exit
Bye
mysql -uroot -p123456(登录测试)
Windows Navicat连接mysql
mysql -u root -proot
mysql>GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root的连接密码' WITH GRANT OPTION;
如果报错,无法连接,则执行下面语句查看是否合适
查看网络端口信息:
netstat -ntpl
查看防火墙的状态,发现3306端口的数据包是否是丢弃状态
iptables -vnL
清除防火墙中链中的规则
iptables -F
15.搭建hive集群
解压:
tar -zxvf /opt/softwares/apache-hive-2.3.0-bin.tar.gz -C /opt/modules/
修改文件夹名称:
cd /opt/modules/
mv apache-hive-2.3.0-bin hive-2.3.0
配置环境变量:
/etc/profile 中配置
export HIVE_HOME=/opt/modules/hive-2.3.0
export PATH=$PATH:$HIVE_HOME/bin
修改hive-env.sh文件:
将hive-env.sh.template文件复制为hive-env.sh, 编辑hive-env.sh文件(添加下面内容)
JAVA_HOME=/usr/java/jdk1.8.0_11
HADOOP_HOME=/opt/modules/hadoop-2.7.7
HIVE_HOME=/opt/modules/hive-2.3.0
export HIVE_CONF_DIR=$HIVE_HOME/conf
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib:$HADOOP_HOME/lib:$HIVE_HOME/lib
export HADOOP_OPTS="-Dorg.xerial.snappy.tempdir=/tmp
-Dorg.xerial.snappy.lib.name=libsnappyjava.jnilib $HADOOP_OPTS"
编辑hive-site.xml文件:
将hive-default.xml.template文件拷贝为hive-site.xml, 并编辑hive-site.xml文件(删除所有内容,添加下面内容)
javax.jdo.option.ConnectionURL
jdbc:mysql://hadoop01:3306/metastore?createDatabaseIfNotExist=true
JDBC connect string for a JDBC metastore
javax.jdo.option.ConnectionDriverName
com.mysql.jdbc.Driver
Driver class name for a JDBC metastore
javax.jdo.option.ConnectionUserName
root
username to use against metastore database
javax.jdo.option.ConnectionPassword
123456
password to use against metastore database
datanucleus.autoCreateSchema
true
datanucleus.autoCreateTables
true
datanucleus.autoCreateColumns
true
hive.metastore.warehouse.dir
/user/hive/warehouse
location of default database for the warehouse
hive.downloaded.resources.dir
/opt/modules/hive-2.3.0/tmp/resources
Temporary local directory for added resources in the remote file system.
hive.exec.dynamic.partition
true
hive.exec.dynamic.partition.mode
nonstrict
hive.exec.local.scratchdir
/opt/modules/hive-2.3.0/tmp/HiveJobsLog
Local scratch space for Hive jobs
hive.downloaded.resources.dir
/opt/modules/hive-2.3.0/tmp/ResourcesLog
Temporary local directory for added resources in the remote file system.
hive.querylog.location
/opt/modules/hive-2.3.0/tmp/HiveRunLog
Location of Hive run time structured log file
hive.server2.logging.operation.log.location
/opt/modules/hive-2.3.0/tmp/OpertitionLog
Top level directory where operation tmp are stored if logging functionality is enabled
hive.server2.thrift.bind.host
hadoop01
hive.server2.thrift.port
10000
hive.server2.thrift.http.port
10001
hive.server2.thrift.http.path
cliservice
hive.server2.webui.host
hadoop01
hive.server2.webui.port
10002
hive.scratch.dir.permission
755
hive.server2.enable.doAs
false
hive.auto.convert.join
false
spark.dynamicAllocation.enabled
true
spark.driver.extraJavaOptions
-XX:PermSize=128M -XX:MaxPermSize=512M
hive.cli.print.header
true
Whether to print the names of the columns in query output.
hive.cli.print.current.db
true
Whether to include the current database in the Hive prompt.
hive.metastore.schema.verification
false
Enforce metastore schema version consistency.
True: Verify that version information stored in is compatible with one from Hive jars. Also disable automatic schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures proper metastore schema migration. (Default)
False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.
拷贝JDBC包
将JDBC的jar包放入$HIVE_HOME/lib目录下:
cp /opt/softwares/mysql-connector-java-5.1.26-bin.jar /opt/modules/hive-2.3.0/lib/
拷贝jline扩展包
将$HIVE_HOME/lib目录下的jline-2.12.jar包拷贝到$HADOOP_HOME/share/hadoop/yarn/lib目录下,并删除$HADOOP_HOME/share/hadoop/yarn/lib目录下旧版本的jline包
拷贝tools.jar包
复制$JAVA_HOME/lib目录下的tools.jar到$HIVE_HOME/lib下
执行初始化Hive操作
选用MySQL和Derby二者之一为元数据库
注意:先查看MySQL中是否有残留的Hive元数据,若有,需先删除
schematool -dbType mysql -initSchema ## MySQL作为元数据库
其中mysql表示用mysql做为存储hive元数据的数据库,
若不用mysql做为元数据库, 则执行
schematool -dbType derby -initSchema ## Derby作为元数据库
脚本hive-schema-1.2.1.mysql.sql会在配置的Hive元数据库中初始化创建表
启动Metastore服务:
执行Hive前, 须先启动metastore服务, 否则会报错
./hive --service metastore
然后打开另一个终端窗口,之后再启动Hive进程
搭建hive遇见的问题:
message:Version information not found in metastore
修改conf/hive-site.xml 中的 “hive.metastore.schema.verification”值为 false 即可解决
Access denied for user 'root'@'hadoop01' (using password: YES)
grant all privileges on *.* to root@hadoop01 identified by '123456';
flush privileges;
16.搭建hbase集群
解压:
tar -zxvf /opt/softwares/hbase-2.0.0-bin.tar.gz -C /opt/modules/
配置环境变量:
/etc/profile 中配置
export HBASE_HOME=/opt/modules/hbase-2.0.0
export PATH=$PATH:$HBASE_HOME/bin
配置hbase-env.sh
开启JAVA_HOME配置
export JAVA_HOME=/usr/java/jdk1.8.0_11
关闭HBase自带的zookeeper,使用zookeeper集群
export HBASE_MANAGES_ZK=false
配置hbase-site.xml
hbase.rootdir
hdfs://ns1/hbase(注意:HA集群应使用nameservice名称ns1,若写死单个NameNode地址,主备切换后HBase将无法访问HDFS)
hbase.cluster.distributed
true
hbase.zookeeper.quorum
hadoop01,hadoop02,hadoop03
hbase.temp.dir
/opt/modules/hbase-2.0.0/tmp
hbase.zookeeper.property.dataDir
/opt/modules/hbase-2.0.0/tmp/zookeeper
hbase.master.info.port
60010
配置regionservers
hadoop01
hadoop02
hadoop03
配置backup-masters
(conf目录下)
vim backup-masters
hadoop02
复制Hadoop配置文件hdfs-site.xml到HBase的conf目录
cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml $HBASE_HOME/conf/
复制文件到所有的regionservers服务器中
scp -r /opt/modules/hbase-2.0.0/ root@hadoop02:/opt/modules/
scp -r /opt/modules/hbase-2.0.0/ root@hadoop03:/opt/modules/
启动命令:
启动HBase start-hbase.sh
停止HBase stop-hbase.sh
查看web页面:http://192.168.133.160:60010/master-status#userTables
17.搭建spark集群
解压 :
tar -zxvf /opt/softwares/spark-2.4.0-bin-hadoop2.7.tgz -C /opt/modules
修改spark-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_11
HADOOP_CONF_DIR=/opt/modules/hadoop-2.7.7/etc/hadoop
SPARK_LOCAL_IP=hadoop01(节点主机名)
export SPARK_LIBRARY_PATH=${SPARK_HOME}/lib
export SCALA_LIBRARY_PATH=${SPARK_HOME}/lib
export SPARK_MASTER_HOST=192.168.133.160
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=8080
export SPARK_WORKER_CORES=3
export SPARK_WORKER_MEMORY=3G
export SPARK_WORKER_PORT=7078
export SPARK_WORKER_WEBUI_PORT=8081
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=1G(注意:与上文 SPARK_WORKER_MEMORY=3G 重复且冲突,后定义的 1G 会覆盖 3G,应只保留其中一行)
export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://hadoop01/spark/job/history"
修改spark-defaults.conf
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoop01/spark/job/history
修改slaves
hadoop01
hadoop02
hadoop03
节点分发
scp -r /opt/modules/spark-2.4.0-bin-hadoop2.7/ root@hadoop02:/opt/modules/
scp -r /opt/modules/spark-2.4.0-bin-hadoop2.7/ root@hadoop03:/opt/modules/
配置SPARK_HOME
export SPARK_HOME=/opt/modules/spark-2.4.0-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
hdfs创建目录:
/spark/job/history
查看/spark/job/history文件:
hdfs dfs -chmod -R 755 /spark(授权)
启动master:start-master.sh
启动slave: start-slave.sh spark://192.168.133.xxx:7077
启动spark-shell:spark-shell
测试:run-example SparkPi 查看是否输出:Pi is roughly 3.14374
查看web页面;http://192.168.133.160:8080/