随着信息化时代的进步，业务系统的数据量出现了爆发式增长，直接后果就是数据库中的数据量剧增；同时，部分业务系统需要实时数据，另一些业务系统需要离线计算后的数据，大数据技术由此应运而生。因此最近在学习大数据相关的知识点，先从最简单的环境搭建开始。
CDH-6.2.0-1.cdh6.2.0.p0.967373-el7 压缩包,内含JDK
链接: https://pan.baidu.com/s/13yexjyrqJtwDmRZq7fXo9g 密码: wfwa
CentOS 7.6 ISO 镜像
链接: https://pan.baidu.com/s/1Sl3xWtNK0dFiKp9s9Yt3DQ 密码: umpl
# Name this node cdh-master.
hostnamectl set-hostname cdh-master
# Append the address-to-name mappings of the three cluster nodes to /etc/hosts.
cat >> /etc/hosts <<'EOF'
192.168.1.56 cdh-master
192.168.1.57 cdh-slave-1
192.168.1.58 cdh-slave-2
EOF
# Disable SELinux.
# Stop enforcing on the running system right away. `|| true` is deliberate:
# setenforce exits non-zero when SELinux is already disabled.
setenforce 0 || true
# Make it permanent by changing SELINUX=enforcing to SELINUX=disabled in
# /etc/selinux/config. The pattern is anchored at line start so that only the
# real setting is rewritten and the "# SELINUX= can take one of ..." comment
# line in the stock file is left untouched.
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
CentOS 7 默认的防火墙是 firewalld
# Stop the running firewalld service, then disable it so it is not started
# again at boot.
for action in stop disable; do
  systemctl "$action" firewalld
done
因为CDH内有Hadoop,所以建议所有节点都做免密登录,无需包含本身节点,比如master只需要添加slave即可
# Generate an RSA key pair for this node (interactive: accept the prompts).
ssh-keygen -t rsa
# Authorize this node's own public key. Append instead of copying over the
# file so keys that are already authorized (for example ones pushed from the
# other nodes) are not wiped. Absolute paths are used so the shell's working
# directory is not changed for the commands that follow.
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
# Push this node's public key to each cluster node so that this node can log
# in to them as root without a password.
ssh-copy-id -i root@cdh-master
ssh-copy-id -i root@cdh-slave-1
ssh-copy-id -i root@cdh-slave-2
各节点之间需要保持时间同步，我这里使用的是 ntp
# Install the ntp package without prompting for confirmation.
yum install -y ntp
修改 /etc/ntp.conf，
将同步服务器设置为：server ntp.aliyun.com
# Schedule a re-sync against ntp.aliyun.com every 10 minutes, logging stdout
# to /var/log/ntpdate.log.
# /etc/crontab is the system crontab: every entry must name the user to run as
# between the schedule and the command. Without the "root" field cron takes
# "/usr/sbin/ntpdate" as the user name and the job never runs.
# NOTE(review): ntpdate may refuse to run while ntpd holds the NTP port;
# confirm whether both the cron job and the daemon are wanted.
echo "*/10 * * * * root /usr/sbin/ntpdate ntp.aliyun.com >> /var/log/ntpdate.log" >> /etc/crontab
# Start ntpd now and enable it at boot.
systemctl start ntpd
systemctl enable ntpd
# Set vm.swappiness to 10 for the running kernel and persist it in
# /etc/sysctl.conf.
sysctl vm.swappiness=10
echo 'vm.swappiness=10' >> /etc/sysctl.conf
# Turn off transparent huge pages (defrag and enabled) for the running kernel.
echo never > /sys/kernel/mm/transparent_hugepage/defrag
echo never > /sys/kernel/mm/transparent_hugepage/enabled
# Re-apply the same two settings at every boot through rc.local.
echo 'echo never > /sys/kernel/mm/transparent_hugepage/defrag' >> /etc/rc.local
echo 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' >> /etc/rc.local
# On CentOS 7 /etc/rc.local points at /etc/rc.d/rc.local, which ships without
# the execute bit; it is only run at boot once it is executable.
chmod +x /etc/rc.d/rc.local
我这里的MySQL只是一个演示作用,实际生产环境请使用可靠性好的MySQL服务
# Download the MySQL 5.7 community yum repository package and register it.
wget http://dev.mysql.com/get/mysql57-community-release-el7-11.noarch.rpm
yum -y install mysql57-community-release-el7-11.noarch.rpm
# Install the server; --nogpgcheck skips package signature verification.
yum -y install mysql-server --nogpgcheck
# Start MySQL 5.7 and check its state. The unit that was started is
# mysqld.service, so that is the one to query.
systemctl start mysqld.service
systemctl status mysqld.service
# Open a MySQL shell as root.
# NOTE(review): this assumes root's password is already "root"; a fresh 5.7
# install presumably generates a temporary password (logged in
# /var/log/mysqld.log) that has to be changed first. Confirm.
mysql -uroot -proot
# Run the statements below inside the mysql shell (they are shown without the
# "mysql>" prompt so they can be pasted as-is).
# Allow root to connect from any host so that remote clients can reach the server.
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION;
# Create one utf8 database and one dedicated account for each of:
# cmserver, metastore (user hive), amon, rman, hue and oozie.
CREATE DATABASE cmserver DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON cmserver.* TO 'cmserveruser'@'%' IDENTIFIED BY 'cmserveruser';
CREATE DATABASE metastore DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON metastore.* TO 'hive'@'%' IDENTIFIED BY 'hive';
CREATE DATABASE amon DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON amon.* TO 'amonuser'@'%' IDENTIFIED BY 'amonuser';
CREATE DATABASE rman DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON rman.* TO 'rmanuser'@'%' IDENTIFIED BY 'rmanuser';
CREATE DATABASE hue DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON hue.* TO 'hue'@'%' IDENTIFIED BY 'hue';
CREATE DATABASE oozie DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON oozie.* TO 'oozie'@'%' IDENTIFIED BY 'oozie';
# Reload the grant tables so the new privileges take effect.
FLUSH PRIVILEGES;
建议使用 Oracle JDK（是否采用请自行斟酌）
JDK 安装包位于上文提供的 CDH 压缩包中
# Install the Oracle JDK 1.8.0_181 RPM.
rpm -ihv oracle-j2sdk1.8-1.8.0+update181-1.x86_64.rpm
# Configure the Java environment variables. They are appended to /etc/profile
# (quoted heredoc, so $PATH and $JAVA_HOME are written literally) because plain
# `export` commands would only affect the current shell and would not be
# picked up by the `source /etc/profile` below or by later logins.
cat >> /etc/profile <<'EOF'
export JAVA_HOME=/usr/java/jdk1.8.0_181-cloudera
export PATH=$PATH:$JAVA_HOME/bin
EOF
# Reload /etc/profile so the current shell sees the new variables.
source /etc/profile
######################## MySQL JDBC driver: begin ########################
# unzip is required to unpack the driver archive.
yum -y install unzip
# Download and unpack the Connector/J 5.1.47 archive.
connector=mysql-connector-java-5.1.47
wget "https://dev.mysql.com/get/Downloads/Connector-J/${connector}.zip"
unzip "${connector}.zip"
# Create the driver directory; per the original author's note, CDH looks for
# the MySQL driver here by default.
driver_dir=/usr/share/java
mkdir -p "${driver_dir}"
# Move the jar into place under the version-less name mysql-connector-java.jar.
mv "${connector}/${connector}.jar" "${driver_dir}/mysql-connector-java.jar"
######################## MySQL JDBC driver: end ########################
########################## Cloudera Manager install: begin ###########################
# Install httpd, with all yum exclude rules disabled for this transaction.
yum --disableexcludes=all install -y httpd
# Create the parcel repository directory and move the CDH parcel files and
# manifest.json from the current directory into it.
parcel_repo=/opt/cloudera/parcel-repo/
mkdir -p "${parcel_repo}"
mv CDH-6.2.0-1.cdh6.2.0.p0.967373-el7.parcel* "${parcel_repo}"
mv manifest.json "${parcel_repo}"
# Install the Cloudera Manager RPMs from the current directory, in order:
# daemons, agent, server.
for component in daemons agent server; do
  yum localinstall -y "cloudera-manager-${component}-6.2.0-968826.el7.x86_64.rpm"
done
# Edit the Cloudera Manager Server database settings; the lines that follow
# are the values to put in the file.
vim /etc/cloudera-scm-server/db.properties
# The database type
# Currently 'mysql', 'postgresql' and 'oracle' are valid databases.
com.cloudera.cmf.db.type=mysql
# The database host
# If a non standard port is needed, use 'hostname:port'
com.cloudera.cmf.db.host=cdh-master
# The database name (must match the database created by the SQL statements above)
com.cloudera.cmf.db.name=cmserver
# The database user (must match the user created by the SQL statements above)
com.cloudera.cmf.db.user=cmserveruser
# The database user's password (must match the password set by the SQL statements above)
com.cloudera.cmf.db.password=cmserveruser
com.cloudera.cmf.db.setupType=EXTERNAL
########################## Cloudera Manager install: end ###########################
########################## Agent configuration: begin ###########################
# Configure the agent; the lines that follow are the values to set in the file.
vim /etc/cloudera-scm-agent/config.ini
# Hostname of the CM server.
server_host=cdh-master
# Port that the CM server is listening on.
server_port=7182
########################## Agent configuration: end ###########################
# Start the master node: bring up Cloudera Manager Server and check its state.
systemctl start cloudera-scm-server
systemctl status cloudera-scm-server
# The agent package was also installed and pointed at cdh-master on this node,
# so start it too; otherwise that configuration is never used.
systemctl start cloudera-scm-agent
systemctl status cloudera-scm-agent
# Install the Cloudera Manager daemons package, then the agent package, from
# the RPM files in the current directory.
for component in daemons agent; do
  yum localinstall -y "cloudera-manager-${component}-6.2.0-968826.el7.x86_64.rpm"
done
# Configure the agent; the lines that follow are the values to set in the file.
vim /etc/cloudera-scm-agent/config.ini
# Hostname of the CM server.
server_host=cdh-master
# Port that the CM server is listening on.
server_port=7182
# Start the Cloudera Manager agent on the slave node, then show its state.
for action in start status; do
  systemctl "$action" cloudera-scm-agent
done