安装虚拟机
安装文件:CentOS-6.6-x86_64-bin-DVD1.iso
自定义硬件
内存增加到8G,处理器修改为4核
自动安装
永久修改hostname
vim /etc/sysconfig/network
HOSTNAME=bigdata.cqmfin.com
GATEWAY=192.168.100.2
配置Host
vim /etc/hosts
192.168.100.131 bigdata.cqmfin.com
永久关闭防火墙
chkconfig iptables off
关闭selinux
vim /etc/sysconfig/selinux
SELINUX=disabled
下载安装文件
需要安装:hadoop V2.8.3、hive V2.3.3、hbase 0.98.24、spark V2.1.3(scala-2.11.12)
http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.8.3/hadoop-2.8.3-src.tar.gz
http://archive.apache.org/dist/hadoop/common/hadoop-2.8.3/hadoop-2.8.3-src.tar.gz
http://archive.apache.org/dist/hbase/0.98.24/
[root@bigdata opt]# ll
total 969008
-rw-r--r--. 1 root root 232229830 Aug 7 02:55 apache-hive-2.3.3-bin.tar.gz
-rw-r--r--. 1 root root 244469481 Aug 7 02:55 hadoop-2.8.3.tar.gz
-rw-r--r--. 1 root root 66281053 Aug 7 02:54 hbase-0.98.24-hadoop2-bin.tar.gz
-rw-r--r--. 1 root root 181367942 Aug 7 02:54 jdk-8u91-linux-x64.gz
-rw-r--r--. 1 root root 23157347 Aug 7 03:03 MySQL-client-5.6.28-1.linux_glibc2.5.x86_64.rpm
-rw-r--r--. 1 root root 983911 Aug 7 02:55 mysql-connector-java-5.1.38.jar
-rw-r--r--. 1 root root 4588492 Aug 7 03:03 MySQL-devel-5.6.28-1.linux_glibc2.5.x86_64.rpm
-rw-r--r--. 1 root root 88909659 Aug 7 03:03 MySQL-server-5.6.28-1.linux_glibc2.5.x86_64.rpm
-rw-r--r--. 1 root root 29114457 Aug 7 02:55 scala-2.11.12.tgz
-rw-r--r--. 1 root root 121141828 Aug 7 02:55 spark-2.1.3-bin-without-hadoop.tgz
安装mysql
[root@KFGL-test app]# rpm -qa|grep mysql
mysql-libs-5.1.71-1.el6.x86_64
[root@KFGL-test app]# yum remove mysql-libs
Loaded plugins: fastestmirror, security
Setting up Remove Process
Resolving Dependencies
rpm安装
[root@KFGL-test app]# rpm -ivh MySQL-server-5.6.28-1.linux_glibc2.5.x86_64.rpm
warning: MySQL-server-5.6.28-1.linux_glibc2.5.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY
Preparing... ########################################### [100%]
1:MySQL-server ########################################### [100%]
warning: user mysql does not exist - using root
warning: group mysql does not exist - using root
[root@KFGL-test app]# rpm -ivh MySQL-client-5.6.28-1.linux_glibc2.5.x86_64.rpm
warning: MySQL-client-5.6.28-1.linux_glibc2.5.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY
Preparing... ########################################### [100%]
1:MySQL-client ########################################### [100%]
[root@KFGL-test app]# rpm -ivh MySQL-devel-5.6.28-1.linux_glibc2.5.x86_64.rpm
warning: MySQL-devel-5.6.28-1.linux_glibc2.5.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY
Preparing... ########################################### [100%]
1:MySQL-devel ########################################### [100%]
设置mysql环境
启动
[root@KFGL-test app]# mysqld_safe --skip-grant-tables &
[1] 29298
[root@KFGL-test app]# 160720 10:09:59 mysqld_safe Logging to '/var/lib/mysql/KFGL-test.err'.
160720 10:10:00 mysqld_safe Starting mysqld daemon with databases from /var/lib/mysql
设置密码
mysql> use mysql
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> update user set password=password('123456') where user = 'root';
Query OK, 4 rows affected (0.00 sec)
Rows matched: 4 Changed: 4 Warnings: 0
mysql重启后再操作
mysql> SET PASSWORD = PASSWORD('123456');
Query OK, 0 rows affected (0.00 sec)
设置权限
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456' WITH GRANT OPTION;
Query OK, 0 rows affected (0.00 sec)
配置文件
[root@KFGL-test app]# vi /usr/my.cnf
max_allowed_packet=500M
wait_timeout=288000
interactive_timeout = 288000
lower_case_table_names=1
mysql启动命令
[root@KFGL-test app]# /etc/init.d/mysql {start|stop|restart}
解压
tar -zxvf jdk-8u91-linux-x64.gz
tar -zxvf apache-hive-2.3.3-bin.tar.gz
tar -zxvf hbase-0.98.24-hadoop2-bin.tar.gz
tar -zxvf scala-2.11.12.tgz
tar -zxvf spark-2.1.3-bin-without-hadoop.tgz
tar -zxvf hadoop-2.8.3.tar.gz
配置环境
vim /etc/profile
export JAVA_HOME=/opt/jdk1.8.0_91
export HADOOP_HOME=/opt/hadoop-2.8.3
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HIVE_HOME=/opt/apache-hive-2.3.3-bin/
export SCALA_HOME=/opt/scala-2.11.12
export SPARK_HOME=/opt/spark-2.1.3-bin-without-hadoop
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:${HIVE_HOME}/bin:$SPARK_HOME/bin:$SCALA_HOME/bin:$PATH
立即生效
source /etc/profile
配置core-site.xml
vim /opt/hadoop-2.8.3/etc/hadoop/core-site.xml
配置hdfs-site.xml
vim /opt/hadoop-2.8.3/etc/hadoop/hdfs-site.xml
配置yarn-site.xml
配置mapred-site.xml
设置SSH无密码登录
Hadoop集群中的各个机器间会相互地通过SSH访问,每次访问都输入密码是不现实的,所以要为各个机器之间配置SSH无密码登录。
1、 在BigData上生成公钥
ssh-keygen -t rsa
一路回车,都设置为默认值,然后在当前用户的Home目录下的.ssh目录中会生成公钥文件(id_rsa.pub)和私钥文件(id_rsa)。
2、 分发公钥
ssh-copy-id bigdata.cqmfin.com
内存优化
[hadoop@bigdata01 hadoop]$ vi /opt/hadoop-2.8.3/etc/hadoop/hadoop-env.sh
export HADOOP_PORTMAP_OPTS="-Xms512m -Xmx1024m -XX:PermSize=512M -XX:MaxPermSize=1024m $HADOOP_PORTMAP_OPTS"
export HADOOP_CLIENT_OPTS="-Xms512m -Xmx1024m -XX:PermSize=512M -XX:MaxPermSize=1024m $HADOOP_CLIENT_OPTS"
格式化namenode
[root@bigdata ~]# hadoop namenode -format
启动Hadoop
[root@bigdata ~]# start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
18/08/07 04:35:33 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting namenodes on [bigdata.cqmfin.com]
bigdata.cqmfin.com: starting namenode, logging to /opt/hadoop-2.8.3/logs/hadoop-root-namenode-bigdata.cqmfin.com.out
localhost: starting datanode, logging to /opt/hadoop-2.8.3/logs/hadoop-root-datanode-bigdata.cqmfin.com.out
Starting secondary namenodes [bigdata.cqmfin.com]
bigdata.cqmfin.com: starting secondarynamenode, logging to /opt/hadoop-2.8.3/logs/hadoop-root-secondarynamenode-bigdata.cqmfin.com.out
18/08/07 04:35:48 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
starting yarn daemons
starting resourcemanager, logging to /opt/hadoop-2.8.3/logs/yarn-root-resourcemanager-bigdata.cqmfin.com.out
localhost: starting nodemanager, logging to /opt/hadoop-2.8.3/logs/yarn-root-nodemanager-bigdata.cqmfin.com.out
验证Hadoop安装成功
hadoop fs -mkdir /user
hadoop fs -mkdir /user/input
hadoop fs -put /opt/data/wc.input /user/wc.input
/opt/hadoop-2.8.3/bin/hadoop jar /opt/hadoop-2.8.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.3.jar wordcount /user/wc.input /user/output1
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option PermSize=512M; support was removed in 8.0
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=1024m; support was removed in 8.0
18/08/07 06:06:46 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
18/08/07 06:06:47 INFO client.RMProxy: Connecting to ResourceManager at bigdata.cqmfin.com/192.168.100.131:8032
18/08/07 06:06:47 INFO input.FileInputFormat: Total input files to process : 1
18/08/07 06:06:47 INFO mapreduce.JobSubmitter: number of splits:1
18/08/07 06:06:48 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1533646480177_0004
18/08/07 06:06:48 INFO impl.YarnClientImpl: Submitted application application_1533646480177_0004
18/08/07 06:06:49 INFO mapreduce.Job: The url to track the job: http://bigdata.cqmfin.com:8088/proxy/application_1533646480177_0004/
18/08/07 06:06:49 INFO mapreduce.Job: Running job: job_1533646480177_0004
18/08/07 06:06:58 INFO mapreduce.Job: Job job_1533646480177_0004 running in uber mode : false
18/08/07 06:06:58 INFO mapreduce.Job: map 0% reduce 0%
18/08/07 06:07:04 INFO mapreduce.Job: map 100% reduce 0%
18/08/07 06:07:09 INFO mapreduce.Job: map 100% reduce 100%
18/08/07 06:07:10 INFO mapreduce.Job: Job job_1533646480177_0004 completed successfully
web查看hadoop
http://192.168.100.131:50070/dfshealth.html#tab-overview
配置 hbase
[root@bigdata data]# vi /opt/hbase-0.98.24-hadoop2/conf/hbase-site.xml
启动hbase
[root@bigdata data]# /opt/hbase-0.98.24-hadoop2/bin/start-hbase.sh
[root@bigdata bin]# ./hbase shell
2018-08-07 06:34:20,163 INFO [main] Configuration.deprecation: hadoop.native.lib is deprecated. Instead, use io.native.lib.available
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 0.98.24-hadoop2, r9c13a1c3d8cf999014f30104d1aa9d79e74ca3d6, Thu Dec 22 02:36:05 UTC 2016
hbase(main):001:0> create 'table1','col1'
2018-08-07 06:34:31,327 WARN [main] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/hbase-0.98.24-hadoop2/lib/slf4j-log4j12-1.6.4.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop-2.8.3/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
0 row(s) in 1.7030 seconds
=> Hbase::Table - table1
hbase(main):002:0> put 'table1','row1','col1:a','value1'
0 row(s) in 0.0990 seconds
hbase(main):003:0> scan 'table1'
ROW COLUMN+CELL
row1 column=col1:a, timestamp=1533648877063, value=value1
1 row(s) in 0.0270 seconds
hbase(main):004:0>
web查看hbase
http://192.168.100.131:60010/master-status
配置hive-site.xml
cp /opt/apache-hive-2.3.3-bin/conf/hive-default.xml.template /opt/apache-hive-2.3.3-bin/conf/hive-site.xml
把 hive-site.xml 中所有的 ${system:java.io.tmpdir} 改成 /opt/apache-hive-2.3.3-bin/tmp
把 hive-site.xml 中所有的 ${system:user.name} 改成 ${user.name}
进入 hadoop 安装目录 执行hadoop命令新建/user/hive/warehouse目录,并授权,用于存储文件
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -mkdir -p /user/hive/tmp
hadoop fs -mkdir -p /user/hive/log
hadoop fs -chmod -R 777 /user/hive/warehouse
hadoop fs -chmod -R 777 /user/hive/tmp
hadoop fs -chmod -R 777 /user/hive/log
创建 tmp 文件
mkdir /opt/apache-hive-2.3.3-bin/tmp
新建 hive-env.sh
cp /opt/apache-hive-2.3.3-bin/conf/hive-env.sh.template /opt/apache-hive-2.3.3-bin/conf/hive-env.sh
vim /opt/apache-hive-2.3.3-bin/conf/hive-env.sh
HADOOP_HOME=/opt/hadoop-2.8.3
export HIVE_CONF_DIR=/opt/apache-hive-2.3.3-bin/conf
export HIVE_AUX_JARS_PATH=/opt/apache-hive-2.3.3-bin/lib
mysql 驱动包
cp /opt/mysql-connector-java-5.1.38.jar /opt/apache-hive-2.3.3-bin/lib
初始化 mysql
MySQL数据库进行初始化
mysql 中创建 hive 库
[root@bigdata conf]# mysql -u root -p
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.6.28 MySQL Community Server (GPL)
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> create database hive
-> ;
Query OK, 1 row affected (0.00 sec)
执行hive库脚本
cd /opt/apache-hive-2.3.3-bin/bin
./schematool -initSchema -dbType mysql
如果看到如下,表示初始化成功
Starting metastore schema initialization to 2.3.0
Initialization script hive-schema-2.3.0.mysql.sql
Initialization script completed
schemaTool completed
查看 mysql 数据库
mysql -uroot -p
mysql> show databases;
启动Hive
/opt/apache-hive-2.3.3-bin/bin/hive
which: no hbase in (/opt/jdk1.8.0_91/bin:/opt/hadoop-2.8.3/sbin:/opt/hadoop-2.8.3/bin:/opt/apache-hive-2.3.3-bin//bin:/opt/spark-2.1.3-bin-without-hadoop/bin:/opt/scala-2.11.12/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/apache-hive-2.3.3-bin/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop-2.8.3/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Logging initialized using configuration in jar:file:/opt/apache-hive-2.3.3-bin/lib/hive-common-2.3.3.jar!/hive-log4j2.properties Async: true
Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
hive>
spark配置
[root@bigdata spark-2.1.3-bin-without-hadoop]# cp /opt/spark-2.1.3-bin-without-hadoop/conf/spark-env.sh.template /opt/spark-2.1.3-bin-without-hadoop/conf/spark-env.sh
vim /opt/spark-2.1.3-bin-without-hadoop/conf/spark-env.sh
JAVA_HOME=/opt/jdk1.8.0_91
SCALA_HOME=/opt/scala-2.11.12
HADOOP_HOME=/opt/hadoop-2.8.3
HADOOP_CONF_DIR=/opt/hadoop-2.8.3/etc/hadoop
SPARK_DIST_CLASSPATH=$(/opt/hadoop-2.8.3/bin/hadoop classpath)
#SPARK_DIST_CLASSPATH=$(hadoop --config /home/hadoop/hadoop-2.7.2/etc/hadoop classpath)
#export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/home/hadoop/hadoop-2.7.2/share/hadoop/tools/lib/*"
#spark
SPARK_HOME=/opt/spark-2.1.3-bin-without-hadoop
SPARK_MASTER_IP=bigdata.cqmfin.com
SPARK_WORKER_CORES=2
SPARK_WORKER_INSTANCES=1
SPARK_WORKER_MEMORY=2g
SPARK_EXECUTOR_CORES=1
SPARK_EXECUTOR_MEMORY=1g
#spark
SPARK_WORKER_DIR=/opt/spark-2.1.3-bin-without-hadoop/work
SPARK_LOG_DIR=/opt/spark-2.1.3-bin-without-hadoop/logs
SPARK_PID_DIR=/opt/spark-2.1.3-bin-without-hadoop/pid
#LZO
#export SPARK_CLASSPATH=/opt/hadoop/share/hadoop/common/hadoop-lzo-0.4.20-SNAPSHOT.jar
export SPARK_CLASSPATH=$SPARK_CLASSPATH:$CLASSPATH
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$HADOOP_HOME/lib/native
hive关联spark
加入必要jar
cp /opt/spark-2.1.3-bin-without-hadoop/jars/scala-library-2.11.8.jar /opt/apache-hive-2.3.3-bin/lib/
[hadoop@bigdata01 bin]$ cp /opt/spark-2.1.3-bin-without-hadoop/jars/spark-network-common_2.11-2.1.3.jar /opt/apache-hive-2.3.3-bin/lib
[hadoop@bigdata01 bin]$ cp /opt/spark-2.1.3-bin-without-hadoop/jars/spark-core_2.11-2.1.3.jar /opt/apache-hive-2.3.3-bin/lib
配置hive
vi hive-site.xml
启动spark
/opt/spark-2.1.3-bin-without-hadoop/sbin/start-all.sh
验证spark安装正确性
/opt/spark-2.1.3-bin-without-hadoop/bin/run-example SparkPi
web查看spark
http://192.168.100.131:8081/
测试hbase+hive
hbase
hbase(main):005:0> put 'bigdata:customer_user_id','11','d:register_date','2018-08-07'
0 row(s) in 0.0070 seconds
hbase(main):003:0> put 'bigdata:customer_user_id','1','d:register_date','2018-08-07'
0 row(s) in 0.0800 seconds
hbase(main):006:0>
hive:
create database IF NOT EXISTS cqmfinbigdata;
use cqmfinbigdata;
hive> select * from customer_user;
OK
1 2018-08-07
11 2018-08-07
hive> select count(user_id) from customer_user;
Query ID = root_20180807080820_1d485fc8-a712-40fd-924f-835f7f6a1f51
Total jobs = 1
Launching Job 1 out of 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=
In order to set a constant number of reducers:
set mapreduce.job.reduces=
Starting Spark Job = 38bd4501-02f6-4d5d-b22a-64d760eb2e19
Query Hive on Spark job[1] stages: [2, 3]
Status: Running (Hive on Spark job[1])
--------------------------------------------------------------------------------------
STAGES ATTEMPT STATUS TOTAL COMPLETED RUNNING PENDING FAILED
--------------------------------------------------------------------------------------
Stage-2 ........ 0 FINISHED 1 1 0 0 0
Stage-3 ........ 0 FINISHED 1 1 0 0 0
--------------------------------------------------------------------------------------
STAGES: 02/02 [==========================>>] 100% ELAPSED TIME: 3.03 s
--------------------------------------------------------------------------------------
Status: Finished successfully in 3.03 seconds
OK
2
Time taken: 3.351 seconds, Fetched: 1 row(s)
hive>