hadoop2.8.3 + hive2.3.3 + hbase0.98.24 + spark2.1.3安装

阅读更多

安装虚拟机

安装文件:CentOS-6.6-x86_64-bin-DVD1.iso

 

 

 

 

 

 

 

 

 

 

 

自定义硬件

内存增加到8G,处理器修改为4核

 

自动安装

永久修改hostname

vim /etc/sysconfig/network

HOSTNAME=bigdata.cqmfin.com

GATEWAY=192.168.100.2

配置Host

vim /etc/hosts

192.168.100.131 bigdata.cqmfin.com

永久关闭防火墙

chkconfig iptables off

关闭selinux

vim /etc/sysconfig/selinux

SELINUX=disabled

 

下载安装文件

需要安装:hadoop V2.8.3、hive V2.3.3、hbase 0.98.24、spark V2.1.3(scala-2.11.12)

http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.8.3/hadoop-2.8.3-src.tar.gz

http://archive.apache.org/dist/hadoop/common/hadoop-2.8.3/hadoop-2.8.3-src.tar.gz

http://archive.apache.org/dist/hbase/0.98.24/

[root@bigdata opt]# ll

total 969008

-rw-r--r--. 1 root root 232229830 Aug 7 02:55 apache-hive-2.3.3-bin.tar.gz

-rw-r--r--. 1 root root 244469481 Aug 7 02:55 hadoop-2.8.3.tar.gz

-rw-r--r--. 1 root root 66281053 Aug 7 02:54 hbase-0.98.24-hadoop2-bin.tar.gz

-rw-r--r--. 1 root root 181367942 Aug 7 02:54 jdk-8u91-linux-x64.gz

-rw-r--r--. 1 root root 23157347 Aug 7 03:03 MySQL-client-5.6.28-1.linux_glibc2.5.x86_64.rpm

-rw-r--r--. 1 root root 983911 Aug 7 02:55 mysql-connector-java-5.1.38.jar

-rw-r--r--. 1 root root 4588492 Aug 7 03:03 MySQL-devel-5.6.28-1.linux_glibc2.5.x86_64.rpm

-rw-r--r--. 1 root root 88909659 Aug 7 03:03 MySQL-server-5.6.28-1.linux_glibc2.5.x86_64.rpm

-rw-r--r--. 1 root root 29114457 Aug 7 02:55 scala-2.11.12.tgz

-rw-r--r--. 1 root root 121141828 Aug 7 02:55 spark-2.1.3-bin-without-hadoop.tgz

 

安装mysql

[root@KFGL-test app]# rpm -qa|grep mysql

mysql-libs-5.1.71-1.el6.x86_64

[root@KFGL-test app]# yum remove mysql-libs

Loaded plugins: fastestmirror, security

Setting up Remove Process

Resolving Dependencies

rpm安装

[root@KFGL-test app]# rpm -ivh MySQL-server-5.6.28-1.linux_glibc2.5.x86_64.rpm

warning: MySQL-server-5.6.28-1.linux_glibc2.5.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY

Preparing... ########################################### [100%]

1:MySQL-server ########################################### [100%]

warning: user mysql does not exist - using root

warning: group mysql does not exist - using root

 

[root@KFGL-test app]# rpm -ivh MySQL-client-5.6.28-1.linux_glibc2.5.x86_64.rpm

warning: MySQL-client-5.6.28-1.linux_glibc2.5.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY

Preparing... ########################################### [100%]

1:MySQL-client ########################################### [100%]

[root@KFGL-test app]# rpm -ivh MySQL-devel-5.6.28-1.linux_glibc2.5.x86_64.rpm

warning: MySQL-devel-5.6.28-1.linux_glibc2.5.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY

Preparing... ########################################### [100%]

1:MySQL-devel ########################################### [100%]

 

设置mysql环境

启动

[root@KFGL-test app]# mysqld_safe --skip-grant-tables &

[1] 29298

[root@KFGL-test app]# 160720 10:09:59 mysqld_safe Logging to '/var/lib/mysql/KFGL-test.err'.

160720 10:10:00 mysqld_safe Starting mysqld daemon with databases from /var/lib/mysql

设置密码

mysql> use mysql

Reading table information for completion of table and column names

You can turn off this feature to get a quicker startup with -A

 

Database changed

mysql> update user set password=password('123456') where user = 'root';

Query OK, 4 rows affected (0.00 sec)

Rows matched: 4 Changed: 4 Warnings: 0

 

mysql重启后再操作

mysql> SET PASSWORD = PASSWORD('123456');

Query OK, 0 rows affected (0.00 sec)

设置权限

mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456' WITH GRANT OPTION;

Query OK, 0 rows affected (0.00 sec)

配置文件

[root@KFGL-test app]# vi /usr/my.cnf

 

max_allowed_packet=500M

wait_timeout=288000

interactive_timeout = 288000

lower_case_table_names=1

 

mysql启动命令

[root@KFGL-test app]# /etc/init.d/mysql {start|stop|restart}

 

解压

tar -zxvf jdk-8u91-linux-x64.gz

tar -zxvf apache-hive-2.3.3-bin.tar.gz

tar -zxvf hbase-0.98.24-hadoop2-bin.tar.gz

tar -zxvf scala-2.11.12.tgz

tar -zxvf spark-2.1.3-bin-without-hadoop.tgz

tar -zxvf hadoop-2.8.3.tar.gz

配置环境

vim /etc/profile

export JAVA_HOME=/opt/jdk1.8.0_91

export HADOOP_HOME=/opt/hadoop-2.8.3

export HADOOP_INSTALL=$HADOOP_HOME

export HADOOP_MAPRED_HOME=$HADOOP_HOME

export HADOOP_COMMON_HOME=$HADOOP_HOME

export HADOOP_HDFS_HOME=$HADOOP_HOME

export YARN_HOME=$HADOOP_HOME

export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native

export HIVE_HOME=/opt/apache-hive-2.3.3-bin/

export SCALA_HOME=/opt/scala-2.11.12

export SPARK_HOME=/opt/spark-2.1.3-bin-without-hadoop

export PATH=$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:${HIVE_HOME}/bin:$SPARK_HOME/bin:$SCALA_HOME/bin:$PATH

立即生效

source /etc/profile

 

配置core-site.xml

vim /opt/hadoop-2.8.3/etc/hadoop/core-site.xml

  

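<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://bigdata.cqmfin.com:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/opt/data/tmp</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>root</value>
  </property>
</configuration>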

 

配置hdfs-site.xml

vim /opt/hadoop-2.8.3/etc/hadoop/hdfs-site.xml

  

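<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/opt/data/tmp/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/opt/data/tmp/dfs/data</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
    <description>need not permissions</description>
  </property>
</configuration>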

配置yarn-site.xml

  

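<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>bigdata.cqmfin.com</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>
</configuration>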

 配置mapred-site.xml

  

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

  

设置SSH无密码登录

Hadoop集群中的各个机器间会相互地通过SSH访问,每次访问都输入密码是不现实的,所以要配置各个机器间的SSH无密码登录。

1、 在BigData上生成公钥

ssh-keygen -t rsa

一路回车,都设置为默认值,然后在当前用户的Home目录下的.ssh目录中会生成公钥文件(id_rsa.pub)和私钥文件(id_rsa)。

2、 分发公钥

ssh-copy-id bigdata.cqmfin.com

 

内存优化

[hadoop@bigdata01 hadoop]$ vi /opt/hadoop-2.8.3/etc/hadoop/hadoop-env.sh

export HADOOP_PORTMAP_OPTS="-Xms512m -Xmx1024m -XX:PermSize=512M -XX:MaxPermSize=1024m $HADOOP_PORTMAP_OPTS"

export HADOOP_CLIENT_OPTS="-Xms512m -Xmx1024m -XX:PermSize=512M -XX:MaxPermSize=1024m $HADOOP_CLIENT_OPTS"

 

格式化namenode

[root@bigdata ~]# hadoop namenode -format

 

启动Hadoop

[root@bigdata ~]# start-all.sh

This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh

18/08/07 04:35:33 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

Starting namenodes on [bigdata.cqmfin.com]

bigdata.cqmfin.com: starting namenode, logging to /opt/hadoop-2.8.3/logs/hadoop-root-namenode-bigdata.cqmfin.com.out

localhost: starting datanode, logging to /opt/hadoop-2.8.3/logs/hadoop-root-datanode-bigdata.cqmfin.com.out

Starting secondary namenodes [bigdata.cqmfin.com]

bigdata.cqmfin.com: starting secondarynamenode, logging to /opt/hadoop-2.8.3/logs/hadoop-root-secondarynamenode-bigdata.cqmfin.com.out

18/08/07 04:35:48 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

starting yarn daemons

starting resourcemanager, logging to /opt/hadoop-2.8.3/logs/yarn-root-resourcemanager-bigdata.cqmfin.com.out

localhost: starting nodemanager, logging to /opt/hadoop-2.8.3/logs/yarn-root-nodemanager-bigdata.cqmfin.com.out

 

验证Hadoop安装成功

hadoop fs -mkdir /user

hadoop fs -mkdir /user/input

hadoop fs -put /opt/data/wc.input /user/wc.input

/opt/hadoop-2.8.3/bin/hadoop jar /opt/hadoop-2.8.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.3.jar wordcount /user/wc.input /user/output1

 

Java HotSpot(TM) 64-Bit Server VM warning: ignoring option PermSize=512M; support was removed in 8.0

Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=1024m; support was removed in 8.0

18/08/07 06:06:46 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

18/08/07 06:06:47 INFO client.RMProxy: Connecting to ResourceManager at bigdata.cqmfin.com/192.168.100.131:8032

18/08/07 06:06:47 INFO input.FileInputFormat: Total input files to process : 1

18/08/07 06:06:47 INFO mapreduce.JobSubmitter: number of splits:1

18/08/07 06:06:48 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1533646480177_0004

18/08/07 06:06:48 INFO impl.YarnClientImpl: Submitted application application_1533646480177_0004

18/08/07 06:06:49 INFO mapreduce.Job: The url to track the job: http://bigdata.cqmfin.com:8088/proxy/application_1533646480177_0004/

18/08/07 06:06:49 INFO mapreduce.Job: Running job: job_1533646480177_0004

18/08/07 06:06:58 INFO mapreduce.Job: Job job_1533646480177_0004 running in uber mode : false

18/08/07 06:06:58 INFO mapreduce.Job: map 0% reduce 0%

18/08/07 06:07:04 INFO mapreduce.Job: map 100% reduce 0%

18/08/07 06:07:09 INFO mapreduce.Job: map 100% reduce 100%

18/08/07 06:07:10 INFO mapreduce.Job: Job job_1533646480177_0004 completed successfully

web查看hadoop

http://192.168.100.131:50070/dfshealth.html#tab-overview

 

配置 hbase

[root@bigdata data]# vi /opt/hbase-0.98.24-hadoop2/conf/hbase-site.xml

<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>/opt/data/hbase</value>
  </property>
</configuration>

启动hbase

[root@bigdata data]# /opt/hbase-0.98.24-hadoop2/bin/start-hbase.sh

[root@bigdata bin]# ./hbase shell

2018-08-07 06:34:20,163 INFO [main] Configuration.deprecation: hadoop.native.lib is deprecated. Instead, use io.native.lib.available

HBase Shell; enter 'help' for list of supported commands.

Type "exit" to leave the HBase Shell

Version 0.98.24-hadoop2, r9c13a1c3d8cf999014f30104d1aa9d79e74ca3d6, Thu Dec 22 02:36:05 UTC 2016

 

hbase(main):001:0> create 'table1','col1'

2018-08-07 06:34:31,327 WARN [main] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

SLF4J: Class path contains multiple SLF4J bindings.

SLF4J: Found binding in [jar:file:/opt/hbase-0.98.24-hadoop2/lib/slf4j-log4j12-1.6.4.jar!/org/slf4j/impl/StaticLoggerBinder.class]

SLF4J: Found binding in [jar:file:/opt/hadoop-2.8.3/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]

SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.

0 row(s) in 1.7030 seconds

 

=> Hbase::Table - table1

hbase(main):002:0> put 'table1','row1','col1:a','value1'

0 row(s) in 0.0990 seconds

 

hbase(main):003:0> scan 'table1'

ROW COLUMN+CELL

row1 column=col1:a, timestamp=1533648877063, value=value1

1 row(s) in 0.0270 seconds

 

hbase(main):004:0>

web查看hbase

http://192.168.100.131:60010/master-status

 

 

 

 

配置hive-site.xml

cp /opt/apache-hive-2.3.3-bin/conf/hive-default.xml.template /opt/apache-hive-2.3.3-bin/conf/hive-site.xml

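<property>
  <name>hive.exec.scratchdir</name>
  <value>/user/hive/tmp</value>
</property>

<property>
  <name>hive.querylog.location</name>
  <value>/user/hive/log/hadoop</value>
  <description>Location of Hive run time structured log file</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://192.168.252.124:3306/hive?createDatabaseIfNotExist=true</value>
  <description>JDBC connect string for a JDBC metastore. To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL. For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
  <description>Username to use against metastore database</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>mima</value>
  <description>password to use against metastore database</description>
</property>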

把 hive-site.xml 中所有的 ${system:java.io.tmpdir} 改成 /opt/apache-hive-2.3.3-bin/tmp

把 hive-site.xml 中所有的 ${system:user.name} 改成 ${user.name}

进入 hadoop 安装目录 执行hadoop命令新建/user/hive/warehouse目录,并授权,用于存储文件

hadoop fs -mkdir -p /user/hive/warehouse

hadoop fs -mkdir -p /user/hive/tmp

hadoop fs -mkdir -p /user/hive/log

hadoop fs -chmod -R 777 /user/hive/warehouse

hadoop fs -chmod -R 777 /user/hive/tmp

hadoop fs -chmod -R 777 /user/hive/log

创建 tmp 文件

mkdir /opt/apache-hive-2.3.3-bin/tmp

新建 hive-env.sh

cp /opt/apache-hive-2.3.3-bin/conf/hive-env.sh.template /opt/apache-hive-2.3.3-bin/conf/hive-env.sh

vim /opt/apache-hive-2.3.3-bin/conf/hive-env.sh

HADOOP_HOME=/opt/hadoop-2.8.3

export HIVE_CONF_DIR=/opt/apache-hive-2.3.3-bin/conf

export HIVE_AUX_JARS_PATH=/opt/apache-hive-2.3.3-bin/lib

mysql 驱动包

cp /opt/mysql-connector-java-5.1.38.jar /opt/apache-hive-2.3.3-bin/lib

 

初始化 mysql

MySQL数据库进行初始化

mysql 中创建 hive 库

[root@bigdata conf]# mysql -u root -p

Enter password:

Welcome to the MySQL monitor. Commands end with ; or \g.

Your MySQL connection id is 2

Server version: 5.6.28 MySQL Community Server (GPL)

 

Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.

 

Oracle is a registered trademark of Oracle Corporation and/or its

affiliates. Other names may be trademarks of their respective

owners.

 

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

 

mysql> create database hive

-> ;

Query OK, 1 row affected (0.00 sec)

执行hive库脚本

cd /opt/apache-hive-2.3.3-bin/bin

./schematool -initSchema -dbType mysql

如果看到如下,表示初始化成功

Starting metastore schema initialization to 2.3.0

Initialization script hive-schema-2.3.0.mysql.sql

Initialization script completed

schemaTool completed

查看 mysql 数据库

mysql -uroot -p

mysql> show databases;

 

启动Hive

/opt/apache-hive-2.3.3-bin/bin/hive

which: no hbase in (/opt/jdk1.8.0_91/bin:/opt/hadoop-2.8.3/sbin:/opt/hadoop-2.8.3/bin:/opt/apache-hive-2.3.3-bin//bin:/opt/spark-2.1.3-bin-without-hadoop/bin:/opt/scala-2.11.12/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin)

SLF4J: Class path contains multiple SLF4J bindings.

SLF4J: Found binding in [jar:file:/opt/apache-hive-2.3.3-bin/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]

SLF4J: Found binding in [jar:file:/opt/hadoop-2.8.3/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]

SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.

SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]

 

Logging initialized using configuration in jar:file:/opt/apache-hive-2.3.3-bin/lib/hive-common-2.3.3.jar!/hive-log4j2.properties Async: true

Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.

hive>

 

spark配置

[root@bigdata spark-2.1.3-bin-without-hadoop]# cp /opt/spark-2.1.3-bin-without-hadoop/conf/spark-env.sh.template /opt/spark-2.1.3-bin-without-hadoop/conf/spark-env.sh

vim /opt/spark-2.1.3-bin-without-hadoop/conf/spark-env.sh

JAVA_HOME=/opt/jdk1.8.0_91

SCALA_HOME=/opt/scala-2.11.12

HADOOP_HOME=/opt/hadoop-2.8.3

HADOOP_CONF_DIR=/opt/hadoop-2.8.3/etc/hadoop

SPARK_DIST_CLASSPATH=$(/opt/hadoop-2.8.3/bin/hadoop classpath)

#SPARK_DIST_CLASSPATH=$(hadoop --config /home/hadoop/hadoop-2.7.2/etc/hadoop classpath)

#export SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:/home/hadoop/hadoop-2.7.2/share/hadoop/tools/lib/*"

#spark

SPARK_HOME=/opt/spark-2.1.3-bin-without-hadoop

SPARK_MASTER_IP=bigdata.cqmfin.com

SPARK_WORKER_CORES=2

SPARK_WORKER_INSTANCES=1

SPARK_WORKER_MEMORY=2g

SPARK_EXECUTOR_CORES=1

SPARK_EXECUTOR_MEMORY=1g

#spark

SPARK_WORKER_DIR=/opt/spark-2.1.3-bin-without-hadoop/work

SPARK_LOG_DIR=/opt/spark-2.1.3-bin-without-hadoop/logs

SPARK_PID_DIR=/opt/spark-2.1.3-bin-without-hadoop/pid

#LZO

#export SPARK_CLASSPATH=/opt/hadoop/share/hadoop/common/hadoop-lzo-0.4.20-SNAPSHOT.jar

export SPARK_CLASSPATH=$SPARK_CLASSPATH:$CLASSPATH

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$HADOOP_HOME/lib/native

 

hive关联spark

 

 

加入必要jar

cp /opt/spark-2.1.3-bin-without-hadoop/jars/scala-library-2.11.8.jar /opt/apache-hive-2.3.3-bin/lib/

[hadoop@bigdata01 bin]$ cp /opt/spark-2.1.3-bin-without-hadoop/jars/spark-network-common_2.11-2.1.3.jar /opt/apache-hive-2.3.3-bin/lib

[hadoop@bigdata01 bin]$ cp /opt/spark-2.1.3-bin-without-hadoop/jars/spark-core_2.11-2.1.3.jar /opt/apache-hive-2.3.3-bin/lib

配置hive

vi hive-site.xml



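<property>
    <name>hive.execution.engine</name>
    <value>spark</value>
</property>

<property>
    <name>hive.enable.spark.execution.engine</name>
    <value>true</value>
</property>

<property>
    <name>spark.master</name>
    <value>spark://bigdata.cqmfin.com:7077</value>
</property>

<property>
    <name>spark.serializer</name>
    <value>org.apache.spark.serializer.KryoSerializer</value>
</property>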

 

 

启动spark

/opt/spark-2.1.3-bin-without-hadoop/sbin/start-all.sh

验证spark安装正确性

/opt/spark-2.1.3-bin-without-hadoop/bin/run-example SparkPi

web查看spark

http://192.168.100.131:8081/

 

 

 

测试hbase+hive

hbase

create_namespace 'bigdata'

create 'bigdata:customer_user_id','d'

hbase(main):005:0> put 'bigdata:customer_user_id','11','d:register_date','2018-08-07'

0 row(s) in 0.0070 seconds

hbase(main):003:0> put 'bigdata:customer_user_id','1','d:register_date','2018-08-07'

0 row(s) in 0.0800 seconds

hbase(main):006:0>

hive:

create database IF NOT EXISTS cqmfinbigdata;

use cqmfinbigdata;

CREATE external TABLE customer_user(
  user_id string,
  register_date date
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties("hbase.columns.mapping"=":key,d:register_date")
tblproperties("hbase.table.name"="bigdata:customer_user_id");

hive> select * from customer_user;

OK

1 2018-08-07

11 2018-08-07

 

hive> select count(user_id) from customer_user;

Query ID = root_20180807080820_1d485fc8-a712-40fd-924f-835f7f6a1f51

Total jobs = 1

Launching Job 1 out of 1

In order to change the average load for a reducer (in bytes):

set hive.exec.reducers.bytes.per.reducer=

In order to limit the maximum number of reducers:

set hive.exec.reducers.max=

In order to set a constant number of reducers:

set mapreduce.job.reduces=

Starting Spark Job = 38bd4501-02f6-4d5d-b22a-64d760eb2e19

 

Query Hive on Spark job[1] stages: [2, 3]

 

Status: Running (Hive on Spark job[1])

--------------------------------------------------------------------------------------

STAGES ATTEMPT STATUS TOTAL COMPLETED RUNNING PENDING FAILED

--------------------------------------------------------------------------------------

Stage-2 ........ 0 FINISHED 1 1 0 0 0

Stage-3 ........ 0 FINISHED 1 1 0 0 0

--------------------------------------------------------------------------------------

STAGES: 02/02 [==========================>>] 100% ELAPSED TIME: 3.03 s

--------------------------------------------------------------------------------------

Status: Finished successfully in 3.03 seconds

OK

2

Time taken: 3.351 seconds, Fetched: 1 row(s)

hive>

 

你可能感兴趣的:(大数据,hadoop,hive,hbase,spark)