Apache Hadoop: Integrating Hive, Hue, Spark, and HBase

Phase 3: Integrating Hive, Hue, Spark, and HBase

 

Component | master          | slave1            | slave2
--------- | --------------- | ----------------- | ---------------
HDFS      | NameNode        | NameNode          |
          | DataNode        | DataNode          | DataNode
          | JournalNode     | JournalNode       | JournalNode
YARN      | ResourceManager | JobHistory Server | ResourceManager
          | NodeManager     | NodeManager       | NodeManager
ZooKeeper | zk-server       | zk-server         | zk-server
Hive      | hiveserver2     |                   |
MySQL     | mysql           |                   |
HBase     | regionserver    | regionserver      | regionserver
Spark     |                 |                   | master
          | Worker          | Worker            | Worker
Hue       |                 |                   | server
Livy      | livy.server     |                   | livy.server

I. Install Scala

tar -zxvf scala-2.11.12.tgz -C /usr/local

mv /usr/local/scala-2.11.12 /usr/local/scala

vim /etc/profile

#scala

export PATH=$PATH:/usr/local/scala/bin

source /etc/profile
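If the profile change took effect, a quick version check should report 2.11.12:

scala -version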

II. Install MySQL

1. Extract the tarball

tar -zxvf mysql-5.7.21-linux-glibc2.12-x86_64.tar.gz -C /usr/local

2. Create the MySQL user and set ownership

groupadd mysql

useradd -r -g mysql mysql

chown -R mysql:mysql /usr/local/mysql-5.7.21/

3. Edit the configuration file

vi /etc/my.cnf

[client]

port          = 3306

socket        = /usr/local/mysql-5.7.21/mysql.sock

default-character-set = utf8mb4



[mysqld_safe]

socket        = /usr/local/mysql-5.7.21/mysql.sock

nice          = 0



[mysqld]

sql_mode             = NO_AUTO_CREATE_USER,STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION,NO_ZERO_DATE,NO_ZERO_IN_DATE

explicit_defaults_for_timestamp = 1

skip-ssl

character-set-server = utf8

user                    = mysql

socket                  = /usr/local/mysql-5.7.21/mysql.sock

port                    = 3306

basedir                 = /usr/local/mysql-5.7.21

datadir                 = /usr/local/mysql-5.7.21/data

tmpdir                  = /tmp

pid-file                = /usr/local/mysqld.pid



back_log                = 1000

max_connections         = 5000

max_connect_errors      = 10

max_allowed_packet      = 32M

server-id=1

skip-external-locking

skip-name-resolve

#skip-grant-tables

wait_timeout            = 31536000

interactive_timeout     = 31536000

max_heap_table_size     = 16M

tmp_table_size          = 16M

query_cache_size        = 0

query_cache_type        = 0

ft_min_word_len         = 4



log_error_verbosity     = 3

log_error     = /usr/local/mysql-5.7.21/log/alert_systemapi.log

slow_query_log    = 1

slow_query_log_file     = /usr/local/mysql-5.7.21/log/mysql_slow.log

long_query_time = 0.5



key_buffer_size                 = 32M

read_buffer_size                = 32M

read_rnd_buffer_size            = 8M

bulk_insert_buffer_size         = 8M

myisam_sort_buffer_size         = 8M

myisam_max_sort_file_size       = 16M

myisam_repair_threads           = 1

myisam_recover_options          = default



log_bin             = /usr/local/mysql-5.7.21/binlog/cdh-1-bin.log

binlog_format       = row

expire_logs_days    = 2

max_binlog_size     = 100M

binlog_cache_size   = 128M



innodb_autoextend_increment     = 64

innodb_log_file_size            = 16M

4. Initialize

Create the required directories:

mkdir /usr/local/mysql-5.7.21/log

mkdir /usr/local/mysql-5.7.21/binlog/

mkdir /usr/local/mysql-5.7.21/data

/usr/local/mysql-5.7.21/bin/mysqld --defaults-file=/etc/my.cnf --user=mysql --initialize-insecure

5. Change the root password and grant privileges

Open /etc/my.cnf, find the [mysqld] section, add skip-grant-tables under it, and save the file.
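The section should end up looking like this (the option is already present, commented out, in the my.cnf above):

[mysqld]
skip-grant-tables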

Start MySQL:

cp /usr/local/mysql-5.7.21/support-files/mysql.server /etc/init.d/mysql

service mysql start
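With skip-grant-tables active and a data directory created by --initialize-insecure, root can log in without a password. A quick sanity check (the socket path comes from the [client] section of /etc/my.cnf):

/usr/local/mysql-5.7.21/bin/mysql -uroot -e 'select version();'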

Log in as root and reset the root password:

UPDATE mysql.user SET authentication_string=PASSWORD('Jkwl@2021') WHERE User='root';

UPDATE mysql.user SET host='%' WHERE User='root';

-- flush so the updated grant tables take effect while skip-grant-tables is active
FLUSH PRIVILEGES;

GRANT ALL PRIVILEGES ON *.* TO 'root'@'%';

Then edit /etc/my.cnf again, remove the skip-grant-tables line from the [mysqld] section, and restart MySQL (service mysql restart).

6. Create the required databases and users

create database hive DEFAULT CHARACTER SET utf8;

create database hue DEFAULT CHARACTER SET utf8;

grant all on hue.* TO 'hue'@'%' IDENTIFIED BY 'Jkwl@2021';

grant all on hive.* TO 'hive'@'%' IDENTIFIED BY 'Jkwl@2021';
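A quick check that the new accounts work (host and password exactly as granted above):

/usr/local/mysql-5.7.21/bin/mysql -uhive -p'Jkwl@2021' -h master -e 'show databases;'

/usr/local/mysql-5.7.21/bin/mysql -uhue -p'Jkwl@2021' -h master -e 'show databases;'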

III. Install Hive

1. Extract the tarball

 tar -zxvf apache-hive-3.1.2-bin.tar.gz -C /opt/module

mv apache-hive-3.1.2-bin hive

2. Update environment variables

vim /etc/profile and add:

#hive

export HIVE_HOME=/opt/module/hive

export PATH=$PATH:$HIVE_HOME/bin

source /etc/profile

This change must be made on all servers.

3. Edit the configuration files

cp hive-env.sh.template hive-env.sh

vi hive-env.sh

export HIVE_HOME=/opt/module/hive

export PATH=$HIVE_HOME/bin:$PATH

export HIVE_AUX_JARS_PATH=$HIVE_HOME/bin

export HIVE_CONF_DIR=$HIVE_HOME/conf

export HADOOP_USER_CLASSPATH_FIRST=true

cp hive-default.xml.template hive-site.xml

vi hive-site.xml



<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
  <description>Username to use against metastore database</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>Jkwl@2021</value>
  <description>password to use against metastore database</description>
</property>

Replace every occurrence of ${system:java.io.tmpdir} in hive-site.xml with /opt/module/hive/iotmp.

4. Start Hive

Initialize the Hive metastore tables in MySQL (schematool is under $HIVE_HOME/bin):

./schematool -dbType mysql -initSchema

nohup hive --service hiveserver2 --hiveconf hive.server2.thrift.port=10001 &  

Error encountered: Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V

Fix: delete /opt/module/hive/lib/guava-19.0.jar and copy /opt/module/hadoop-3.2.2/share/hadoop/common/lib/guava-27.0-jre.jar into /opt/module/hive/lib/.
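The same fix as shell commands:

rm /opt/module/hive/lib/guava-19.0.jar

cp /opt/module/hadoop-3.2.2/share/hadoop/common/lib/guava-27.0-jre.jar /opt/module/hive/lib/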

Error encountered: com.ctc.wstx.exc.WstxParsingException: Illegal character entity: expansion character (code 0x8) at [row,col,system-id]: [3215,96,"file:/opt/module/hive/conf/hive-site.xml"]

Fix: vim /opt/module/hive/conf/hive-site.xml +3215 and delete the illegal character on that line.

Warning encountered: WARN: Establishing SSL connection without server's identity verification is not recommended. According to MySQL 5.5.45+, 5.6.26+ and 5.7.6+ requirements SSL connection must be established by default if explicit option isn't set. For compliance with existing applications not using SSL the verifyServerCertificate property is set to 'false'. You need either to explicitly disable SSL by setting useSSL=false, or set useSSL=true and provide truststore for server certificate verification.

Fix: add useSSL=false to the metastore JDBC URL:

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://master:3306/hive?useSSL=false</value>
  <description>JDBC connect string for a JDBC metastore. To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL. For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.</description>
</property>

5. Distribute the Hive directory to the other servers

Note: the other servers do not need to start the service.
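Once hiveserver2 is up, Beeline can confirm it answers on the port chosen above (the -n user is an assumption; substitute the user your cluster runs as):

beeline -u jdbc:hive2://master:10001 -n hadoop -e 'show databases;'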

   

IV. Install HBase

1. Extract the tarball

tar -zxvf hbase-2.3.6-bin.tar.gz -C /opt/module/

mv hbase-2.3.6 hbase

2. Update environment variables

vim /etc/profile and add:

#hbase

export HBASE_HOME=/opt/module/hbase

export PATH=$PATH:$HBASE_HOME/bin

source /etc/profile

3. Edit the configuration files

vim hbase-env.sh

export JAVA_HOME=/usr/java/jdk1.8.0_181/

export HBASE_LOG_DIR=${HBASE_HOME}/logs

export HBASE_MANAGES_ZK=false

Add the following to hbase-site.xml:

 

<property>
  <name>hbase.rootdir</name>
  <value>hdfs://master:9000/hbase</value>
</property>

<property>
  <name>hbase.cluster.distributed</name>
  <value>true</value>
</property>

<property>
  <name>hbase.master</name>
  <value>slave2:60000</value>
</property>

<property>
  <name>hbase.zookeeper.quorum</name>
  <value>master,slave1,slave2</value>
</property>

vim regionservers

master

slave1

slave2

4. Distribute the HBase directory to the other servers

5. Start

start-hbase.sh

Error in the logs: java.lang.IllegalArgumentException: object is not an instance of declaring class

HBase 2.3.x does not work with Hadoop 3.3.x, and switching to HBase 2.4.6 did not help either.

Reinstall Hadoop 3.2.x, then verify the installation:

[hadoop@master ~]$ hbase shell

hbase:001:0> list

TABLE

0 row(s)

Took 0.6578 seconds

=> []

Start the Thrift service on any one node: hbase-daemon.sh start thrift
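The Thrift server listens on port 9090 by default; a quick check on the node where it was started:

ss -lnt | grep 9090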

V. Install Spark

1. Extract the tarball

tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz -C /opt/module/

mv spark-3.1.2-bin-hadoop3.2 spark

2. Update environment variables

vim /etc/profile

#spark

export SPARK_HOME=/opt/module/spark

export PATH=$PATH:$SPARK_HOME/bin

source /etc/profile

3. Edit the configuration files

mv spark-env.sh.template spark-env.sh

vim spark-env.sh

export SCALA_HOME=/usr/local/scala

export JAVA_HOME=/usr/java/jdk1.8.0_181

export SPARK_MASTER_IP=192.168.159.134

export SPARK_WORKER_MEMORY=512m

export HADOOP_CONF_DIR=/opt/module/hadoop-3.2.2/etc/hadoop

vim workers

master

slave1

slave2

Distribute the Spark directory to the other nodes.

4. Start

/opt/module/spark/sbin/start-all.sh
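A smoke test against the standalone master, assuming the SPARK_MASTER_IP set above and Spark's default port 7077 (adjust the examples jar name to your build):

spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://192.168.159.134:7077 \
  /opt/module/spark/examples/jars/spark-examples_2.12-3.1.2.jar 10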

VI. Install Hue

1. Extract the archives

unzip apache-livy-0.7.1-incubating-bin.zip

mv apache-livy-0.7.1-incubating-bin /opt/module/livy

unzip hue-release-4.3.0.zip

mv hue-release-4.3.0 /opt/module/hue

2. Edit the configuration files

Livy configuration:

mv livy.conf.template livy.conf

vim livy.conf 

# use HiveContext by default

livy.repl.enableHiveContext = true

# enable user impersonation

livy.impersonation.enabled = true

# session idle timeout

livy.server.session.timeout = 1h

mv livy-env.sh.template livy-env.sh

vim livy-env.sh

export HADOOP_CONF_DIR=/opt/module/hadoop-3.2.2/etc/hadoop

export SPARK_HOME=/opt/module/spark

mv spark-blacklist.conf.template spark-blacklist.conf

vim spark-blacklist.conf

# spark.master

###########################################################################

Hue configuration. Install the build dependencies first:

yum install ant asciidoc cyrus-sasl-devel cyrus-sasl-gssapi cyrus-sasl-plain gcc gcc-c++

yum install krb5-devel libffi-devel libtidy  libxml2-devel libxslt-devel make maven mysql-devel

yum install openldap-devel python-devel

yum install sqlite-devel openssl-devel

yum install gmp-devel

vim /etc/maven/settings.xml



   

<mirror>
  <id>alimaven</id>
  <name>aliyun maven</name>
  <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
  <mirrorOf>central</mirrorOf>
</mirror>

cd /opt/module/hue

make apps

You may hit an error at this point:

SyntaxError: invalid syntax

The build pulls in traitlets 5.0.5 by default, which does not support Python 2.7.

Fix: point easy_install at a reachable index and downgrade traitlets:

vim build/env/lib/python2.7/site-packages/setuptools/command/easy_install.py +328

self.index_url = self.index_url or "http://pypi.doubanio.com/simple/"

./build/env/bin/easy_install traitlets==4.2.2

Then re-run make apps.

cd /opt/module/hue/desktop/conf

mv pseudo-distributed.ini.tmpl pseudo-distributed.ini

vim pseudo-distributed.ini

[desktop]

  secret_key=dhaiodheqdhqiodhaldhalcbaklhfoadjaoda

  http_host=slave2

  http_port=8888

  time_zone=Asia/Shanghai

  dev=true

  send_dbug_messages=true

  server_user=hadoop

  server_group=hadoop

  default_user=hadoop

  default_hdfs_superuser=hadoop

  [[django_admins]]

  [[custom]]

  [[auth]]

    idle_session_timeout=-1

  [[ldap]]

    [[[users]]]

    [[[groups]]]

    [[[ldap_servers]]]

  [[vcs]]

  [[database]]

    engine=mysql

    host=master

    port=3306

    user=hue

    password=Jkwl@2021

    name=hue

  [[session]]

  [[smtp]]

    host=localhost

    port=25

    user=

    password=

    tls=no

  [[kerberos]]

  [[oauth]]

  [[oidc]]

  [[metrics]]

[notebook]

  [[interpreters]]

    [[[hive]]]

      name=Hive

      interface=hiveserver2

    [[[impala]]]

      name=Impala

      interface=hiveserver2

    [[[spark]]]

      name=Scala

      interface=livy

    [[[pyspark]]]

      name=PySpark

      interface=livy

    [[[r]]]

      name=R

      interface=livy

    [[[jar]]]

      name=Spark Submit Jar

      interface=livy-batch

    [[[py]]]

      name=Spark Submit Python

      interface=livy-batch

    [[[text]]]

      name=Text

      interface=text

    [[[markdown]]]

      name=Markdown

      interface=text

    [[[mysql]]]

      name = MySQL

      interface=rdbms

    [[[sqlite]]]

      name = SQLite

      interface=rdbms

    [[[postgresql]]]

      name = PostgreSQL

      interface=rdbms

    [[[oracle]]]

      name = Oracle

      interface=rdbms

    [[[solr]]]

      name = Solr SQL

      interface=solr

    [[[pig]]]

      name=Pig

      interface=oozie

    [[[java]]]

      name=Java

      interface=oozie

    [[[spark2]]]

      name=Spark

      interface=oozie

    [[[mapreduce]]]

      name=MapReduce

      interface=oozie

    [[[sqoop1]]]

      name=Sqoop1

      interface=oozie

    [[[distcp]]]

      name=Distcp

      interface=oozie

    [[[shell]]]

      name=Shell

      interface=oozie

[dashboard]

  [[engines]]

[hadoop]

  [[hdfs_clusters]]

    [[[default]]]

      fs_defaultfs=hdfs://myNameNodeServer

      webhdfs_url=http://master:14000/webhdfs/v1

      hadoop_conf_dir=/opt/module/hadoop-3.2.2/etc/hadoop

  [[yarn_clusters]]

    [[[default]]]

      resourcemanager_host=master

      resourcemanager_port=8032

      submit_to=True

      history_server_api_url=http://slave1:19888

      logical_name=yarnRM

      resourcemanager_api_url=http://master:8088

[beeswax]

  hive_server_host=master

  hive_server_port=10001

  [[ssl]]

[metastore]

[impala]

  [[ssl]]

[spark]

[oozie]

[filebrowser]

[pig]

[sqoop]

[proxy]

[hbase]

  hbase_clusters=(Cluster|master:9090,slave1:9090,slave2:9090)

  hbase_conf_dir=/opt/module/hbase/conf

  thrift_transport=framed

[search]

[libsolr]

[indexer]

[jobsub]

[jobbrowser]

[security]

[zookeeper]

  [[clusters]]

    [[[default]]]

      host_ports=master:2181,slave1:2181,slave2:2181

      principal_name=zookeeper

[useradmin]

  [[password_policy]]

[liboozie]

[aws]

  [[aws_accounts]]

[azure]

  [[azure_accounts]]

    [[[default]]]

  [[adls_clusters]]

    [[[default]]]

[libsentry]

[libzookeeper]

[librdbms]

  [[databases]]

      nice_name="My SQL DB"

      name=mysql

      engine=mysql

      host=master

      port=3306

      user=root

      password=Jkwl@2021

[libsaml]

[libopenid]

[liboauth]

[kafka]

  [[kafka]]

[metadata]

  [[optimizer]]

  [[navigator]]
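Note that webhdfs_url above points at port 14000, i.e. an HttpFS gateway on master rather than the NameNode's built-in WebHDFS (HttpFS is the usual choice when HDFS runs with HA NameNodes, as here). A quick check that the gateway answers, assuming HttpFS is running and hadoop is an allowed user:

curl "http://master:14000/webhdfs/v1/?op=LISTSTATUS&user.name=hadoop"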

3. Start

cd /opt/module/livy/bin/

nohup ./livy-server >/dev/null 2>&1 &
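Livy should now answer on its REST port (8998 is Livy's default; the livy.conf above does not override it):

curl http://master:8998/sessions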

Initialize the Hue database (run from /opt/module/hue):

./build/env/bin/hue syncdb

./build/env/bin/hue migrate

cd /opt/module/hue/build/env/bin/

nohup ./supervisor &
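Hue should now respond on the host and port configured under [desktop] above:

curl -I http://slave2:8888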
