Deploying a Big Data Development and Test Environment on Linux with WSL2 (Ubuntu 18.04)

Contents

  • hadoop3.2.1+Hive3.1.2+HBase2.2.4+Phoenix5.0.0+Zookeeper3.6.1+Kylin3.0.2+Kafka2.5.0+Scala2.12+Spark3.0.0+Flink1.10.1+Tez0.10.1 ...
    • Deploying and Using WSL 2 (Windows Subsystem for Linux)
    • System Initialization
    • MySQL Installation and Configuration
    • Initializing Services under WSL
    • Installing Java
    • Hadoop 3.2.1
    • Hive 3.1.2
    • HBase 2.2.4
    • Phoenix 5.0.0
    • Zookeeper 3.6.1
    • Kylin 3.0.2
    • Kafka 2.5.0
    • Maxwell
    • Scala 2.12.11
    • Spark 3.0.0
    • Flink 1.10.1
    • Tez 0.10.1
    • Starting Services
    • Stopping Services

hadoop3.2.1+Hive3.1.2+HBase2.2.4+Phoenix5.0.0+Zookeeper3.6.1+Kylin3.0.2+Kafka2.5.0+Scala2.12+Spark3.0.0+Flink1.10.1+Tez0.10.1 …

Deploying and Using WSL 2 (Windows Subsystem for Linux)

Refer to the separate article on deploying and using WSL 2 on Windows.

System Initialization

root@kylin:~# sudo apt update && sudo apt upgrade
root@kylin:~# cd /root
root@kylin:~# vim /etc/profile
root@kylin:~# sudo apt install ssh
root@kylin:~# service ssh start
root@kylin:~# sshd -T
root@kylin:~# sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config
root@kylin:~# sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
root@kylin:~# service ssh restart
root@kylin:~# ssh-keygen -t dsa -f /etc/ssh/ssh_host_dsa_key
root@kylin:~# ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key
root@kylin:~# ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key
root@kylin:~# service ssh restart
root@kylin:~# ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key
root@kylin:~# service ssh restart
root@kylin:~# service ssh status
# Configure passwordless SSH login to localhost
root@kylin:~# ssh-keygen -t rsa
root@kylin:~# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
root@kylin:~# chmod 0600 ~/.ssh/authorized_keys
root@kylin:~# ssh localhost
# Check SELinux status
root@kylin:~# getenforce

Command 'getenforce' not found, but can be installed with:

apt install selinux-utils
# Disable Transparent Huge Pages (THP)
root@kylin:~# echo never > /sys/kernel/mm/transparent_hugepage/enabled
root@kylin:~# echo never > /sys/kernel/mm/transparent_hugepage/defrag
root@kylin:~# echo 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' >> /etc/rc.local
root@kylin:~# echo 'echo never > /sys/kernel/mm/transparent_hugepage/defrag' >> /etc/rc.local
# Minimize swap usage (vm.swappiness=1)
root@kylin:~# sudo sysctl vm.swappiness=1
vm.swappiness = 1
root@kylin:~# echo 'vm.swappiness=1' >> /etc/sysctl.conf
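# Quick check that the settings took effect (note: under WSL these kernel settings can reset when
# the lightweight VM restarts, so re-applying them from the init script below is a reasonable assumption):
root@kylin:~# cat /sys/kernel/mm/transparent_hugepage/enabled
root@kylin:~# cat /proc/sys/vm/swappiness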

MySQL Installation and Configuration

# MySQL-related packages
root@kylin:~# sudo apt-get install mysql-server mysql-client libmysql-java
root@kylin:~# cat /etc/mysql/my.cnf
#
# The MySQL database server configuration file.
#
# You can copy this to one of:
# - "/etc/mysql/my.cnf" to set global options,
# - "~/.my.cnf" to set user-specific options.
#
# One can use all long options that the program supports.
# Run program with --help to get a list of available options and with
# --print-defaults to see which it would actually understand and use.
#
# For explanations see
# http://dev.mysql.com/doc/mysql/en/server-system-variables.html

#
# * IMPORTANT: Additional settings that can override those from this file!
#   The files must end with '.cnf', otherwise they'll be ignored.
#

!includedir /etc/mysql/conf.d/
!includedir /etc/mysql/mysql.conf.d/
root@kylin:~# ll /etc/mysql/mysql.conf.d/
total 24
drwxr-xr-x 2 root root 4096 May 28 18:52 ./
drwxr-xr-x 4 root root 4096 May 28 18:41 ../
-rw-r--r-- 1 root root 4097 May 28 18:52 mysqld.cnf
-rw-r--r-- 1 root root 3054 May 28 18:44 mysqld.cnf.bak
-rw-r--r-- 1 root root   21 Jan 12  2018 mysqld_safe_syslog.cnf
root@kylin:~# cat /etc/mysql/mysql.conf.d/mysqld.cnf
#
# The MySQL database server configuration file.
#
# You can copy this to one of:
# - "/etc/mysql/my.cnf" to set global options,
# - "~/.my.cnf" to set user-specific options.
#
# One can use all long options that the program supports.
# Run program with --help to get a list of available options and with
# --print-defaults to see which it would actually understand and use.
#
# For explanations see
# http://dev.mysql.com/doc/mysql/en/server-system-variables.html

# This will be passed to all mysql clients
# It has been reported that passwords should be enclosed with ticks/quotes
# escpecially if they contain "#" chars...
# Remember to edit /etc/mysql/debian.cnf when changing the socket location.

# Here is entries for some specific programs
# The following values assume you have at least 32M ram

[mysqld_safe]
socket          = /var/run/mysqld/mysqld.sock
nice            = 0

#log-error=/var/log/mysqld.log
#pid-file=/var/run/mysqld/mysqld.pid

sql_mode=STRICT_ALL_TABLES

[mysqld]
#
# * Basic Settings
#
user            = mysql
pid-file        = /var/run/mysqld/mysqld.pid
socket          = /var/run/mysqld/mysqld.sock
port            = 3306
basedir         = /usr
datadir         = /var/lib/mysql
tmpdir          = /tmp
lc-messages-dir = /usr/share/mysql
skip-external-locking

# Here is an option file with Cloudera recommended settings:
#datadir=/var/lib/mysql
#socket=/var/lib/mysql/mysql.sock
transaction-isolation = READ-COMMITTED

symbolic-links = 0

key_buffer_size = 32M
#max_allowed_packet = 32M
thread_stack = 256K
thread_cache_size = 64
query_cache_limit = 8M
query_cache_size = 64M
query_cache_type = 1

max_connections = 550

log_bin=/var/lib/mysql/mysql_binary_log

server_id=1

binlog_format = mixed

read_buffer_size = 2M
read_rnd_buffer_size = 16M
sort_buffer_size = 8M
join_buffer_size = 8M

# InnoDB settings
innodb_file_per_table = 1
innodb_flush_log_at_trx_commit  = 2
innodb_log_buffer_size = 64M
innodb_buffer_pool_size = 4G
innodb_thread_concurrency = 8
innodb_flush_method = O_DIRECT
innodb_log_file_size = 512M

#
# Instead of skip-networking the default is now to listen only on
# localhost which is more compatible and is not less secure.
#bind-address           = 127.0.0.1
#
# * Fine Tuning
#
#key_buffer_size                = 16M
#max_allowed_packet     = 16M
#thread_stack           = 192K
#thread_cache_size       = 8
# This replaces the startup script and checks MyISAM tables if needed
# the first time they are touched
myisam-recover-options  = BACKUP
#max_connections        = 100
#table_open_cache       = 64
#thread_concurrency     = 10
#
# * Query Cache Configuration
#
#query_cache_limit      = 1M
#query_cache_size        = 16M
#
# * Logging and Replication
#
# Both location gets rotated by the cronjob.
# Be aware that this log type is a performance killer.
# As of 5.1 you can enable the log at runtime!
#general_log_file        = /var/log/mysql/mysql.log
#general_log             = 1
#
# Error log - should be very few entries.
#
log_error = /var/log/mysql/error.log
#
# Here you can see queries with especially long duration
#slow_query_log         = 1
#slow_query_log_file    = /var/log/mysql/mysql-slow.log
#long_query_time = 2
#log-queries-not-using-indexes
#
# The following can be used as easy to replay backup logs or for replication.
# note: if you are setting up a replication slave, see README.Debian about
#       other settings you may need to change.
#server-id              = 1
#log_bin                        = /var/log/mysql/mysql-bin.log
expire_logs_days        = 10
max_binlog_size   = 100M
#binlog_do_db           = include_database_name
#binlog_ignore_db       = include_database_name
#
# * InnoDB
#
# InnoDB is enabled by default with a 10MB datafile in /var/lib/mysql/.
# Read the manual for more InnoDB related options. There are many!
#
# * Security Features
#
# Read the manual, too, if you want chroot!
# chroot = /var/lib/mysql/
#
# For generating SSL certificates I recommend the OpenSSL GUI "tinyca".
#
# ssl-ca=/etc/mysql/cacert.pem
# ssl-cert=/etc/mysql/server-cert.pem
# ssl-key=/etc/mysql/server-key.pem

root@kylin:~# service mysql start
 * Starting MySQL database server mysqld 
No directory, logging in with HOME=/
mkdir: cannot create directory ‘//.cache’: Permission denied
-su: 19: /etc/profile.d/wsl-integration.sh: cannot create //.cache/wslu/integration: Directory nonexistent
# Fix for the above: in the MySQL init script, change `su - mysql` to `su - root` where mysqld_safe is started.
root@kylin:/# vim /etc/init.d/mysql
---
# Start MySQL!
#su - mysql -s /bin/sh -c "/usr/bin/mysqld_safe > /dev/null 2>&1 &"
su - root -s /bin/sh -c "/usr/bin/mysqld_safe > /dev/null 2>&1 &"
---

root@kylin:~# service mysql stop
 * Stopping MySQL database server mysqld                                                                              [ OK ]
root@kylin:~# service mysql start
 * Starting MySQL database server mysqld                                                                              [ OK ]
                                                                                                                      [ OK ]
root@kylin:~# mysql
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 4
Server version: 5.7.30-0ubuntu0.18.04.1-log (Ubuntu)

Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| ib_logfile         |
| mysql              |
| performance_schema |
| sys                |
+--------------------+
5 rows in set (0.01 sec)

mysql> use mysql;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
mysql> show tables;
+---------------------------+
| Tables_in_mysql           |
+---------------------------+
| columns_priv              |
| db                        |
| engine_cost               |
| event                     |
| func                      |
| general_log               |
| gtid_executed             |
| help_category             |
| help_keyword              |
| help_relation             |
| help_topic                |
| innodb_index_stats        |
| innodb_table_stats        |
| ndb_binlog_index          |
| plugin                    |
| proc                      |
| procs_priv                |
| proxies_priv              |
| server_cost               |
| servers                   |
| slave_master_info         |
| slave_relay_log_info      |
| slave_worker_info         |
| slow_log                  |
| tables_priv               |
| time_zone                 |
| time_zone_leap_second     |
| time_zone_name            |
| time_zone_transition      |
| time_zone_transition_type |
| user                      |
+---------------------------+
31 rows in set (0.00 sec)

mysql> select host,user,authentication_string,plugin from mysql.user;
+-----------+------------------+-------------------------------------------+-----------------------+
| host      | user             | authentication_string                     | plugin                |
+-----------+------------------+-------------------------------------------+-----------------------+
| localhost | root             |                                           | auth_socket           |
| localhost | mysql.session    | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | mysql.sys        | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | debian-sys-maint | *B886BE6360B8C1E19C15E8FF311E2588171905BE | mysql_native_password |
+-----------+------------------+-------------------------------------------+-----------------------+
4 rows in set (0.00 sec)

mysql> update mysql.user set authentication_string=password('123456'),plugin="mysql_native_password" where user='root' and host ='localhost';
Query OK, 1 row affected, 1 warning (0.01 sec)
Rows matched: 1  Changed: 1  Warnings: 1

mysql> select host,user,authentication_string,plugin from mysql.user;
+-----------+------------------+-------------------------------------------+-----------------------+
| host      | user             | authentication_string                     | plugin                |
+-----------+------------------+-------------------------------------------+-----------------------+
| localhost | root             | *6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9 | mysql_native_password |
| localhost | mysql.session    | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | mysql.sys        | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | debian-sys-maint | *B886BE6360B8C1E19C15E8FF311E2588171905BE | mysql_native_password |
+-----------+------------------+-------------------------------------------+-----------------------+
4 rows in set (0.00 sec)

mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456' WITH GRANT OPTION;
Query OK, 0 rows affected, 1 warning (0.01 sec)

mysql> flush privileges;
Query OK, 0 rows affected (0.02 sec)

mysql> SHOW GRANTS FOR 'root'@'%';
+-------------------------------------------------------------+
| Grants for root@%                                           |
+-------------------------------------------------------------+
| GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION |
+-------------------------------------------------------------+
1 row in set (0.00 sec)

mysql> exit;
Bye
root@kylin:~# netstat -an | grep 3306
tcp6       0      0 :::3306                 :::*                    LISTEN
root@kylin:~# mysql -u root --password='123456' -e "select host,user,authentication_string,plugin from mysql.user"
mysql: [Warning] Using a password on the command line interface can be insecure.
+-----------+------------------+-------------------------------------------+-----------------------+
| host      | user             | authentication_string                     | plugin                |
+-----------+------------------+-------------------------------------------+-----------------------+
| localhost | root             | *6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9 | mysql_native_password |
| localhost | mysql.session    | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | mysql.sys        | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | debian-sys-maint | *B886BE6360B8C1E19C15E8FF311E2588171905BE | mysql_native_password |
| %         | root             | *6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9 | mysql_native_password |
+-----------+------------------+-------------------------------------------+-----------------------+
root@kylin:~# systemctl status mysql
System has not been booted with systemd as init system (PID 1). Can't operate.
# This is because WSL is a subsystem of Windows and systemd is not running as PID 1,
# so we cannot use reboot or the systemctl command to manage systemd services from this terminal.
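# Services therefore have to be managed through the SysV init wrappers instead, for example:
root@kylin:~# service --status-all
root@kylin:~# service mysql status
root@kylin:~# /etc/init.d/mysql restart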

Initializing Services under WSL

Run the following every time the system restarts; it makes managing the services and the changing WSL IP address easier.

hadoop@kylin:/etc/hadoop/conf$ ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"

echo "$newip hadoop" >> /mnt/c/Windows/System32/drivers/etc/hosts
echo "$newip hadoop" >> /etc/hosts
------------------------------------------------------------------------------------------------------------------------

root@kylin:~# cat getip.sh
#!/bin/bash
#getip
##############################
#WSL(Ubuntu) Ip Change
#Author kylin
##############################

newip=`ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"`
oldip=`cat /root/oldip | grep -o "[0-9.]*"`

if [[ $newip = $oldip ]];then
        sudo echo "Same IP address."
        sudo echo "Old ip is $oldip"
else    # On Windows, write access was granted for the hosts file itself, but creating new files under etc/ was not allowed, so sed -i fails to create its temporary file there; hence the workaround below.
        sudo cp /mnt/c/Windows/System32/drivers/etc/hosts /tmp/
        sudo sed -i "s/${oldip}/$newip/" /tmp/hosts
        sudo cat /tmp/hosts > /mnt/c/Windows/System32/drivers/etc/hosts
        sudo sed -i "s/${oldip}/$newip/" /etc/hosts
        sudo echo "New ip is $newip"
        sudo echo "$newip" > /root/oldip
fi
------------------------------------------------------------------------------------------------------------------------

# Improved getip shell script

#!/bin/bash
#getip
##############################
#WSL(Ubuntu) Ip Change
#Author kylin
##############################

newip=`ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"`
oldip=`cat /root/oldip | grep -o "[0-9.]*"`
hostname=`cat /etc/hosts | grep hadoop | grep -o "[a-z]*"`

if [[ $hostname = hadoop ]];then
        if [[ $newip = $oldip ]];then
                sudo echo "Same IP address."
                sudo echo "Old ip is $oldip"
        else
                sudo cp /mnt/c/Windows/System32/drivers/etc/hosts /tmp/
                sudo sed -i "s/${oldip}/$newip/" /tmp/hosts
                sudo cat /tmp/hosts > /mnt/c/Windows/System32/drivers/etc/hosts
                sudo sed -i "s/${oldip}/$newip/" /etc/hosts
                sudo echo "New ip is $newip"
                sudo echo "$newip" > /root/oldip
        fi
else
        sudo echo "$newip hadoop" >> /mnt/c/Windows/System32/drivers/etc/hosts
        sudo echo "$newip hadoop" >> /etc/hosts
        sudo echo "Add IP $newip is Succeed!"
fi

------------------------------------------------------------------------------------------------------------------------

root@kylin:~# cat initservice.sh
#!/bin/bash
#InitService
##############################
#Linux(Ubuntu) Service Enable
#Author kylin
##############################

WORK_DIR=$(pwd)
#Only root
[[ $EUID -ne 0 ]] && echo 'Error: This script must be run as root!' && exit 1

#IP Addr.
#ip=`ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"`
#echo "$ip  cloudera.kylin.com  cloudera" >> /etc/hosts

cd ${WORK_DIR}
/bin/bash ./getip.sh

#Enable ssh
service ssh start

#Disable ufw
service ufw stop

#Enable Httpd
#service apache2 restart > /dev/null 2>&1
#service apache2 start

#Enable mysql service
service mysql start
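
# One way to run this on every start (a sketch; the distro name "Ubuntu-18.04" and the /root script
# location are assumptions - adjust to your setup). Note that initservice.sh expects getip.sh to sit
# in the directory it is invoked from.
# From Windows (PowerShell/CMD):
C:\> wsl -d Ubuntu-18.04 -u root bash /root/initservice.sh
# Or from the first interactive WSL shell:
root@kylin:~# cd /root && bash ./initservice.sh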

Installing Java

root@kylin:~# mkdir scpfile
root@kylin:~# scp [email protected]:/root/scpfile/packages/jdk-8u181-linux-x64.tar.gz ./scpfile
root@kylin:~# cd scpfile/
root@kylin:~/scpfile# mkdir -p /usr/java
root@kylin:~/scpfile# tar -zxf jdk-8u181-linux-x64.tar.gz -C /usr/java/
root@kylin:~/scpfile# cd /usr/java/
root@kylin:/usr/java# chown -R root:root jdk1.8.0_181/
root@kylin:/usr/java# ln -sf /usr/java/jdk1.8.0_181 /usr/java/latest
root@kylin:/usr/java# ln -sf /usr/java/latest /usr/java/default
root@kylin:/usr/java# sudo vim /etc/profile
---
export JAVA_HOME=/usr/java/default
export PATH=$JAVA_HOME/bin:$PATH

Shift+ZZ
---
root@kylin:/usr/java# source /etc/profile
root@kylin:/usr/java# java -version
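
# Optional (not part of the original steps): register the JDK with update-alternatives so that
# java/javac also resolve in shells that have not sourced /etc/profile.
root@kylin:/usr/java# update-alternatives --install /usr/bin/java java /usr/java/default/bin/java 100
root@kylin:/usr/java# update-alternatives --install /usr/bin/javac javac /usr/java/default/bin/javac 100
root@kylin:/usr/java# update-alternatives --display java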

Hadoop 3.2.1

# hadoop 3.2.1
root@kylin:/usr/java# cd /root
root@kylin:~# mkdir wget
root@kylin:~# cd wget/
root@kylin:~/wget# sudo wget https://mirror.bit.edu.cn/apache/hadoop/core/hadoop-3.2.1/hadoop-3.2.1.tar.gz
root@kylin:~/wget# mkdir /opt/hadoop
root@kylin:~/wget# tar -zxf hadoop-3.2.1.tar.gz -C /opt/hadoop/
root@kylin:~/wget# cd /root
root@kylin:~# sudo groupadd hadoop
root@kylin:~# sudo useradd -s /bin/bash -g hadoop -d /home/hadoop -m hadoop
root@kylin:~# id hadoop
uid=1001(hadoop) gid=1001(hadoop) groups=1001(hadoop)

root@kylin:~# cd /opt/hadoop
root@kylin:/opt/hadoop# ln -s hadoop-3.2.1 current
root@kylin:/opt/hadoop# chown -R hadoop:hadoop /opt/hadoop
root@kylin:/opt/hadoop# cd /root/
root@kylin:~# mkdir /etc/hadoop
root@kylin:~# cp -r /opt/hadoop/current/etc/hadoop /etc/hadoop/conf
root@kylin:~# chown -R hadoop:hadoop  /etc/hadoop
root@kylin:~# vim /home/hadoop/.bashrc
---
# User specific aliases and functions
export JAVA_HOME=/usr/java/default
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HTTPFS_CATALINA_HOME=${HADOOP_HOME}/share/hadoop/httpfs/tomcat
export CATALINA_BASE=${HTTPFS_CATALINA_HOME}
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HTTPFS_CONFIG=/etc/hadoop/conf
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
---

root@kylin:~# vim /etc/hadoop/conf/core-site.xml
---
<configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://localhost:9000</value>
        </property>
</configuration>
---

root@kylin:~# vim /etc/hadoop/conf/hdfs-site.xml
---
<configuration>
        <property>
                <name>dfs.replication</name>
                <value>1</value>
        </property>
</configuration>
---
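
# Optional (an assumption, not part of the original config): by default HDFS keeps its metadata and
# block data under hadoop.tmp.dir (/tmp/hadoop-<user>), which is easy to lose. The directories can be
# pinned explicitly; the paths below are illustrative only.
root@kylin:~# vim /etc/hadoop/conf/hdfs-site.xml
---
        <property>
                <name>dfs.namenode.name.dir</name>
                <value>file:///opt/hadoop/data/namenode</value>
        </property>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>file:///opt/hadoop/data/datanode</value>
        </property>
---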

root@kylin:~# su hadoop
hadoop@kylin:/root$ cd /home/hadoop/
hadoop@kylin:~$ cd /opt/hadoop/current
hadoop@kylin:/opt/hadoop/current$ bin/hdfs namenode -format
# If the NameNode is formatted more than once, the DataNode may fail to start (clusterID mismatch). Fix:
# check the DataNode log, then delete the DataNode's stale current directory:
# hadoop@kylin:/tmp/hadoop-hadoop/dfs/data$ rm -rf current/
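# To confirm the cause, compare the clusterID recorded on both sides after a re-format (paths assume
# the default hadoop.tmp.dir layout; adjust if dfs.namenode.name.dir / dfs.datanode.data.dir were changed):
# hadoop@kylin:~$ grep clusterID /tmp/hadoop-hadoop/dfs/name/current/VERSION
# hadoop@kylin:~$ grep clusterID /tmp/hadoop-hadoop/dfs/data/current/VERSION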
hadoop@kylin:/opt/hadoop/current$ cd /etc/hadoop/conf/
hadoop@kylin:~$ hdfs --daemon start namenode
hadoop@kylin:~$ jps
4629 NameNode

hadoop@kylin:~$ hdfs --daemon start secondarynamenode
hadoop@kylin:~$ hdfs --daemon start datanode

hadoop@kylin:~$ vim /etc/hadoop/conf/mapred-site.xml
---
<configuration>
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
        <property>
                <name>mapreduce.application.classpath</name>
                <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
        </property>
</configuration>
---

hadoop@kylin:~$ vim /etc/hadoop/conf/yarn-site.xml
---
<configuration>

<!-- Site specific YARN configuration properties -->
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>
        <property>
                <name>yarn.nodemanager.env-whitelist</name>
                <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
        </property>
</configuration>
---

hadoop@kylin:~$ yarn --daemon start resourcemanager
hadoop@kylin:~$ yarn --daemon start nodemanager
hadoop@kylin:~$ mapred  --daemon start historyserver
hadoop@kylin:~$ jps
7091 Jps
4629 NameNode
5946 DataNode
6829 JobHistoryServer
6653 NodeManager
5823 SecondaryNameNode
6335 ResourceManager

hadoop@kylin:~$ touch hadoopstart.sh
hadoop@kylin:~$ chmod +x hadoopstart.sh
hadoop@kylin:~$ vim hadoopstart.sh
hadoop@kylin:~$ cat hadoopstart.sh
#!/bin/bash

##############################
#Hadoopstart.sh
#Author kylin
##############################

$HADOOP_HOME/bin/hdfs --daemon start namenode
$HADOOP_HOME/bin/hdfs --daemon start secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon start datanode
$HADOOP_HOME/bin/yarn --daemon start resourcemanager
$HADOOP_HOME/bin/yarn --daemon start nodemanager
$HADOOP_HOME/bin/yarn --daemon start proxyserver
$HADOOP_HOME/bin/mapred --daemon start historyserver


hadoop@kylin:~$ hadoop fs -ls /
Found 1 items
drwxrwx---   - hadoop supergroup          0 2020-05-29 02:06 /tmp
hadoop@kylin:~$ mkdir data
hadoop@kylin:~$ vim /home/hadoop/data/demo.txt
hadoop@kylin:~$ cat /home/hadoop/data/demo.txt
Linux Unix windows
hadoop Linux spark
hive hadoop Unix
MapReduce hadoop  Linux hive
windows hadoop spark
hadoop@kylin:~$ hadoop fs -mkdir /demo
hadoop@kylin:~$ hadoop fs -put /home/hadoop/data/demo.txt /demo
2020-05-29 03:01:21,722 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
hadoop@kylin:~$ hadoop fs -ls /demo
Found 1 items
-rw-r--r--   1 hadoop supergroup        105 2020-05-29 03:01 /demo/demo.txt
hadoop@kylin:~$ hadoop fs -cat /demo/demo.txt
2020-05-29 03:02:42,682 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
Linux Unix windows
hadoop Linux spark
hive hadoop Unix
MapReduce hadoop  Linux hive
windows hadoop spark
hadoop@kylin:~$ hadoop jar /opt/hadoop/current/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar wordcount /demo /output
2020-05-29 03:05:15,225 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
2020-05-29 03:05:17,213 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/hadoop/.staging/job_1590688933468_0001
2020-05-29 03:05:17,614 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,072 INFO input.FileInputFormat: Total input files to process : 1
2020-05-29 03:05:18,197 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,291 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,332 INFO mapreduce.JobSubmitter: number of splits:1
2020-05-29 03:05:18,739 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,816 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1590688933468_0001
2020-05-29 03:05:18,816 INFO mapreduce.JobSubmitter: Executing with tokens: []
2020-05-29 03:05:19,612 INFO conf.Configuration: resource-types.xml not found
2020-05-29 03:05:19,613 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
2020-05-29 03:05:20,619 INFO impl.YarnClientImpl: Submitted application application_1590688933468_0001
2020-05-29 03:05:20,810 INFO mapreduce.Job: The url to track the job: http://kylin.localdomain:8088/proxy/application_1590688933468_0001/
2020-05-29 03:05:20,813 INFO mapreduce.Job: Running job: job_1590688933468_0001
2020-05-29 03:05:43,515 INFO mapreduce.Job: Job job_1590688933468_0001 running in uber mode : false
2020-05-29 03:05:43,519 INFO mapreduce.Job:  map 0% reduce 0%
2020-05-29 03:05:53,818 INFO mapreduce.Job:  map 100% reduce 0%
2020-05-29 03:06:03,986 INFO mapreduce.Job:  map 100% reduce 100%
2020-05-29 03:06:05,030 INFO mapreduce.Job: Job job_1590688933468_0001 completed successfully
2020-05-29 03:06:05,337 INFO mapreduce.Job: Counters: 54
        File System Counters
                FILE: Number of bytes read=95
                FILE: Number of bytes written=451609
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=205
                HDFS: Number of bytes written=61
                HDFS: Number of read operations=8
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=2
                HDFS: Number of bytes read erasure-coded=0
        Job Counters
                Launched map tasks=1
                Launched reduce tasks=1
                Data-local map tasks=1
                Total time spent by all maps in occupied slots (ms)=7398
                Total time spent by all reduces in occupied slots (ms)=8407
                Total time spent by all map tasks (ms)=7398
                Total time spent by all reduce tasks (ms)=8407
                Total vcore-milliseconds taken by all map tasks=7398
                Total vcore-milliseconds taken by all reduce tasks=8407
                Total megabyte-milliseconds taken by all map tasks=7575552
                Total megabyte-milliseconds taken by all reduce tasks=8608768
        Map-Reduce Framework
                Map input records=5
                Map output records=16
                Map output bytes=168
                Map output materialized bytes=95
                Input split bytes=100
                Combine input records=16
                Combine output records=7
                Reduce input groups=7
                Reduce shuffle bytes=95
                Reduce input records=7
                Reduce output records=7
                Spilled Records=14
                Shuffled Maps =1
                Failed Shuffles=0
                Merged Map outputs=1
                GC time elapsed (ms)=303
                CPU time spent (ms)=4620
                Physical memory (bytes) snapshot=487563264
                Virtual memory (bytes) snapshot=5311295488
                Total committed heap usage (bytes)=446693376
                Peak Map Physical memory (bytes)=297992192
                Peak Map Virtual memory (bytes)=2652413952
                Peak Reduce Physical memory (bytes)=189571072
                Peak Reduce Virtual memory (bytes)=2658881536
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters
                Bytes Read=105
        File Output Format Counters
                Bytes Written=61
hadoop@kylin:~$ hadoop fs -ls /output
Found 2 items
-rw-r--r--   1 hadoop supergroup          0 2020-05-29 03:06 /output/_SUCCESS
-rw-r--r--   1 hadoop supergroup         61 2020-05-29 03:06 /output/part-r-00000
hadoop@kylin:~$ hadoop fs -text /output/part-r-00000
2020-05-29 03:10:44,893 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
Linux   3
MapReduce       1
Unix    2
hadoop  4
hive    2
spark   2
windows 2


hadoop@kylin:~$ touch hadoopstop.sh
hadoop@kylin:~$ chmod +x hadoopstop.sh
hadoop@kylin:~$ vim hadoopstop.sh
hadoop@kylin:~$ cat hadoopstop.sh
#!/bin/bash

##############################
#Hadoopstop.sh
#Author kylin
##############################

$HADOOP_HOME/bin/mapred --daemon stop historyserver
$HADOOP_HOME/bin/yarn --daemon stop proxyserver
$HADOOP_HOME/bin/yarn --daemon stop nodemanager
$HADOOP_HOME/bin/yarn --daemon stop resourcemanager
$HADOOP_HOME/bin/hdfs --daemon stop datanode
$HADOOP_HOME/bin/hdfs --daemon stop secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon stop namenode

hadoop@kylin:~$ hdfs dfsadmin -report
Configured Capacity: 269490393088 (250.98 GB)
Present Capacity: 251389489152 (234.12 GB)
DFS Remaining: 251389173760 (234.12 GB)
DFS Used: 315392 (308 KB)
DFS Used%: 0.00%
Replicated Blocks:
        Under replicated blocks: 0
        Blocks with corrupt replicas: 0
        Missing blocks: 0
        Missing blocks (with replication factor 1): 0
        Low redundancy blocks with highest priority to recover: 0
        Pending deletion blocks: 0
Erasure Coded Block Groups:
        Low redundancy block groups: 0
        Block groups with corrupt internal blocks: 0
        Missing block groups: 0
        Low redundancy blocks with highest priority to recover: 0
        Pending deletion blocks: 0

-------------------------------------------------
Live datanodes (1):

Name: 127.0.0.1:9866 (localhost)
Hostname: kylin.localdomain
Decommission Status : Normal
Configured Capacity: 269490393088 (250.98 GB)
DFS Used: 315392 (308 KB)
Non DFS Used: 4340232192 (4.04 GB)
DFS Remaining: 251389173760 (234.12 GB)
DFS Used%: 0.00%
DFS Remaining%: 93.28%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Fri May 29 09:35:35 CST 2020
Last Block Report: Fri May 29 08:58:44 CST 2020
Num of Blocks: 4

#https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/ClusterSetup.html

Hive 3.1.2

# hive 3.1.2
# https://www.cnblogs.com/weavepub/p/11130869.html
root@kylin:~/wget# sudo wget https://mirror.bit.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz

# Use MySQL as the metastore database
# Create the metastore database
root@kylin:~# mysql -u root --password='123456' -e "create database hive default character set utf8 DEFAULT COLLATE utf8_general_ci"
mysql: [Warning] Using a password on the command line interface can be insecure.
root@kylin:~# mysql -u root --password='123456' -e "GRANT ALL ON hive.* TO 'hive'@'%' IDENTIFIED BY '123456'"
mysql: [Warning] Using a password on the command line interface can be insecure.
root@kylin:~# mysql -u root --password='123456' -e "flush privileges"
mysql: [Warning] Using a password on the command line interface can be insecure.
root@kylin:~# mysql -u root --password='123456' -e "SHOW GRANTS FOR 'hive'@'%'"
mysql: [Warning] Using a password on the command line interface can be insecure.
+------------------------------------------------+
| Grants for hive@%                              |
+------------------------------------------------+
| GRANT USAGE ON *.* TO 'hive'@'%'               |
| GRANT ALL PRIVILEGES ON `hive`.* TO 'hive'@'%' |
+------------------------------------------------+
root@kylin:~#

root@kylin:~/wget# mkdir /opt/hive
root@kylin:~/wget# tar -zxf apache-hive-3.1.2-bin.tar.gz -C /opt/hive
root@kylin:~/wget# cd /opt/hive/
root@kylin:/opt/hive# ll
total 12
drwxr-xr-x  3 root root 4096 May 29 10:47 ./
drwxr-xr-x  4 root root 4096 May 29 10:47 ../
drwxr-xr-x 10 root root 4096 May 29 10:47 apache-hive-3.1.2-bin/
root@kylin:/opt/hive# mv apache-hive-3.1.2-bin hive-3.1.2
root@kylin:/opt/hive# ln -s hive-3.1.2 current
root@kylin:/opt/hive# chown -R hadoop:hadoop /opt/hive/
root@kylin:/opt/hive# ll
total 12
drwxr-xr-x  3 hadoop hadoop 4096 May 29 11:00 ./
drwxr-xr-x  4 root   root   4096 May 29 10:47 ../
lrwxrwxrwx  1 hadoop hadoop   10 May 29 11:00 current -> hive-3.1.2/
drwxr-xr-x 10 hadoop hadoop 4096 May 29 10:47 hive-3.1.2/

root@kylin:~# mkdir /etc/hive
root@kylin:~# cp -r /opt/hive/current/conf /etc/hive/conf
root@kylin:~# chown -R hadoop:hadoop /etc/hive/
root@kylin:~# vim /home/hadoop/.bashrc
---
# Hive
export HIVE_HOME=/opt/hive/current
export HCAT_HOME=$HIVE_HOME/hcatalog
export HIVE_CONF_DIR=/etc/hive/conf
export PATH=$PATH:$HIVE_HOME/bin
---
hadoop@kylin:/etc/hive/conf$ source /home/hadoop/.bashrc

root@kylin:~# cd /usr/share/java/
root@kylin:/usr/share/java# ll
total 992
drwxr-xr-x   2 root root    4096 May 28 18:41 ./
drwxr-xr-x 113 root root    4096 May 28 18:41 ../
-rw-r--r--   1 root root    2613 Feb 21  2019 libintl.jar
-rw-r--r--   1 root root 1002812 Feb 24  2018 mysql-connector-java-5.1.45.jar
lrwxrwxrwx   1 root root      31 Feb 24  2018 mysql-connector-java.jar -> mysql-connector-java-5.1.45.jar
lrwxrwxrwx   1 root root      24 Feb 24  2018 mysql.jar -> mysql-connector-java.jar
root@kylin:/usr/share/java# cp mysql-connector-java-5.1.45.jar /opt/hive/current/lib/
root@kylin:/usr/share/java# cd /opt/hive/current/lib/
root@kylin:/opt/hive/current/lib# chown hadoop:hadoop mysql-connector-java-5.1.45.jar
root@kylin:/opt/hive/current/lib# su hadoop
hadoop@kylin:/opt/hive/current/lib$ ln -s mysql-connector-java-5.1.45.jar mysql-connector-java.jar
hadoop@kylin:/opt/hive/current/lib$ cd ..
hadoop@kylin:/opt/hive/current$ hadoop fs -mkdir -p /user/hive/warehouse
hadoop@kylin:/opt/hive/current$ hadoop fs -chmod g+w /user/hive/warehouse
hadoop@kylin:/opt/hive/current$ hadoop fs -ls /user/hive/warehouse
hadoop@kylin:/opt/hive/current$ hadoop fs -ls /user/hive
Found 1 items
drwxrwxrwx   - hadoop supergroup          0 2020-05-29 11:43 /user/hive/warehouse
# Hadoop creates /tmp on HDFS after startup; if it does not exist yet, create it and grant group write permission:
#$HADOOP_HOME/bin/hadoop fs -mkdir       /tmp
#$HADOOP_HOME/bin/hadoop fs -chmod g+w   /tmp
#
hadoop@kylin:/opt/hive/current$ cd /etc/hive/conf/
hadoop@kylin:/etc/hive/conf$ pwd
/etc/hive/conf
hadoop@kylin:/etc/hive/conf$ mv hive-exec-log4j2.properties.template hive-exec-log4j2.properties
hadoop@kylin:/etc/hive/conf$ mv hive-log4j2.properties.template hive-log4j2.properties
hadoop@kylin:/etc/hive/conf$ mv hive-default.xml.template hive-default.xml
hadoop@kylin:/etc/hive/conf$ cp hive-default.xml hive-site.xml
hadoop@kylin:/etc/hive/conf$ mv hive-env.sh.template hive-env.sh

hadoop@kylin:/etc/hive/conf$ vim /etc/hive/conf/hive-env.sh
---
export HADOOP_HOME=/opt/hadoop/current
export HIVE_CONF_DIR=/etc/hive/conf
export HIVE_HOME=/opt/hive/current
export HIVE_AUX_JARS_PATH=$HIVE_HOME/lib
---

hadoop@kylin:/etc/hive/conf$ vim hive-site.xml
hadoop@kylin:/etc/hive/conf$ cat hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
        <property>
                <name>javax.jdo.option.ConnectionURL</name>
                <value>jdbc:mysql://localhost:3306/hive?allowMultiQueries=true&amp;useSSL=false&amp;verifyServerCertificate=false</value>
        </property>
        <property>
                <name>javax.jdo.option.ConnectionDriverName</name>
                <value>com.mysql.jdbc.Driver</value>
        </property>
        <property>
                <name>javax.jdo.option.ConnectionUserName</name>
                <value>hive</value>
        </property>
        <property>
                <name>javax.jdo.option.ConnectionPassword</name>
                <value>123456</value>
        </property>
        <property>
                <name>datanucleus.readOnlyDatastore</name>
                <value>false</value>
        </property>
        <property>
                <name>datanucleus.fixedDatastore</name>
                <value>false</value>
        </property>
        <property>
                <name>datanucleus.autoCreateSchema</name>
                <value>true</value>
        </property>
        <property>
                <name>datanucleus.autoCreateTables</name>
                <value>true</value>
        </property>
        <property>
                <name>datanucleus.autoCreateColumns</name>
                <value>true</value>
        </property>
</configuration>

hadoop@kylin:~$ $HIVE_HOME/bin/schematool -initSchema -dbType mysql
Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V

# Fixing the error above
# Solution: Hive 3.1.2 ships guava-19.0.jar by default ($HIVE_HOME/lib/); replace it with a newer guava jar, e.g. one downloaded from https://mvnrepository.com/artifact/com.google.guava/guava.
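# A sketch of the jar swap, assuming the replacement is Hadoop 3.2.1's bundled guava
# (guava-27.0-jre.jar); check the actual file name under $HADOOP_HOME first:
hadoop@kylin:~$ ls $HADOOP_HOME/share/hadoop/common/lib/ | grep guava
hadoop@kylin:~$ mv $HIVE_HOME/lib/guava-19.0.jar $HIVE_HOME/lib/guava-19.0.jar.bak
hadoop@kylin:~$ cp $HADOOP_HOME/share/hadoop/common/lib/guava-27.0-jre.jar $HIVE_HOME/lib/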
# If a failed initialization left tables in the hive database, generate DROP statements for all of them first:
SELECT concat('DROP TABLE IF EXISTS ', table_name, ';')
FROM information_schema.tables
WHERE table_schema = 'hive';

mysql> DROP TABLE IF EXISTS WM_RESOURCEPLAN;
ERROR 1217 (23000): Cannot delete or update a parent row: a foreign key constraint fails
# Work around the foreign key error
mysql> SET FOREIGN_KEY_CHECKS = 0;
Query OK, 0 rows affected (0.00 sec)

mysql> DROP TABLE IF EXISTS WM_RESOURCEPLAN;
Query OK, 0 rows affected (0.03 sec)

mysql> SET FOREIGN_KEY_CHECKS = 1;
Query OK, 0 rows affected (0.00 sec)

hadoop@kylin:~$ $HIVE_HOME/bin/schematool -initSchema -dbType mysql
Metastore connection URL:        jdbc:mysql://localhost:3306/hive?allowMultiQueries=true&useSSL=false&verifyServerCertificate=false
Metastore Connection Driver :    com.mysql.jdbc.Driver
Metastore connection User:       hive
Starting metastore schema initialization to 3.1.0
Initialization script hive-schema-3.1.0.mysql.sql

Initialization script completed
schemaTool completed
# Start Hive
hadoop@kylin:~$ $HIVE_HOME/bin/hive
Hive Session ID = 65b6e98e-b1df-4dbc-a8cc-1b3732893512

Logging initialized using configuration in file:/etc/hive/conf/hive-log4j2.properties Async: true
Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
Hive Session ID = 75771681-679c-4676-ab8a-c1cb1bd9e3d1
hive> show databases;
OK
default
Time taken: 0.129 seconds, Fetched: 1 row(s)
hive> quit;

hadoop@kylin:~$ $HIVE_HOME/bin/hiveserver2 &
[1] 8659
hadoop@kylin:~$ 2020-05-29 15:54:43: Starting HiveServer2
Hive Session ID = 2ec73789-52dc-4254-86a0-5a2346eca8c6
Hive Session ID = be6f9dcf-cdd2-459c-9d57-5d71158af1a7
Hive Session ID = 461cc314-6292-4776-9883-ba92a9b78dba
Hive Session ID = 4c4a5b6c-636a-45d0-98a0-2d43c3c7fe33

hadoop@kylin:~$ jps
8659 RunJar
309 SecondaryNameNode
520 NodeManager
376 DataNode
249 NameNode
604 JobHistoryServer
444 ResourceManager
8846 Jps

---
hadoop@kylin:/opt/hive/current/logs$ touch metastore.log
hadoop@kylin:/opt/hive/current/logs$ touch hiveserver2.log
hadoop@kylin:/opt/hive/current/logs$ chmod o+w hiveserver2.log
hadoop@kylin:/opt/hive/current/logs$ chmod o+w metastore.log

hadoop@kylin:~$ nohup hive --service metastore > $HIVE_HOME/logs/metastore.log 2>&1 &
[1] 12022
hadoop@kylin:~$ nohup  hive --service hiveserver2 > $HIVE_HOME/logs/hiveserver2.log 2>&1 &
[2] 12172
hadoop@kylin:~$ jps | grep RunJar
12022 RunJar
12172 RunJar
---

hadoop@kylin:~$ netstat -tulnp | grep 10000
(Not all processes could be identified, non-owned process info
 will not be shown, you would have to be root to see it all.)
tcp6       0      0 :::10000                :::*                    LISTEN      8659/java

hadoop@kylin:~$ vim /etc/hive/conf/hive-site.xml
---
        </property>
        <property>
                <name>hive.server2.authentication</name>
                <value>CUSTOM</value>
        </property>
        <property>
                <name>hive.server2.custom.authentication.class</name>
                <value>com.weiming.SampleAuth.SampleAuthenticator</value>
        </property>
        <property>
                <name>hive.server2.thrift.port</name>
                <value>10000</value>
                <description>TCP port number to listen on, default 10000</description>
        </property>
        <property>
                <name>hive.server2.authentication</name>
                <value>NOSASL</value>
        </property>
        <!-- Show the current database in the CLI prompt and print column headers in query results -->
        <property>
                <name>hive.cli.print.header</name>
                <value>true</value>
        </property>
        <property>
                <name>hive.cli.print.current.db</name>
                <value>true</value>
        </property>
---
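# Note: hive.server2.authentication is set twice above (CUSTOM and NOSASL); with Hadoop-style configuration
# loading, the later NOSASL value appears to take effect, which is why the beeline connections below use ;auth=noSasl.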

hadoop@kylin:~$ mkdir /opt/hive/current/SampleAuth && cd /opt/hive/current/SampleAuth
hadoop@kylin:/opt/hive/current/SampleAuth$ vim SampleAuthenticator.java
---
package com.weiming.SampleAuth;

import java.util.Hashtable;
import javax.security.sasl.AuthenticationException;
import org.apache.hive.service.auth.PasswdAuthenticationProvider;

/*
 *  javac -cp $HIVE_HOME/lib/hive-service-0.12.0-cdh5.0.0-beta-2.jar SampleAuthenticator.java -d .
 *  jar cf sampleauth.jar hive
 *  cp sampleauth.jar $HIVE_HOME/lib/.
 *  */


public class SampleAuthenticator implements PasswdAuthenticationProvider {
        Hashtable<String, String> store = null;
        public SampleAuthenticator () {
                store = new Hashtable<String, String>();
                store.put("user1", "passwd1");
                store.put("user2", "passwd2");
        }

        @Override
                public void Authenticate(String user, String  password)
                        throws AuthenticationException {
                        String storedPasswd = store.get(user);
                        if (storedPasswd != null && storedPasswd.equals(password))
                                return;
                        throw new AuthenticationException("SampleAuthenticator: Error validating user");
                }
}
---

hadoop@kylin:/opt/hive/current/SampleAuth$ javac -cp /opt/hive/current/lib/hive-service-3.1.2.jar SampleAuthenticator.java -d .
hadoop@kylin:/opt/hive/current/SampleAuth$ jar cf sampleauth.jar hive
hadoop@kylin:/opt/hive/current/SampleAuth$ ll
total 20
drwxrwxr-x  3 hadoop hadoop 4096 May 29 16:37 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
-rw-rw-r--  1 hadoop hadoop  960 May 29 16:16 SampleAuthenticator.java
drwxrwxr-x  3 hadoop hadoop 4096 May 29 16:36 com/
-rw-rw-r--  1 hadoop hadoop  342 May 29 16:41 sampleauth.jar
hadoop@kylin:/opt/hive/current/SampleAuth$ cp sampleauth.jar /opt/hive/current/lib/
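# Note: restart HiveServer2 after copying the jar so that the class referenced by hive.server2.custom.authentication.class can be loaded.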

hadoop@kylin:~$ vim /etc/hadoop/conf/core-site.xml
---
        <property>
                <name>hadoop.proxyuser.hadoop.hosts</name>
                <value>*</value>
        </property>
        <property>
                <name>hadoop.proxyuser.hadoop.groups</name>
                <value>*</value>
        </property>
---
# Since Hive is managed and started as the hadoop user and group, the proxyuser properties are named hadoop.proxyuser.hadoop.hosts and hadoop.proxyuser.hadoop.groups.
hadoop@kylin:~$ hdfs dfsadmin -refreshSuperUserGroupsConfiguration
Refresh super user groups configuration successful
hadoop@kylin:~$ yarn rmadmin -refreshSuperUserGroupsConfiguration
2020-05-29 20:36:59,284 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8033

hadoop@kylin:~$ $HIVE_HOME/bin/beeline
Beeline version 3.1.2 by Apache Hive
beeline> !connect jdbc:hive2://hadoop:10000/default
Connecting to jdbc:hive2://hadoop:10000/default
Enter username for jdbc:hive2://hadoop:10000/default: user2
Enter password for jdbc:hive2://hadoop:10000/default: *******
20/05/29 22:51:20 [main]: WARN jdbc.HiveConnection: Failed to connect to hadoop:10000
Unexpected end of file when reading from HS2 server. The root cause might be too many concurrent connections. Please ask the administrator to check the number of active connections, and adjust hive.server2.thrift.max.worker.threads if applicable.
Error: Could not open client transport with JDBC Uri: jdbc:hive2://hadoop:10000/default: null (state=08S01,code=0)

beeline> !connect jdbc:hive2://hadoop:10000/default;auth=noSasl
Connecting to jdbc:hive2://hadoop:10000/default;auth=noSasl
Enter username for jdbc:hive2://hadoop:10000/default: user2
Enter password for jdbc:hive2://hadoop:10000/default: *******
20/05/29 22:51:39 [main]: WARN jdbc.HiveConnection: Failed to connect to hadoop:10000
Error: Could not open client transport with JDBC Uri: jdbc:hive2://hadoop:10000/default;auth=noSasl: Failed to open new session: java.lang.RuntimeException: org.apache.hadoop.security.AccessControlException: Permission denied: user=user2, access=EXECUTE, inode="/tmp":hadoop:supergroup:drwxrwx---

hadoop@kylin:~$ hdfs dfs -chmod o+rwx /tmp

beeline> !connect jdbc:hive2://hadoop:10000/default;auth=noSasl
Connecting to jdbc:hive2://hadoop:10000/default;auth=noSasl
Enter username for jdbc:hive2://hadoop:10000/default: user2
Enter password for jdbc:hive2://hadoop:10000/default: *******
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://hadoop:10000/default>

#Now you can connect to Hive from clients such as DBeaver and DataGrip.
#When specifying the schema, append the string ;auth=noSasl to the JDBC URL as a connection property:
#jdbc:hive2://hadoop:10000/default;auth=noSasl
#
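# For scripted checks the same connection can be made non-interactively (a minimal sketch; user2/passwd2
# are the sample credentials defined in SampleAuthenticator above):
hadoop@kylin:~$ beeline -u "jdbc:hive2://hadoop:10000/default;auth=noSasl" -n user2 -p passwd2 -e "show databases;"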

# hive -S suppresses the step-by-step execution logs
hadoop@kylin:~$ hive
Hive Session ID = e5823ff3-9557-4fc7-9a8d-0f77f5df3511

Logging initialized using configuration in file:/etc/hive/conf/hive-log4j2.properties Async: true
Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
Hive Session ID = dca74a09-3e5a-45ce-83ed-1fe8506211b7
# If HBase is installed and Hive prints large amounts of INFO logging, run the following against the HBase conf directory:
# sed -i 's/INFO/ERROR/' /etc/hbase/conf/log4j.properties
hive> desc function sum;
OK
sum(x) - Returns the sum of a set of numbers
Time taken: 4.3 seconds, Fetched: 1 row(s)
hive> create database test_bdw;
OK
Time taken: 0.97 seconds
hive> use test_bdw;
OK
Time taken: 0.178 seconds
hive> create table student(id int, name string) row format delimited fields terminated by '\t';
OK
Time taken: 2.69 seconds
hive> desc student;
OK
id                      int
name                    string
Time taken: 0.481 seconds, Fetched: 2 row(s)
hive> 

hadoop@kylin:/opt/hive/current/SampleAuth$ cd $HIVE_HOME
hadoop@kylin:/opt/hive/current$ mkdir warehouse/test_bdw
hadoop@kylin:/opt/hive/current$ touch warehouse/test_bdw/student.dat
hadoop@kylin:/opt/hive/current$ vim warehouse/test_bdw/student.dat
hadoop@kylin:/opt/hive/current$ cat warehouse/test_bdw/student.dat
001     david
002     fab
003     kaishen
004     josen
005     arvin
006     wada
007     weda
008     banana
009     arnold
010     simon
011     scott

hive> load data local inpath '/opt/hive/current/warehouse/test_bdw/student.dat' into table test_bdw.student;
Loading data to table test_bdw.student
OK
Time taken: 3.21 seconds
hive> select * from student;
OK
1       david
2       fab
3       kaishen
4       josen
5       arvin
6       wada
7       weda
8       banana
9       arnold
10      simon
11      scott
NULL    NULL
Time taken: 7.677 seconds, Fetched: 12 row(s)
hive>
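# The trailing NULL NULL row most likely comes from an extra blank line at the end of student.dat;
# lines that cannot be split on the tab delimiter are loaded as NULLs.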

hadoop@kylin:~$ hadoop fs -ls /
Found 4 items
drwxr-xr-x   - hadoop supergroup          0 2020-05-29 03:01 /demo
drwxr-xr-x   - hadoop supergroup          0 2020-05-29 03:06 /output
drwxrwxrwx   - hadoop supergroup          0 2020-05-29 15:47 /tmp
drwxr-xr-x   - hadoop supergroup          0 2020-05-29 14:05 /user
hadoop@kylin:~$ hadoop fs -ls /user
Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2020-05-29 14:05 /user/hive
hadoop@kylin:~$ hadoop fs -ls /user/hive
Found 1 items
drwxrwxrwx   - hadoop supergroup          0 2020-05-29 23:51 /user/hive/warehouse
hadoop@kylin:~$ hadoop fs -ls /user/hive/warehouse
Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2020-05-29 23:51 /user/hive/warehouse/test_bdw.db
hadoop@kylin:~$ hadoop fs -ls /user/hive/warehouse/test_bdw.db
Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2020-05-29 23:59 /user/hive/warehouse/test_bdw.db/student
hadoop@kylin:~$ hadoop fs -ls /user/hive/warehouse/test_bdw.db/student
Found 1 items
-rw-r--r--   1 hadoop supergroup        111 2020-05-30 00:00 /user/hive/warehouse/test_bdw.db/student/student.dat
hadoop@kylin:~$ hadoop fs -cat /user/hive/warehouse/test_bdw.db/student/student.dat
2020-05-30 00:09:04,738 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
001     david
002     fab
003     kaishen
004     josen
005     arvin
006     wada
007     weda
008     banana
009     arnold
010     simon
011     scott

hadoop@kylin:~$ hadoop fs -text /user/hive/warehouse/test_bdw.db/student/student.dat
2020-05-30 00:09:28,852 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
001     david
002     fab
003     kaishen
004     josen
005     arvin
006     wada
007     weda
008     banana
009     arnold
010     simon
011     scott

hadoop@kylin:~$

#Running HCatalog
#https://cwiki.apache.org/confluence/display/Hive/GettingStarted
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh
Usage: /opt/hive/current/hcatalog/sbin/hcat_server.sh [--config confdir] COMMAND
  start  Start HCatalog Server
  stop   Stop HCatalog Server
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
Missing hive-site.xml, expected at [/opt/hive/current/conf/hive-site.xml]
hadoop@kylin:~$ cd /opt/hive/current/conf/
hadoop@kylin:/opt/hive/current/conf$ ll
total 340
drwxr-xr-x  2 hadoop hadoop   4096 May 29 10:47 ./
drwxr-xr-x 12 hadoop hadoop   4096 May 29 16:08 ../
-rw-r--r--  1 hadoop hadoop   1596 Aug 23  2019 beeline-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop 300482 Aug 23  2019 hive-default.xml.template
-rw-r--r--  1 hadoop hadoop   2365 Aug 23  2019 hive-env.sh.template
-rw-r--r--  1 hadoop hadoop   2274 Aug 23  2019 hive-exec-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop   3086 Aug 23  2019 hive-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop   2060 Aug 23  2019 ivysettings.xml
-rw-r--r--  1 hadoop hadoop   3558 Aug 23  2019 llap-cli-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop   7163 Aug 23  2019 llap-daemon-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop   2662 Aug 23  2019 parquet-logging.properties
hadoop@kylin:/opt/hive/current/conf$ ln -s $HIVE_CONF_DIR/hive-site.xml hive-site.xml
hadoop@kylin:/opt/hive/current/conf$ ll
total 340
drwxr-xr-x  2 hadoop hadoop   4096 May 30 14:13 ./
drwxr-xr-x 12 hadoop hadoop   4096 May 29 16:08 ../
-rw-r--r--  1 hadoop hadoop   1596 Aug 23  2019 beeline-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop 300482 Aug 23  2019 hive-default.xml.template
-rw-r--r--  1 hadoop hadoop   2365 Aug 23  2019 hive-env.sh.template
-rw-r--r--  1 hadoop hadoop   2274 Aug 23  2019 hive-exec-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop   3086 Aug 23  2019 hive-log4j2.properties.template
lrwxrwxrwx  1 hadoop hadoop     28 May 30 14:13 hive-site.xml -> /etc/hive/conf/hive-site.xml
-rw-r--r--  1 hadoop hadoop   2060 Aug 23  2019 ivysettings.xml
-rw-r--r--  1 hadoop hadoop   3558 Aug 23  2019 llap-cli-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop   7163 Aug 23  2019 llap-daemon-log4j2.properties.template
-rw-r--r--  1 hadoop hadoop   2662 Aug 23  2019 parquet-logging.properties
hadoop@kylin:/opt/hive/current/conf$ cd -
/home/hadoop
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
Started metastore server init, testing if initialized correctly...
/opt/hive/current/hcatalog/sbin/hcat_server.sh: line 91: /opt/hive/current/hcatalog/sbin/../var/log/hcat.out: No such file or directory
Metastore startup failed, see /opt/hive/current/hcatalog/sbin/../var/log/hcat.err
hadoop@kylin:~$ cd /opt/hive/current/hcatalog/sbin/../
hadoop@kylin:/opt/hive/current/hcatalog$ ll
total 28
drwxr-xr-x  7 hadoop hadoop 4096 May 29 10:47 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
drwxr-xr-x  2 hadoop hadoop 4096 May 29 10:47 bin/
drwxr-xr-x  4 hadoop hadoop 4096 May 29 10:47 etc/
drwxr-xr-x  2 hadoop hadoop 4096 May 29 10:47 libexec/
drwxr-xr-x  2 hadoop hadoop 4096 May 29 10:47 sbin/
drwxr-xr-x  5 hadoop hadoop 4096 May 29 10:47 share/

hadoop@kylin:/opt/hive/current/hcatalog$ mkdir -p var/log
hadoop@kylin:/opt/hive/current/hcatalog$ cd /home/hadoop/
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
Started metastore server init, testing if initialized correctly...
Metastore initialized successfully on port[9083].
hadoop@kylin:~$ jps
1585 RunJar
1218 NodeManager
2466 Jps
995 SecondaryNameNode
932 NameNode
1431 JobHistoryServer
1065 DataNode
2281 RunJar
1147 ResourceManager
hadoop@kylin:~$ netstat -an | grep 9083
tcp6       0      0 :::9083                 :::*                    LISTEN
hadoop@kylin:~$ cd /opt/hive/current/hcatalog/sbin/../
hadoop@kylin:/opt/hive/current/hcatalog$ ll
total 32
drwxr-xr-x  8 hadoop hadoop 4096 May 30 14:18 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
drwxr-xr-x  2 hadoop hadoop 4096 May 29 10:47 bin/
drwxr-xr-x  4 hadoop hadoop 4096 May 29 10:47 etc/
drwxr-xr-x  2 hadoop hadoop 4096 May 29 10:47 libexec/
drwxr-xr-x  2 hadoop hadoop 4096 May 29 10:47 sbin/
drwxr-xr-x  5 hadoop hadoop 4096 May 29 10:47 share/
drwxrwxr-x  3 hadoop hadoop 4096 May 30 14:18 var/
hadoop@kylin:/opt/hive/current/hcatalog$ cd var/log/
hadoop@kylin:/opt/hive/current/hcatalog/var/log$ ll
total 24
drwxrwxr-x 2 hadoop hadoop 4096 May 30 14:18 ./
drwxrwxr-x 3 hadoop hadoop 4096 May 30 14:18 ../
-rw-rw-r-- 1 hadoop hadoop    0 May 30 14:18 hcat.err
-rw-rw-r-- 1 hadoop hadoop   52 May 30 14:18 hcat.out
-rw-rw-r-- 1 hadoop hadoop    5 May 30 14:18 hcat.pid
-rw-rw-r-- 1 hadoop hadoop 7061 May 30 14:18 hcat_gc.log-202005301418

hadoop@kylin:~$ $HIVE_HOME/hcatalog/bin/hcat
2020-05-30 14:56:26,045 INFO conf.HiveConf: Found configuration file file:/etc/hive/conf/hive-site.xml
Hive Session ID = 8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:33,250 INFO SessionState: Hive Session ID = 8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:37,131 INFO session.SessionState: Created HDFS directory: /tmp/hive/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:37,187 INFO session.SessionState: Created local directory: /tmp/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:37,204 INFO session.SessionState: Created HDFS directory: /tmp/hive/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0/_tmp_space.db
usage: hcat { -e "<exec>" | -f "<file>" } [ -g "<group>" ] [ -p "<perms>" ] [ -D"<property>=<value>" ]
 -D <property=value>   use hadoop value for given property
 -e <exec>             hcat command given from command line
 -f <file>             hcat commands in file
 -g <group>            group for the db/table specified in CREATE statement
 -h,--help             Print help information
 -p <perms>            permissions for the db/table specified in CREATE statement
2020-05-30 14:56:37,287 INFO session.SessionState: Deleted directory: /tmp/hive/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0 on fs with scheme hdfs
2020-05-30 14:56:37,300 INFO session.SessionState: Deleted directory: /tmp/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0 on fs with scheme file
2020-05-30 14:56:37,593 INFO conf.MetastoreConf: Found configuration file file:/etc/hive/conf/hive-site.xml
2020-05-30 14:56:37,596 INFO conf.MetastoreConf: Unable to find config file hivemetastore-site.xml
2020-05-30 14:56:37,596 INFO conf.MetastoreConf: Found configuration file null
2020-05-30 14:56:37,599 INFO conf.MetastoreConf: Unable to find config file metastore-site.xml
2020-05-30 14:56:37,599 INFO conf.MetastoreConf: Found configuration file null
2020-05-30 14:56:37,758 INFO session.SessionState: Failed to remove classloaders from DataNucleus
java.lang.NullPointerException
......
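
# As the usage above shows, hcat accepts Hive DDL/metadata statements via -e; a minimal sketch (not run here):
$HIVE_HOME/hcatalog/bin/hcat -e "show tables in test_bdw"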

hadoop@kylin:~$ cd $HIVE_HOME
# Logs are written to whichever directory the command is run from
hadoop@kylin:/opt/hive/current$ $HIVE_HOME/hcatalog/sbin/webhcat_server.sh
Lenght of string is non zero
usage: /opt/hive/current/hcatalog/sbin/webhcat_server.sh [start|startDebug|stop|foreground]
  start           Start the Webhcat Server
  startDebug      Start the Webhcat Server listening for debugger on port 5005
  stop            Stop the Webhcat Server
  foreground      Run the Webhcat Server in the foreground
hadoop@kylin:/opt/hive/current$ $HIVE_HOME/hcatalog/sbin/webhcat_server.sh start
Lenght of string is non zero
webhcat: starting ...
webhcat: /opt/hadoop/current/bin/hadoop jar /opt/hive/hive-3.1.2/hcatalog/sbin/../share/webhcat/svr/lib/hive-webhcat-3.1.2.jar org.apache.hive.hcatalog.templeton.Main
webhcat: starting ... started.
webhcat: done
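
# A quick way to confirm WebHCat is up is its REST status endpoint (assuming the default templeton.port of 50111; a sketch, not captured above):
curl http://localhost:50111/templeton/v1/status
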
hadoop@kylin:/opt/hive/current$ ll
total 564
drwxr-xr-x 12 hadoop hadoop   4096 May 30 15:00 ./
drwxr-xr-x  3 hadoop hadoop   4096 May 29 11:00 ../
-rw-r--r--  1 hadoop hadoop  20798 Aug 23  2019 LICENSE
-rw-r--r--  1 hadoop hadoop    230 Aug 23  2019 NOTICE
-rw-r--r--  1 hadoop hadoop   2469 Aug 23  2019 RELEASE_NOTES.txt
drwxrwxr-x  3 hadoop hadoop   4096 May 29 23:32 SampleAuth/
drwxr-xr-x  3 hadoop hadoop   4096 May 29 10:47 bin/
drwxr-xr-x  2 hadoop hadoop   4096 May 29 10:47 binary-package-licenses/
drwxr-xr-x  2 hadoop hadoop   4096 May 30 14:48 conf/
drwxr-xr-x  4 hadoop hadoop   4096 May 29 10:47 examples/
drwxr-xr-x  8 hadoop hadoop   4096 May 30 14:18 hcatalog/
drwxr-xr-x  2 hadoop hadoop   4096 May 29 10:47 jdbc/
drwxr-xr-x  4 hadoop hadoop  16384 May 29 16:42 lib/
drwxr-xr-x  4 hadoop hadoop   4096 May 29 10:47 scripts/
drwxrwxr-x  3 hadoop hadoop   4096 May 29 23:54 warehouse/
-rw-rw-r--  1 hadoop hadoop      0 May 30 15:00 webhcat-console-error.log
-rw-rw-r--  1 hadoop hadoop     35 May 30 15:00 webhcat-console.log
-rw-rw-r--  1 hadoop hadoop 472856 May 30 15:02 webhcat.log
-rw-rw-r--  1 hadoop hadoop      5 May 30 15:02 webhcat.pid
hadoop@kylin:/opt/hive/current$ $HIVE_HOME/hcatalog/sbin/webhcat_server.sh stop
Lenght of string is non zero
webhcat: stopping ...
webhcat: stopping ... stopped
webhcat: done
hadoop@kylin:/opt/hive/current$

# HiveQL operations
# https://cwiki.apache.org/confluence/display/Hive/GettingStarted
# 
#Simple Example Use Cases

CREATE TABLE u_data (
  userid INT,
  movieid INT,
  rating INT,
  unixtime STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

hadoop@kylin:~/data$ wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
hadoop@kylin:~/data$ unzip ml-100k.zip

hive> load data local inpath '/home/hadoop/data/ml-100k/u.data' overwrite into table u_data;

hive> SELECT * FROM u_data limit 5;
OK
196     242     3       881250949
186     302     3       891717742
22      377     1       878887116
244     51      2       880606923
166     346     1       886397596
Time taken: 0.549 seconds, Fetched: 5 row(s)
hive> desc u_data;
OK
userid                  int
movieid                 int
rating                  int
unixtime                string
Time taken: 0.171 seconds, Fetched: 4 row(s)

---
hadoop@kylin:~/data$ cat weekday_mapper.py
import sys
import datetime

for line in sys.stdin:
    line = line.strip()
    userid, movieid, rating, unixtime = line.split('\t')
    weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
    print ('\t'.join([userid, movieid, rating, str(weekday)]))
---
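
# Before the TRANSFORM query below can run, the target table must exist and the mapper script must be registered
# with the session (as in the Hive GettingStarted guide referenced above; the local path matches this setup):

CREATE TABLE u_data_new (
  userid INT,
  movieid INT,
  rating INT,
  weekday INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';

add FILE /home/hadoop/data/weekday_mapper.py;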

hive> INSERT OVERWRITE TABLE u_data_new
    > SELECT
    > TRANSFORM (userid, movieid, rating, unixtime)
    > USING 'python3 weekday_mapper.py'
    > AS (userid, movieid, rating, weekday)
    > FROM u_data;

hive> SELECT weekday, COUNT(*)
    > FROM u_data_new
    > GROUP BY weekday;
Query ID = hadoop_20200530192253_3d5afdf4-0d16-4ac1-800b-0d612d099beb
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1590818128923_0009, Tracking URL = http://kylin.localdomain:8088/proxy/application_1590818128923_0009/
Kill Command = /opt/hadoop/current/bin/mapred job  -kill job_1590818128923_0009
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2020-05-30 19:23:50,575 Stage-1 map = 0%,  reduce = 0%
2020-05-30 19:24:08,948 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 13.7 sec
2020-05-30 19:24:20,636 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 20.63 sec
MapReduce Total cumulative CPU time: 20 seconds 630 msec
Ended Job = job_1590818128923_0009
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1  Reduce: 1   Cumulative CPU: 20.63 sec   HDFS Read: 1192395 HDFS Write: 227 SUCCESS
Total MapReduce CPU Time Spent: 20 seconds 630 msec
OK
1       12254
2       13579
3       14430
4       15114
5       14743
6       18229
7       11651
Time taken: 89.704 seconds, Fetched: 7 row(s)
hive> select * from u_data_new limit 5;
OK
196     242     3       4
186     302     3       7
22      377     1       5
244     51      2       4
166     346     1       1
Time taken: 0.496 seconds, Fetched: 5 row(s)
hive>

HBase 2.2.4

#HBase 2.2.4
#root@kylin:~/wget# wget https://mirrors.tuna.tsinghua.edu.cn/apache/hbase/stable/hbase-2.2.4-bin.tar.gz
#
root@kylin:~/wget# ll
total 841436
drwxr-xr-x 2 root root      4096 May 30 20:02 ./
drwx------ 8 root root      4096 May 30 20:06 ../
-rw-r--r-- 1 root root 278813748 Aug 27  2019 apache-hive-3.1.2-bin.tar.gz
-rw-r--r-- 1 root root 359196911 Sep 23  2019 hadoop-3.2.1.tar.gz
-rw-r--r-- 1 root root 223600848 Mar 20 16:25 hbase-2.2.4-bin.tar.gz
root@kylin:~/wget# mkdir /opt/hbase
root@kylin:~/wget# tar -zxf hbase-2.2.4-bin.tar.gz -C /opt/hbase
root@kylin:~/wget# cd /opt/hbase/
root@kylin:/opt/hbase# ll
total 12
drwxr-xr-x 3 root root 4096 May 30 20:23 ./
drwxr-xr-x 5 root root 4096 May 30 20:23 ../
drwxr-xr-x 6 root root 4096 May 30 20:23 hbase-2.2.4/
root@kylin:/opt/hbase# ln -s hbase-2.2.4 current
root@kylin:/opt/hbase# chown -R hadoop:hadoop /opt/hbase/
root@kylin:/opt/hbase# ll
total 12
drwxr-xr-x 3 hadoop hadoop 4096 May 30 20:25 ./
drwxr-xr-x 5 root   root   4096 May 30 20:23 ../
lrwxrwxrwx 1 hadoop hadoop   11 May 30 20:25 current -> hbase-2.2.4/
drwxr-xr-x 6 hadoop hadoop 4096 May 30 20:23 hbase-2.2.4/
root@kylin:/opt/hbase# ll hbase-2.2.4/
total 944
drwxr-xr-x 6 hadoop hadoop   4096 May 30 20:23 ./
drwxr-xr-x 3 hadoop hadoop   4096 May 30 20:25 ../
-rw-r--r-- 1 hadoop hadoop 164157 Mar 11 12:25 CHANGES.md
-rw-rw-r-- 1 hadoop hadoop    262 May  2  2018 LEGAL
-rw-rw-r-- 1 hadoop hadoop 129312 Mar 11 13:01 LICENSE.txt
-rw-rw-r-- 1 hadoop hadoop 520601 Mar 11 13:01 NOTICE.txt
-rw-r--r-- 1 hadoop hadoop   1477 Jan 20 13:23 README.txt
-rw-r--r-- 1 hadoop hadoop 101401 Mar 11 12:25 RELEASENOTES.md
drwxr-xr-x 4 hadoop hadoop   4096 Mar 11 11:17 bin/
drwxr-xr-x 2 hadoop hadoop   4096 Jan 20 13:46 conf/
drwxr-xr-x 7 hadoop hadoop   4096 Mar 11 12:58 hbase-webapps/
drwxr-xr-x 6 hadoop hadoop  12288 May 30 20:24 lib/
root@kylin:/opt/hbase# mkdir /etc/hbase
root@kylin:/opt/hbase# cp -r /opt/hbase/current/conf /etc/hbase/
root@kylin:/opt/hbase# chown -R hadoop:hadoop /etc/hbase/

root@kylin:~# vim /home/hadoop/.bashrc
---
# HBase
export HBASE_HOME=/opt/hbase/current
export HBASE_CONF_DIR=/etc/hbase/conf
export PATH=$PATH:$HBASE_HOME/bin
---
root@kylin:~# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop

hadoop@kylin:~$ vim /etc/hbase/conf/hbase-env.sh
---
export JAVA_HOME=/usr/java/default/

---
# Pseudo-distributed setup
#http://hbase.apache.org/book.html#quickstart
#
hadoop@kylin:~$ vim /etc/hbase/conf/hbase-site.xml
---
<property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
</property>
<property>
        <name>hbase.rootdir</name>
        <value>hdfs://localhost:9000/hbase</value>
</property>
<property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/opt/hbase/current/data/zookeeper</value>
</property>
<property>
        <name>hbase.master.maxclockskew</name>
        <value>180000</value>
        <description>Time difference of regionserver from master (NTP)</description>
</property>
<property>
        <name>hbase.zookeeper.quorum</name>
        <!-- originally localhost:2181; changed to just localhost, after which Kylin started successfully -->
        <value>localhost</value>
        <description>Use a comma-separated list for multiple ZooKeeper nodes</description>
</property>
---
hadoop@kylin:~$ cat /etc/hbase/conf/regionservers
localhost

#hadoop@kylin:~$ $HBASE_HOME/bin/start-hbase.sh

# Start
#$HBASE_HOME/bin/hbase-daemon.sh start zookeeper
$ZK_HOME/bin/zkServer.sh start
$HBASE_HOME/bin/hbase-daemon.sh start master
$HBASE_HOME/bin/hbase-daemon.sh start regionserver

#stop
$HBASE_HOME/bin/hbase-daemon.sh stop regionserver
$HBASE_HOME/bin/hbase-daemon.sh stop master
$ZK_HOME/bin/zkServer.sh stop
#$HBASE_HOME/bin/hbase-daemon.sh stop zookeeper
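
# The start/stop sequences above can be collected into a small helper script (same idea as the ~/scripts helpers used for Kafka later in this guide); a minimal sketch, script name hypothetical:
---
#!/bin/bash
# hbase-stack.sh {start|stop} - wraps the ZooKeeper/HBase daemon commands listed above
case "$1" in
  start)
    $ZK_HOME/bin/zkServer.sh start
    $HBASE_HOME/bin/hbase-daemon.sh start master
    $HBASE_HOME/bin/hbase-daemon.sh start regionserver
    ;;
  stop)
    $HBASE_HOME/bin/hbase-daemon.sh stop regionserver
    $HBASE_HOME/bin/hbase-daemon.sh stop master
    $ZK_HOME/bin/zkServer.sh stop
    ;;
  *)
    echo "Usage: $0 {start|stop}"
    ;;
esac
---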

hadoop@kylin:/opt/hbase/current/lib$ hbase version
/opt/hadoop/current/libexec/hadoop-functions.sh: line 2366: HADOOP_ORG.APACHE.HADOOP.HBASE.UTIL.GETJAVAPROPERTY_USER: bad substitution
/opt/hadoop/current/libexec/hadoop-functions.sh: line 2461: HADOOP_ORG.APACHE.HADOOP.HBASE.UTIL.GETJAVAPROPERTY_OPTS: bad substitution
# Because of the "bad substitution" messages above, the corresponding blocks in /opt/hadoop/current/libexec/hadoop-functions.sh (around lines 2366 and 2461) were commented out!!!
HBase 2.2.4
Source code repository git://hao-OptiPlex-7050/home/hao/open_source/hbase revision=67779d1a325a4f78a468af3339e73bf075888bac
Compiled by hao on 2020年 03月 11日 星期三 12:57:39 CST
From source with checksum 19ada8ab3844a5aa8ccaacdd5f2893ca

hadoop@kylin:~$ jps
30689 JobHistoryServer
30289 ResourceManager
12419 HRegionServer
6708 RunJar
30073 NameNode
30362 NodeManager
30139 SecondaryNameNode
12539 Jps
31356 DataNode
10956 QuorumPeerMain
11853 HMaster

# hbase shell
hadoop@kylin:/opt/hbase/current/bin$ hbase shell
HBase Shell
Use "help" to get list of supported commands.
Use "exit" to quit this interactive shell.
For Reference, please visit: http://hbase.apache.org/2.0/book.html#shell
Version 2.2.4, r67779d1a325a4f78a468af3339e73bf075888bac, 2020年 03月 11日 星期三 12:57:39 CST
Took 0.0112 seconds

hbase(main):003:0> table_help
Help for table-reference commands.

You can either create a table via 'create' and then manipulate the table via commands like 'put', 'get', etc.
See the standard help information for how to use each of these commands.

However, as of 0.96, you can also get a reference to a table, on which you can invoke commands.
For instance, you can get create a table and keep around a reference to it via:

   hbase> t = create 't', 'cf'

Or, if you have already created the table, you can get a reference to it:

   hbase> t = get_table 't'

You can do things like call 'put' on the table:

  hbase> t.put 'r', 'cf:q', 'v'

which puts a row 'r' with column family 'cf', qualifier 'q' and value 'v' into table t.

To read the data out, you can scan the table:

  hbase> t.scan

which will read all the rows in table 't'.

Essentially, any command that takes a table name can also be done via table reference.
Other commands include things like: get, delete, deleteall,
get_all_columns, get_counter, count, incr. These functions, along with
the standard JRuby object methods are also available via tab completion.

For more information on how to use each of these commands, you can also just type:

   hbase> t.help 'scan'

which will output more information on how to use that command.

You can also do general admin actions directly on a table; things like enable, disable,
flush and drop just by typing:

   hbase> t.enable
   hbase> t.flush
   hbase> t.disable
   hbase> t.drop

Note that after dropping a table, your reference to it becomes useless and further usage
is undefined (and not recommended).
Took 0.0030 seconds
hbase(main):004:0>
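
# A minimal smoke test of the shell commands described above (a sketch; output not captured here):
   hbase> create 'test', 'cf'
   hbase> put 'test', 'row1', 'cf:a', 'value1'
   hbase> scan 'test'
   hbase> get 'test', 'row1'
   hbase> disable 'test'
   hbase> drop 'test'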

Phoenix 5.0.0

# phoenix (5.0.0)
# 5.0.0 supports Apache HBase 2.0
root@kylin:~/wget# wget https://mirror.bit.edu.cn/apache/phoenix/apache-phoenix-5.0.0-HBase-2.0/bin/apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
--2020-05-31 07:07:07--  https://mirror.bit.edu.cn/apache/phoenix/apache-phoenix-5.0.0-HBase-2.0/bin/apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
Resolving mirror.bit.edu.cn (mirror.bit.edu.cn)... 219.143.204.117, 202.204.80.77, 2001:da8:204:1205::22
Connecting to mirror.bit.edu.cn (mirror.bit.edu.cn)|219.143.204.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 436868323 (417M) [application/octet-stream]
Saving to: ‘apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz’

apache-phoenix-5.0.0-HBase-2.0-bin.t  25%[================>                                                   ] 107.58M  3.70MB/s    eta 87s

root@kylin:~/wget# mkdir /opt/phoenix
root@kylin:~/wget# tar -zxf apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz -C /opt/phoenix/
root@kylin:~/wget# cd /opt/phoenix/
root@kylin:/opt/phoenix# ll
total 12
drwxr-xr-x 3 root root  4096 May 31 22:49 ./
drwxr-xr-x 8 root root  4096 May 31 22:49 ../
drwxr-xr-x 5  502 staff 4096 Jun 27  2018 apache-phoenix-5.0.0-HBase-2.0-bin/
root@kylin:/opt/phoenix# ln -s apache-phoenix-5.0.0-HBase-2.0-bin phoenix-5.0.0-HBase-2.0
root@kylin:/opt/phoenix# ll
total 12
drwxr-xr-x 3 root root  4096 May 31 22:51 ./
drwxr-xr-x 8 root root  4096 May 31 22:49 ../
drwxr-xr-x 5  502 staff 4096 Jun 27  2018 apache-phoenix-5.0.0-HBase-2.0-bin/
lrwxrwxrwx 1 root root    34 May 31 22:51 phoenix-5.0.0-HBase-2.0 -> apache-phoenix-5.0.0-HBase-2.0-bin/
root@kylin:/opt/phoenix# ln -s phoenix-5.0.0-HBase-2.0 current
root@kylin:/opt/phoenix# chown -R hadoop:hadoop /opt/phoenix/
root@kylin:/opt/phoenix# ll current
lrwxrwxrwx 1 hadoop hadoop 23 May 31 22:51 current -> phoenix-5.0.0-HBase-2.0/
root@kylin:/opt/phoenix# ll current/
total 474596
drwxr-xr-x 5 hadoop hadoop      4096 Jun 27  2018 ./
drwxr-xr-x 3 hadoop hadoop      4096 May 31 22:51 ../
-rw-r--r-- 1 hadoop hadoop    144163 Jun 27  2018 LICENSE
-rw-r--r-- 1 hadoop hadoop     10509 Jun 27  2018 NOTICE
-rw-r--r-- 1 hadoop hadoop      1150 Jun 27  2018 README.md
drwxr-xr-x 4 hadoop hadoop      4096 May 31 22:50 bin/
drwxr-xr-x 3 hadoop hadoop      4096 May 31 22:50 examples/
-rw-r--r-- 1 hadoop hadoop 135109092 Jun 27  2018 phoenix-5.0.0-HBase-2.0-client.jar
-rw-r--r-- 1 hadoop hadoop 110858350 Jun 27  2018 phoenix-5.0.0-HBase-2.0-hive.jar
-rw-r--r-- 1 hadoop hadoop 137968668 Jun 27  2018 phoenix-5.0.0-HBase-2.0-pig.jar
-rw-r--r-- 1 hadoop hadoop   7931132 Jun 27  2018 phoenix-5.0.0-HBase-2.0-queryserver.jar
-rw-r--r-- 1 hadoop hadoop  41800313 Jun 27  2018 phoenix-5.0.0-HBase-2.0-server.jar
-rw-r--r-- 1 hadoop hadoop  34159398 Jun 27  2018 phoenix-5.0.0-HBase-2.0-thin-client.jar
-rw-r--r-- 1 hadoop hadoop   2582830 Jun 27  2018 phoenix-core-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop   2464127 Jun 27  2018 phoenix-core-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop   4317285 Jun 27  2018 phoenix-core-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop     30171 Jun 27  2018 phoenix-flume-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop     37284 Jun 27  2018 phoenix-flume-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     47912 Jun 27  2018 phoenix-flume-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop     85153 Jun 27  2018 phoenix-hive-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop     78698 Jun 27  2018 phoenix-hive-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop    139358 Jun 27  2018 phoenix-hive-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop    701717 Jun 27  2018 phoenix-kafka-5.0.0-HBase-2.0-minimal.jar
-rw-r--r-- 1 hadoop hadoop     17292 Jun 27  2018 phoenix-kafka-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop     23673 Jun 27  2018 phoenix-kafka-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     27314 Jun 27  2018 phoenix-kafka-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop     13043 Jun 27  2018 phoenix-load-balancer-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     22960 Jun 27  2018 phoenix-load-balancer-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop   3649883 Jun 27  2018 phoenix-pherf-5.0.0-HBase-2.0-minimal.jar
-rw-r--r-- 1 hadoop hadoop    117898 Jun 27  2018 phoenix-pherf-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop     71056 Jun 27  2018 phoenix-pherf-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop    166993 Jun 27  2018 phoenix-pherf-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop     29760 Jun 27  2018 phoenix-pig-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop     46425 Jun 27  2018 phoenix-pig-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     45806 Jun 27  2018 phoenix-pig-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop     22595 Jun 27  2018 phoenix-queryserver-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop     59481 Jun 27  2018 phoenix-queryserver-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     30825 Jun 27  2018 phoenix-queryserver-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop     13491 Jun 27  2018 phoenix-queryserver-client-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop     10914 Jun 27  2018 phoenix-queryserver-client-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     16785 Jun 27  2018 phoenix-queryserver-client-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop      3569 Jun 27  2018 phoenix-spark-5.0.0-HBase-2.0-javadoc.jar
-rw-r--r-- 1 hadoop hadoop     25584 Jun 27  2018 phoenix-spark-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop    127007 Jun 27  2018 phoenix-spark-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     88764 Jun 27  2018 phoenix-spark-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop   2730675 Jun 27  2018 phoenix-tracing-webapp-5.0.0-HBase-2.0-runnable.jar
-rw-r--r-- 1 hadoop hadoop     11826 Jun 27  2018 phoenix-tracing-webapp-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop      8043 Jun 27  2018 phoenix-tracing-webapp-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop     16290 Jun 27  2018 phoenix-tracing-webapp-5.0.0-HBase-2.0.jar
drwxr-xr-x 6 hadoop hadoop      4096 May 31 22:50 python/

root@kylin:/opt/phoenix# vim /home/hadoop/.bashrc
---
# Phoenix
export PHOENIX_HOME=/opt/phoenix/current
export PATH=$PATH:$PHOENIX_HOME/bin
---
root@kylin:/opt/phoenix# source /home/hadoop/.bashrc

hadoop@kylin:/opt/phoenix/current/bin$ cp /etc/hbase/conf/hbase-site.xml /opt/phoenix/current/bin/
hadoop@kylin:/opt/phoenix/current/bin$ ./sqlline.py localhost
/usr/bin/env: ‘python’: No such file or directory

hadoop@kylin:/opt/phoenix/current/bin$ su root

root@kylin:~# apt-get install python
...

hadoop@kylin:/opt/phoenix/current/bin$ ./sqlline.py localhost
Setting property: [incremental, false]
Setting property: [isolation, TRANSACTION_READ_COMMITTED]
issuing: !connect jdbc:phoenix:localhost none none org.apache.phoenix.jdbc.PhoenixDriver
Connecting to jdbc:phoenix:localhost
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/phoenix/apache-phoenix-5.0.0-HBase-2.0-bin/phoenix-5.0.0-HBase-2.0-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
20/06/01 19:53:50 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
# Phoenix startup error
Error: org.apache.hadoop.hbase.DoNotRetryIOException: Unable to load configured region split policy 'org.apache.phoenix.schema.MetaDataSplitPolicy' for table 'SYSTEM.CATALOG' Set hbase.table.sanity.checks to false at conf or table descriptor if you want to bypass sanity checks
......
sqlline version 1.2.0
0: jdbc:phoenix:localhost> show databases;
No current connection
0: jdbc:phoenix:localhost>

hadoop@kylin:/opt/phoenix/current/bin$ cp ../*.jar /opt/hbase/current/lib/
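
# Per the Phoenix installation notes, copying only the server jar should be enough, and HBase must be restarted to pick it up; a sketch of the minimal variant:
cp $PHOENIX_HOME/phoenix-5.0.0-HBase-2.0-server.jar /opt/hbase/current/lib/
$HBASE_HOME/bin/hbase-daemon.sh stop regionserver
$HBASE_HOME/bin/hbase-daemon.sh stop master
$HBASE_HOME/bin/hbase-daemon.sh start master
$HBASE_HOME/bin/hbase-daemon.sh start regionserver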


hadoop@kylin:/opt/phoenix/current/bin$ ./sqlline.py localhost
Setting property: [incremental, false]
Setting property: [isolation, TRANSACTION_READ_COMMITTED]
issuing: !connect jdbc:phoenix:localhost none none org.apache.phoenix.jdbc.PhoenixDriver
Connecting to jdbc:phoenix:localhost
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/phoenix/apache-phoenix-5.0.0-HBase-2.0-bin/phoenix-5.0.0-HBase-2.0-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
20/06/01 20:51:32 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Connected to: Phoenix (version 5.0)
Driver: PhoenixEmbeddedDriver (version 5.0)
Autocommit status: true
Transaction isolation: TRANSACTION_READ_COMMITTED
Building list of tables and columns for tab-completion (set fastconnect to true to skip)...
133/133 (100%) Done
Done
sqlline version 1.2.0
0: jdbc:phoenix:localhost> !tables
+------------+--------------+-------------+---------------+----------+------------+----------------------------+------------+
| TABLE_CAT  | TABLE_SCHEM  | TABLE_NAME  |  TABLE_TYPE   | REMARKS  | TYPE_NAME  | SELF_REFERENCING_COL_NAME  | REF_GENERA |
+------------+--------------+-------------+---------------+----------+------------+----------------------------+------------+
|            | SYSTEM       | CATALOG     | SYSTEM TABLE  |          |            |                            |            |
|            | SYSTEM       | FUNCTION    | SYSTEM TABLE  |          |            |                            |            |
|            | SYSTEM       | LOG         | SYSTEM TABLE  |          |            |                            |            |
|            | SYSTEM       | SEQUENCE    | SYSTEM TABLE  |          |            |                            |            |
|            | SYSTEM       | STATS       | SYSTEM TABLE  |          |            |                            |            |
+------------+--------------+-------------+---------------+----------+------------+----------------------------+------------+
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .> create table test1 (mykey integer not null primary key, mycolumn varchar);
No rows affected (2.524 seconds)
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .> upsert into test1 values (1,'Hello');
1 row affected (0.395 seconds)
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .> select * from test1;
+--------+-----------+
| MYKEY  | MYCOLUMN  |
+--------+-----------+
| 1      | Hello     |
+--------+-----------+
1 row selected (0.144 seconds)
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .>  delete from test1 where mykey = 1;
1 row affected (0.042 seconds)
0: jdbc:phoenix:localhost>  select * from test1;
+--------+-----------+
| MYKEY  | MYCOLUMN  |
+--------+-----------+
+--------+-----------+
No rows selected (0.074 seconds)
0: jdbc:phoenix:localhost> !quit
Closing: org.apache.phoenix.jdbc.PhoenixConnection
hadoop@kylin:/opt/phoenix/current/bin$

# $PHOENIX_HOME/bin/sqlline.py localhost
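
# The distribution also ships the Phoenix Query Server and a thin JDBC client (queryserver.py and sqlline-thin.py in bin/, queryserver jars listed above).
# Not set up in this guide; a sketch, assuming the default query server port 8765:
$PHOENIX_HOME/bin/queryserver.py start
$PHOENIX_HOME/bin/sqlline-thin.py http://localhost:8765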

Zookeeper 3.6.1

#zookeeper 3.6.1
#root@kylin:~/wget# wget https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.6.1/apache-zookeeper-3.6.1-bin.tar.gz
#
root@kylin:~/wget# mkdir /opt/zookeeper
root@kylin:~/wget# tar -zxf apache-zookeeper-3.6.1-bin.tar.gz -C /opt/zookeeper/
root@kylin:~/wget# cd /opt/zookeeper/
root@kylin:/opt/zookeeper# mv apache-zookeeper-3.6.1-bin zookeeper-3.6.1
root@kylin:/opt/zookeeper# ln -s zookeeper-3.6.1 current
root@kylin:/opt/zookeeper# chown -R hadoop:hadoop /opt/zookeeper/

root@kylin:/opt/zookeeper# ll
total 12
drwxr-xr-x 3 hadoop hadoop 4096 May 30 22:56 ./
drwxr-xr-x 6 root   root   4096 May 30 22:53 ../
lrwxrwxrwx 1 hadoop hadoop   15 May 30 22:56 current -> zookeeper-3.6.1/
drwxr-xr-x 6 hadoop hadoop 4096 May 30 22:53 zookeeper-3.6.1/
root@kylin:/opt/zookeeper# ll current/
total 48
drwxr-xr-x 6 hadoop hadoop  4096 May 30 22:53 ./
drwxr-xr-x 3 hadoop hadoop  4096 May 30 22:56 ../
-rw-r--r-- 1 hadoop hadoop 11358 Apr 21 22:59 LICENSE.txt
-rw-r--r-- 1 hadoop hadoop   432 Apr 21 22:59 NOTICE.txt
-rw-r--r-- 1 hadoop hadoop  1963 Apr 21 22:59 README.md
-rw-r--r-- 1 hadoop hadoop  3166 Apr 21 22:59 README_packaging.md
drwxr-xr-x 2 hadoop hadoop  4096 Apr 21 22:59 bin/
drwxr-xr-x 2 hadoop hadoop  4096 Apr 21 22:59 conf/
drwxr-xr-x 5 hadoop hadoop  4096 Apr 21 23:00 docs/
drwxr-xr-x 2 hadoop hadoop  4096 May 30 22:53 lib/
root@kylin:/opt/zookeeper# mkdir /etc/zookeeper
root@kylin:/opt/zookeeper# cp -r /opt/zookeeper/current/conf /etc/zookeeper/
root@kylin:/opt/zookeeper# chown -R hadoop:hadoop /etc/zookeeper/

root@kylin:/opt/zookeeper# vim /home/hadoop/.bashrc
---
# Zookeeper
export ZK_HOME=/opt/zookeeper/current
export ZK_CONF_DIR=/etc/zookeeper/conf
export PATH=$PATH:$ZK_HOME/bin
---
root@kylin:/opt/zookeeper# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop

hadoop@kylin:~$ cd /opt/zookeeper/current
hadoop@kylin:/opt/zookeeper/current$ mkdir data
hadoop@kylin:/opt/zookeeper/current$ mkdir logs

hadoop@kylin:~$ cd /etc/zookeeper/conf/
hadoop@kylin:/etc/zookeeper/conf$ ll
total 20
drwxr-xr-x 2 hadoop hadoop 4096 May 30 22:59 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 30 22:59 ../
-rw-r--r-- 1 hadoop hadoop  535 May 30 22:59 configuration.xsl
-rw-r--r-- 1 hadoop hadoop 3435 May 30 22:59 log4j.properties
-rw-r--r-- 1 hadoop hadoop 1148 May 30 22:59 zoo_sample.cfg
hadoop@kylin:/etc/zookeeper/conf$ cp zoo_sample.cfg zoo.cfg
hadoop@kylin:/etc/zookeeper/conf$ vim zoo.cfg
---
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
---
# ZooKeeper standalone mode
# Running ZooKeeper in standalone mode is convenient for evaluation and some development and testing, but in production you should run ZooKeeper in replicated mode.
# A replicated group of servers in the same application is called a quorum; in replicated mode, all servers in the quorum have copies of the same configuration file.
hadoop@kylin:~$ cat /etc/zookeeper/conf/zoo.cfg |grep -v ^#|grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181

# Configuring ZooKeeper replicated mode
# Replicated mode requires at least three servers, and an odd number of servers is strongly recommended. With only two servers,
# a single failure leaves too few machines to form a majority quorum; two servers are inherently less stable than a single server, because there are two single points of failure.
#
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181
server.1=<zoo1>:2888:3888
server.2=<zoo2>:2888:3888
server.3=<zoo3>:2888:3888

# echo "1" > /opt/zookeeper/current/data/myid  #zoo1
# echo "2" > /opt/zookeeper/current/data/myid  #zoo2
# echo "3" > /opt/zookeeper/current/data/myid  #zoo3

hadoop@kylin:~$ cd $ZK_HOME/conf
hadoop@kylin:/opt/zookeeper/current/conf$ ln -s /etc/zookeeper/conf/zoo.cfg zoo.cfg
hadoop@kylin:/opt/zookeeper/current/conf$ ll
total 20
drwxr-xr-x 2 hadoop hadoop 4096 May 30 23:19 ./
drwxr-xr-x 8 hadoop hadoop 4096 May 30 23:13 ../
-rw-r--r-- 1 hadoop hadoop  535 Apr 21 22:59 configuration.xsl
-rw-r--r-- 1 hadoop hadoop 3435 Apr 21 22:59 log4j.properties
lrwxrwxrwx 1 hadoop hadoop   27 May 30 23:19 zoo.cfg -> /etc/zookeeper/conf/zoo.cfg
-rw-r--r-- 1 hadoop hadoop 1148 Apr 21 22:59 zoo_sample.cfg

hadoop@kylin:~$ $ZK_HOME/bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /opt/zookeeper/current/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED

hadoop@kylin:~$ $ZK_HOME/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /opt/zookeeper/current/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: standalone

hadoop@kylin:~$ ps -ef | grep zookeeper


hadoop@kylin:~$ cp /etc/zookeeper/conf/zoo.cfg /etc/hbase/conf/

hadoop@kylin:/opt/hbase/current/conf$ ln -s /etc/hbase/conf/zoo.cfg zoo.cfg
hadoop@kylin:/opt/hbase/current/conf$ ll
total 52
drwxr-xr-x 2 hadoop hadoop 4096 May 30 23:31 ./
drwxr-xr-x 8 hadoop hadoop 4096 May 30 22:18 ../
-rw-r--r-- 1 hadoop hadoop 1811 May  2  2018 hadoop-metrics2-hbase.properties
-rw-r--r-- 1 hadoop hadoop 4284 Nov 28  2019 hbase-env.cmd
-rw-r--r-- 1 hadoop hadoop 7536 Jan 20 13:23 hbase-env.sh
-rw-r--r-- 1 hadoop hadoop 2257 May  2  2018 hbase-policy.xml
-rw-r--r-- 1 hadoop hadoop  934 May  2  2018 hbase-site.xml
-rw-r--r-- 1 hadoop hadoop 1169 Jan 17 12:25 log4j-hbtop.properties
-rw-r--r-- 1 hadoop hadoop 4977 Nov 28  2019 log4j.properties
-rw-r--r-- 1 hadoop hadoop   10 May  2  2018 regionservers
lrwxrwxrwx 1 hadoop hadoop   23 May 30 23:31 zoo.cfg -> /etc/hbase/conf/zoo.cfg

hadoop@kylin:/opt/zookeeper/current/conf$ sed -i 's/INFO/ERROR/' log4j.properties
hadoop@kylin:/opt/zookeeper/current/conf$ $ZK_HOME/bin/zkCli.sh
Connecting to localhost:2181
Welcome to ZooKeeper!
JLine support is enabled

WATCHER::

WatchedEvent state:SyncConnected type:None path:null
[zk: localhost:2181(CONNECTED) 0] ls /
[hbase, zookeeper]
[zk: localhost:2181(CONNECTED) 2] deleteall /hbase
[zk: localhost:2181(CONNECTED) 3] ls /
[zookeeper]

# First, check whether ZooKeeper is up and listening on localhost port 2181
hadoop@kylin:~$ netstat -tunelp | grep 2181 | grep -i LISTEN
(Not all processes could be identified, non-owned process info
 will not be shown, you would have to be root to see it all.)
tcp6       0      0 :::2181                 :::*                    LISTEN      1001       74402      15967/java

hadoop@kylin:/etc/kylin/conf$ netstat -anp | grep 2181


# Problem: HMaster exits by itself about 10 seconds after starting.
# Many solutions found online failed to fix it!!!
# The hbase-hadoop-master log shows: HMaster: Failed to become active master
<!--
hadoop@kylin:/opt/hbase/current/logs$ tail -1000 hbase-hadoop-master-kylin.log | grep 06:30
Sun May 31 06:30:11 CST 2020 Starting master on kylin
2020-05-31 06:30:24,247 INFO  [main] util.log: Logging initialized @11657ms
2020-05-31 06:30:24,787 INFO  [main] server.Server: jetty-9.3.27.v20190418, build timestamp: 2019-04-19T02:11:38+08:00, git hash: d3e249f86955d04bc646bb620905b7c1bc596a8d
2020-05-31 06:30:24,996 INFO  [main] handler.ContextHandler: Started o.e.j.s.ServletContextHandler@4e6f2bb5{/logs,file:///opt/hbase/hbase-2.2.4/logs/,AVAILABLE}
2020-05-31 06:30:24,999 INFO  [main] handler.ContextHandler: Started o.e.j.s.ServletContextHandler@3f628ce9{/static,file:///opt/hbase/hbase-2.2.4/hbase-webapps/static/,AVAILABLE}
2020-05-31 06:30:25,273 INFO  [main] webapp.StandardDescriptorProcessor: NO JSP Support for /, did not find org.eclipse.jetty.jsp.JettyJspServlet
2020-05-31 06:30:25,409 INFO  [main] handler.ContextHandler: Started o.e.j.w.WebAppContext@2af4129d{/,file:///opt/hbase/hbase-2.2.4/hbase-webapps/master/,AVAILABLE}{file:/opt/hbase/hbase-2.2.4/hbase-webapps/master}
2020-05-31 06:30:25,436 INFO  [main] server.AbstractConnector: Started ServerConnector@295bf2a{HTTP/1.1,[http/1.1]}{0.0.0.0:16010}
2020-05-31 06:30:25,437 INFO  [main] server.Server: Started @12854ms
2020-05-31 06:30:27,875 ERROR [master/kylin:16000:becomeActiveMaster] master.HMaster: Failed to become active master
2020-05-31 06:30:27,879 ERROR [master/kylin:16000:becomeActiveMaster] master.HMaster: ***** ABORTING master kylin.localdomain,16000,1590877816126: Unhandled exception. Starting shutdown. *****
2020-05-31 06:30:29,131 INFO  [master/kylin:16000] handler.ContextHandler: Stopped o.e.j.w.WebAppContext@2af4129d{/,null,UNAVAILABLE}{file:/opt/hbase/hbase-2.2.4/hbase-webapps/master}
2020-05-31 06:30:29,149 INFO  [master/kylin:16000] server.AbstractConnector: Stopped ServerConnector@295bf2a{HTTP/1.1,[http/1.1]}{0.0.0.0:16010}
2020-05-31 06:30:29,152 INFO  [master/kylin:16000] handler.ContextHandler: Stopped o.e.j.s.ServletContextHandler@3f628ce9{/static,file:///opt/hbase/hbase-2.2.4/hbase-webapps/static/,UNAVAILABLE}
2020-05-31 06:30:29,153 INFO  [master/kylin:16000] handler.ContextHandler: Stopped o.e.j.s.ServletContextHandler@4e6f2bb5{/logs,file:///opt/hbase/hbase-2.2.4/logs/,UNAVAILABLE}
2020-05-31 06:30:29,201 ERROR [main] master.HMasterCommandLine: Master exiting
-->

# Solution:
# Add the following to the hbase-site.xml configuration file:
---
<property>
      <name>hbase.unsafe.stream.capability.enforce</name>
      <value>false</value>
</property>
---
# After restarting the services, the problem was resolved.
# 
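# To double-check the fix (a sketch): restart HMaster/HRegionServer as in the HBase section above, wait a bit, and confirm both are still running:
sleep 30; jps | grep -E 'HMaster|HRegionServer'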

Kylin 3.0.2

# Apache Kylin (3.0.2)
# http://kylin.apache.org/cn/
# 
# For Hadoop 3.1 + HBase 2.0
root@kylin:~/wget# wget https://mirrors.tuna.tsinghua.edu.cn/apache/kylin/apache-kylin-3.0.2/apache-kylin-3.0.2-bin-hadoop3.tar.gz
--2020-05-31 06:56:45--  https://mirrors.tuna.tsinghua.edu.cn/apache/kylin/apache-kylin-3.0.2/apache-kylin-3.0.2-bin-hadoop3.tar.gz
Resolving mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)... 101.6.8.193, 2402:f000:1:408:8100::1
Connecting to mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)|101.6.8.193|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 250061858 (238M) [application/octet-stream]
Saving to: ‘apache-kylin-3.0.2-bin-hadoop3.tar.gz’

apache-kylin-3.0.2-bin-hadoop3.tar.g  11%[======>                                                             ]  27.08M  2.10MB/s    eta 1m 45s

root@kylin:~/wget# mkdir /opt/kylin
root@kylin:~/wget# tar -zxf apache-kylin-3.0.2-bin-hadoop3.tar.gz -C /opt/kylin/
root@kylin:~/wget# cd /opt/kylin/
root@kylin:/opt/kylin# ll
total 12
drwxr-xr-x 3 root root 4096 May 31 14:29 ./
drwxr-xr-x 7 root root 4096 May 31 14:28 ../
drwxr-xr-x 8 root root 4096 May 12 22:16 apache-kylin-3.0.2-bin-hadoop3/
root@kylin:/opt/kylin# ln -s apache-kylin-3.0.2-bin-hadoop3 kylin-3.0.2-hadoop3
root@kylin:/opt/kylin# ln -s kylin-3.0.2-hadoop3 current
root@kylin:/opt/kylin# ll
total 12
drwxr-xr-x 3 root root 4096 May 31 14:32 ./
drwxr-xr-x 7 root root 4096 May 31 14:28 ../
drwxr-xr-x 8 root root 4096 May 12 22:16 apache-kylin-3.0.2-bin-hadoop3/
lrwxrwxrwx 1 root root   19 May 31 14:32 current -> kylin-3.0.2-hadoop3/
lrwxrwxrwx 1 root root   30 May 31 14:31 kylin-3.0.2-hadoop3 -> apache-kylin-3.0.2-bin-hadoop3/
root@kylin:/opt/kylin# chown -R hadoop:hadoop /opt/kylin/
root@kylin:/opt/kylin# ll current
lrwxrwxrwx 1 hadoop hadoop 19 May 31 14:32 current -> kylin-3.0.2-hadoop3/
root@kylin:/opt/kylin# ll current/
total 64
drwxr-xr-x 8 hadoop hadoop  4096 May 12 22:16 ./
drwxr-xr-x 3 hadoop hadoop  4096 May 31 14:32 ../
-rw-r--r-- 1 hadoop hadoop 14725 May 12 22:16 LICENSE
-rw-r--r-- 1 hadoop hadoop   167 May 12 22:16 NOTICE
-rw-r--r-- 1 hadoop hadoop  2245 May 12 22:16 README.md
-rw-r--r-- 1 hadoop hadoop    19 May 12 22:16 VERSION
drwxr-xr-x 2 hadoop hadoop  4096 May 12 22:16 bin/
-rw-r--r-- 1 hadoop hadoop   823 May 12 22:16 commit_SHA1
drwxr-xr-x 2 hadoop hadoop  4096 May 12 22:16 conf/
drwxr-xr-x 3 hadoop hadoop  4096 May 12 22:16 lib/
drwxr-xr-x 4 hadoop hadoop  4096 May 12 22:16 sample_cube/
drwxr-xr-x 9 hadoop hadoop  4096 May 12 22:16 tomcat/
drwxr-xr-x 2 hadoop hadoop  4096 May 12 22:16 tool/

root@kylin:/opt/kylin# mkdir /etc/kylin
root@kylin:/opt/kylin# cp -r /opt/kylin/current/co
commit_SHA1  conf/
root@kylin:/opt/kylin# cp -r /opt/kylin/current/conf /etc/kylin/
root@kylin:/opt/kylin# chown -R hadoop:hadoop /etc/kylin/
root@kylin:/opt/kylin# vim /home/hadoop/.bashrc
---
# Kylin
export KYLIN_HOME=/opt/kylin/current
export KYLIN_CONF_DIR=/etc/kylin/conf
export PATH=$PATH:$KYLIN_HOME/bin
---
root@kylin:/opt/kylin# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$
hadoop@kylin:~$ cd /etc/kylin/conf
hadoop@kylin:/etc/kylin/conf$ cat kylin.properties | grep kylin.job.scheduler.default
#kylin.job.scheduler.default=0
#hadoop@kylin:/etc/kylin/conf$ sed -i 's/#kylin.job.scheduler.default=0/kylin.job.scheduler.default=2/' /etc/kylin/conf/kylin.properties
#hadoop@kylin:/etc/kylin/conf$ cat kylin.properties | grep kylin.job.scheduler.default
#kylin.job.scheduler.default=2
#hadoop@kylin:/etc/kylin/conf$ echo "kylin.job.lock=org.apache.kylin.storage.hbase.util.ZookeeperJobLock" >> /etc/kylin
#/conf/kylin.properties
#hadoop@kylin:/etc/kylin/conf$ cat kylin.properties | grep kylin.job.lock
#kylin.job.lock=org.apache.kylin.storage.hbase.util.ZookeeperJobLock

#---
#hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/#kylin.job.scheduler.default=0/kylin.job.scheduler.default=2/' kylin.properties
#hadoop@kylin:/opt/kylin/current/conf$ echo "kylin.job.lock=org.apache.kylin.storage.hbase.util.ZookeeperJobLock" >> kylin.properties
#hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/#kylin.server.mode=all/kylin.server.mode=all/' kylin.properties
#---
hadoop@kylin:~$ vim /opt/kylin/current/conf/kylin_hive_conf.xml
---
<property>
    <name>hive.exec.compress.output</name>
    <value>false</value>
    <description>Enable compress</description>
</property>
---


hadoop@kylin:~$ cat /opt/kylin/current/conf/kylin.properties | grep kylin.job.scheduler.default
kylin.job.scheduler.default=2
hadoop@kylin:/opt/kylin/current/conf$ cat kylin.properties | grep -v ^#|grep -v ^$
kylin.server.mode=all

hadoop@kylin:~$ vim $KYLIN_HOME/bin/download-spark.sh
hadoop@kylin:~$ $KYLIN_HOME/bin/check-env.sh
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Retrieving hive dependency...
ERROR: Check hive`s usability failed, please check the status of your cluster

hadoop@kylin:~$ vim /home/hadoop/.bashrc
---
export hive_dependency=/etc/hive/conf:/opt/hive/current/lib/*:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-pig-adapter-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-core-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-server-extensions-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-streaming-3.1.2.jar:/opt/hive/current/lib/hive-exec-3.1.2.jar
---
hadoop@kylin:~$ source /home/hadoop/.bashrc

# Restart the system

hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Retrieving hive dependency...
Retrieving hbase dependency...
hbase-common lib not found

hadoop@kylin:~$ vim /home/hadoop/.bashrc
---
#export hbase_dependency=/etc/hbase/conf:/opt/hbase/current/lib/*
export HBASE_CLASSPATH=/opt/hbase/current/lib/hbase-common-2.2.4.jar:/opt/hbase/current/lib/*
---
hadoop@kylin:~$ source /home/hadoop/.bashrc

hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Retrieving hive dependency...
Retrieving hbase dependency...
Retrieving hadoop conf dir...
Retrieving kafka dependency...
Retrieving Spark dependency...
spark not found, set SPARK_HOME, or run bin/download-spark.sh

# Download Spark and configure SPARK_HOME
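# A sketch of two ways to satisfy this check:
# Option 1 - let Kylin fetch the Spark build it expects:
$KYLIN_HOME/bin/download-spark.sh
# Option 2 - point SPARK_HOME at a standalone install (as in the .bashrc block further below), following the /opt/<name>/current layout used throughout:
export SPARK_HOME=/opt/spark/current
export PATH=$PATH:$SPARK_HOME/bin
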
hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Using cached dependency...
Start to check whether we need to migrate acl tables
Using cached dependency...
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
......
A new Kylin instance is started by hadoop. To stop it, run 'kylin.sh stop'
Check the log at /opt/kylin/current/logs/kylin.log
Web UI is at http://kylin.localdomain:7070/kylin

hadoop@kylin:~$ lsof -i:7070
COMMAND  PID   USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
java    8348 hadoop  995u  IPv6 1010270      0t0  TCP *:7070 (LISTEN)


# user: ADMIN
# passwd: KYLIN

===============================================

# User specific aliases and functions
export JAVA_HOME=/usr/java/default
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HTTPFS_CATALINA_HOME=${HADOOP_HOME}/share/hadoop/httpfs/tomcat
export CATALINA_BASE=${HTTPFS_CATALINA_HOME}
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HTTPFS_CONFIG=/etc/hadoop/conf
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Hive
export HIVE_HOME=/opt/hive/current
export HCAT_HOME=$HIVE_HOME/hcatalog
export HIVE_CONF_DIR=/etc/hive/conf
export PATH=$PATH:$HIVE_HOME/bin
export hive_dependency=/etc/hive/conf:/opt/hive/current/lib/*:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-pig-adapter-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-core-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-server-extensions-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-streaming-3.1.2.jar:/opt/hive/current/lib/hive-exec-3.1.2.jar

# HBase
export HBASE_HOME=/opt/hbase/current
export HBASE_CONF_DIR=/etc/hbase/conf
export PATH=$PATH:$HBASE_HOME/bin
#export hbase_dependency=/etc/hbase/conf:/opt/hbase/current/lib/*
export HBASE_CLASSPATH=/opt/hbase/current/lib/hbase-common-2.2.4.jar:/opt/hbase/current/lib/*

# Zookeeper
export ZK_HOME=/opt/zookeeper/current
export ZK_CONF_DIR=/etc/zookeeper/conf
export PATH=$PATH:$ZK_HOME/bin

# Kylin
export KYLIN_HOME=/opt/kylin/current
export KYLIN_CONF_DIR=/etc/kylin/conf
export PATH=$PATH:$KYLIN_HOME/bin

#Spark
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf
export PATH=$PATH:$SPARK_HOME/bin

===============================================

# Using the Kylin sample cube
# Personal reference blog post: https://blog.csdn.net/weixin_45883933/article/details/105409492?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522159095082319195264554930%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fblog.%2522%257D&request_id=159095082319195264554930&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~blog~first_rank_v1~rank_blog_v1-2-105409492.pc_v1_rank_blog_v1&utm_term=kylin

hadoop@kylin:/opt/kylin/current/bin$ ll | grep sample
-rwxr-xr-x  1 hadoop hadoop  1299 May 12 22:16 sample-streaming.sh*
-rwxr-xr-x  1 hadoop hadoop  6004 May 12 22:16 sample.sh*
hadoop@kylin:/opt/kylin/current/bin$ ./sample.sh
......
Sample cube is created successfully in project 'learn_kylin'.
Restart Kylin Server or click Web UI => System Tab => Reload Metadata to take effect

hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/INFO/WARN/' /etc/kylin/conf/kylin-server-log4j.properties
hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/INFO/WARN/' /etc/kylin/conf/kylin-tools-log4j.properties
hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/INFO/WARN/' /etc/kylin/conf/kylin-spark-log4j.properties

---
# A mistaken workaround, tried when a class conflict appeared while starting Kylin!!!
#hadoop@kylin:~$ mv /opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar /opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar.bak
# This change made HMaster and HRegionServer exit right after starting, and Hadoop misbehaved as well.
# It was reverted afterwards!!!
---

# Later, a case appeared where the Kylin service started but the Web UI could not be accessed:
---
HTTP Status 404 – Not Found
Type Status Report

Message Not found

Description The origin server did not find a current representation for the target resource or is not willing to disclose that one exists.

Apache Tomcat/8.5.51
---

# Still looking for a solution:

Kafka 2.5.0

# Kafka 2.5.0 
# Scala 2.12 (2.12 is recommended).
root@kylin:~/wget# wget https://mirror.bit.edu.cn/apache/kafka/2.5.0/kafka_2.12-2.5.0.tgz
--2020-06-01 03:16:23--  https://mirror.bit.edu.cn/apache/kafka/2.5.0/kafka_2.12-2.5.0.tgz
Resolving mirror.bit.edu.cn (mirror.bit.edu.cn)... 219.143.204.117, 202.204.80.77, 2001:da8:204:1205::22
Connecting to mirror.bit.edu.cn (mirror.bit.edu.cn)|219.143.204.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 61604633 (59M) [application/octet-stream]
Saving to: ‘kafka_2.12-2.5.0.tgz’

kafka_2.12-2.5.0.tgz                  14%[=========>                                                          ]   8.79M  1.22MB/s    eta 47s

root@kylin:~/wget# mkdir /opt/kafka
root@kylin:~/wget# tar -zxf kafka_2.12-2.5.0.tgz -C /opt/kafka/
root@kylin:~/wget# cd /opt/kafka/
root@kylin:/opt/kafka# ll
total 12
drwxr-xr-x  3 root root 4096 Jun  1 22:18 ./
drwxr-xr-x 10 root root 4096 Jun  1 22:16 ../
drwxr-xr-x  6 root root 4096 Apr  8 09:16 kafka_2.12-2.5.0/
root@kylin:/opt/kafka# ln -s kafka_2.12-2.5.0 current
root@kylin:/opt/kafka# chown -R hadoop:hadoop /opt/kafka/
root@kylin:/opt/kafka# ll current/
total 60
drwxr-xr-x 6 hadoop hadoop  4096 Apr  8 09:16 ./
drwxr-xr-x 3 hadoop hadoop  4096 Jun  1 22:19 ../
-rw-r--r-- 1 hadoop hadoop 32216 Apr  8 09:13 LICENSE
-rw-r--r-- 1 hadoop hadoop   337 Apr  8 09:13 NOTICE
drwxr-xr-x 3 hadoop hadoop  4096 Apr  8 09:16 bin/
drwxr-xr-x 2 hadoop hadoop  4096 Apr  8 09:16 config/
drwxr-xr-x 2 hadoop hadoop  4096 Jun  1 22:18 libs/
drwxr-xr-x 2 hadoop hadoop  4096 Apr  8 09:16 site-docs/

root@kylin:/opt/kafka# mkdir /etc/kafka
root@kylin:/opt/kafka# cp -r /opt/kafka/current/config /etc/kafka/
root@kylin:/opt/kafka# chown -R hadoop:hadoop /etc/kafka/
root@kylin:/opt/kafka# vim /home/hadoop/.bashrc
---
# Kafka
export KAFKA_HOME=/opt/kafka/current
export KAFKA_CONF_DIR=/etc/kafka/config
export PATH=$PATH:$KAFKA_HOME/bin
---
root@kylin:/opt/kafka# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$ cd /etc/kafka/config/
hadoop@kylin:/etc/kafka/config$ ll
total 80
drwxr-xr-x 2 hadoop hadoop 4096 Jun  1 22:23 ./
drwxr-xr-x 3 hadoop hadoop 4096 Jun  1 22:23 ../
-rw-r--r-- 1 hadoop hadoop  906 Jun  1 22:23 connect-console-sink.properties
-rw-r--r-- 1 hadoop hadoop  909 Jun  1 22:23 connect-console-source.properties
-rw-r--r-- 1 hadoop hadoop 5321 Jun  1 22:23 connect-distributed.properties
-rw-r--r-- 1 hadoop hadoop  883 Jun  1 22:23 connect-file-sink.properties
-rw-r--r-- 1 hadoop hadoop  881 Jun  1 22:23 connect-file-source.properties
-rw-r--r-- 1 hadoop hadoop 2247 Jun  1 22:23 connect-log4j.properties
-rw-r--r-- 1 hadoop hadoop 2540 Jun  1 22:23 connect-mirror-maker.properties
-rw-r--r-- 1 hadoop hadoop 2262 Jun  1 22:23 connect-standalone.properties
-rw-r--r-- 1 hadoop hadoop 1221 Jun  1 22:23 consumer.properties
-rw-r--r-- 1 hadoop hadoop 4675 Jun  1 22:23 log4j.properties
-rw-r--r-- 1 hadoop hadoop 1925 Jun  1 22:23 producer.properties
-rw-r--r-- 1 hadoop hadoop 6849 Jun  1 22:23 server.properties
-rw-r--r-- 1 hadoop hadoop 1032 Jun  1 22:23 tools-log4j.properties
-rw-r--r-- 1 hadoop hadoop 1169 Jun  1 22:23 trogdor.conf
-rw-r--r-- 1 hadoop hadoop 1205 Jun  1 22:23 zookeeper.properties
hadoop@kylin:/etc/kafka/config$  cat server.properties |grep -v ^#|grep -v ^$
broker.id=0
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/tmp/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0

hadoop@kylin:/etc/kafka/config$ echo "listeners=PLAINTEXT://localhost:9092" >> server.properties
hadoop@kylin:/etc/kafka/config$ echo "delete.topic.enble=true" >> server.properties

hadoop@kylin:/etc/kafka/config$ cat server.properties | grep tmp
log.dirs=/tmp/kafka-logs
hadoop@kylin:/etc/kafka/config$ ll $KAFKA_HOME
lrwxrwxrwx 1 hadoop hadoop 16 Jun  1 22:19 /opt/kafka/current -> kafka_2.12-2.5.0/
hadoop@kylin:/etc/kafka/config$ ll $KAFKA_HOME/
total 60
drwxr-xr-x 6 hadoop hadoop  4096 Apr  8 09:16 ./
drwxr-xr-x 3 hadoop hadoop  4096 Jun  1 22:19 ../
-rw-r--r-- 1 hadoop hadoop 32216 Apr  8 09:13 LICENSE
-rw-r--r-- 1 hadoop hadoop   337 Apr  8 09:13 NOTICE
drwxr-xr-x 3 hadoop hadoop  4096 Jun  1 22:47 bin/
drwxr-xr-x 2 hadoop hadoop  4096 Apr  8 09:16 config/
drwxr-xr-x 2 hadoop hadoop  4096 Jun  1 22:18 libs/
drwxr-xr-x 2 hadoop hadoop  4096 Apr  8 09:16 site-docs/
hadoop@kylin:/etc/kafka/config$ pwd
/etc/kafka/config
hadoop@kylin:/etc/kafka/config$ mkdir /opt/kafka/current/kafka-logs
hadoop@kylin:/etc/kafka/config$ vim server.properties
hadoop@kylin:/etc/kafka/config$ cat server.properties |grep -v ^#|grep -v ^$
broker.id=0
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/current/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
listeners=PLAINTEXT://localhost:9092
delete.topic.enable=true

# hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server.properties &
# This way the log output is printed to the console and you are left at the startup screen
# 

# Kafka Start
hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &
[1] 24277
hadoop@kylin:~$ jps | grep Kafka
24277 Kafka

# Kafka Stop
# Note: kafka-server-stop.sh signals every local Kafka broker process; the properties-file argument is effectively ignored.
hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &

# Link the config in advance, so anything that later reads the default location picks up /etc/kafka/config/server.properties
hadoop@kylin:/opt/kafka/current/config$ mv server.properties server.properties.bak
hadoop@kylin:/opt/kafka/current/config$ ln -s /etc/kafka/config/server.properties server.properties

# http://kafka.apache.org/quickstart
# 

hadoop@kylin:~/scripts$ chmod +x kafka_*
hadoop@kylin:~/scripts$ ll
total 24
drwxrwxr-x  2 hadoop hadoop 4096 Jun  2 00:36 ./
drwxr-xr-x 11 hadoop hadoop 4096 Jun  2 00:36 ../
-rwxrwxr-x  1 hadoop hadoop  172 Jun  2 00:36 kafka_Consumer.sh*
-rwxrwxr-x  1 hadoop hadoop  192 Jun  2 00:23 kafka_CreateTopic.sh*
-rwxrwxr-x  1 hadoop hadoop   86 Jun  2 00:27 kafka_ListTopic.sh*
-rwxrwxr-x  1 hadoop hadoop  155 Jun  2 00:33 kafka_Producer.sh*

hadoop@kylin:~/scripts$ mv kafka_CreateTopic.sh 1-kafka_CreateTopic.sh
hadoop@kylin:~/scripts$ mv kafka_ListTopic.sh 2-kafka_ListTopic.sh
hadoop@kylin:~/scripts$ mv kafka_Producer.sh 3-kafka_Producer.sh
hadoop@kylin:~/scripts$ mv kafka_Consumer.sh 4-kafka_Consumer.sh
hadoop@kylin:~/scripts$ ll
total 24
drwxrwxr-x  2 hadoop hadoop 4096 Jun  2 00:41 ./
drwxr-xr-x 11 hadoop hadoop 4096 Jun  2 00:36 ../
-rwxrwxr-x  1 hadoop hadoop  192 Jun  2 00:23 1-kafka_CreateTopic.sh*
-rwxrwxr-x  1 hadoop hadoop   86 Jun  2 00:27 2-kafka_ListTopic.sh*
-rwxrwxr-x  1 hadoop hadoop  155 Jun  2 00:33 3-kafka_Producer.sh*
-rwxrwxr-x  1 hadoop hadoop  172 Jun  2 00:36 4-kafka_Consumer.sh*

# Use Kafka 
# Create a topic
hadoop@kylin:~/scripts$ cat 1-kafka_CreateTopic.sh
#!/bin/bash

read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic $TopicName
# List topics
hadoop@kylin:~/scripts$ cat 2-kafka_ListTopic.sh
#!/bin/bash

$KAFKA_HOME/bin/kafka-topics.sh --list --bootstrap-server localhost:9092
# Send messages
hadoop@kylin:~/scripts$ cat 3-kafka_Producer.sh
#!/bin/bash

read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-console-producer.sh --bootstrap-server localhost:9092 --topic $TopicName
# Receive messages
hadoop@kylin:~/scripts$ cat 4-kafka_Consumer.sh
#!/bin/bash

read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic $TopicName --from-beginning

hadoop@kylin:~/scripts$ ./1-kafka_CreateTopic.sh
Please input a topic name:test
Created topic test.

hadoop@kylin:~/scripts$ ./2-kafka_ListTopic.sh
test
# shell 1
hadoop@kylin:~/scripts$ ./3-kafka_Producer.sh
Please input a topic name:test
>a
>b
>c
>^C
# shell 2
hadoop@kylin:~/scripts$ ./4-kafka_Consumer.sh
Please input a topic name:test
a
b
c
^CProcessed a total of 3 messages

# Setting up a multi-broker cluster
# So far we have been running against a single broker, but that is not very interesting. For Kafka, a single broker is just a cluster of size one,
# so not much changes other than starting a few more broker instances. Just to get a feel for it, let's expand the cluster to three nodes (still all on the local machine).

hadoop@kylin:~/scripts$ cd $KAFKA_CONF_DIR
hadoop@kylin:/etc/kafka/config$ cp server.properties server-1.properties
hadoop@kylin:/etc/kafka/config$ cp server.properties server-2.properties

hadoop@kylin:/etc/kafka/config$ vim server-1.properties
hadoop@kylin:/etc/kafka/config$ cat server-1.properties |grep -v ^#|grep -v ^$
broker.id=1
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/current/kafka-logs-1
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
listeners=PLAINTEXT://localhost:9093
delete.topic.enable=true
hadoop@kylin:/etc/kafka/config$ vim server-2.properties
hadoop@kylin:/etc/kafka/config$ cat server-2.properties |grep -v ^#|grep -v ^$
broker.id=2
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/current/kafka-logs-2
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
listeners=PLAINTEXT://localhost:9094
delete.topic.enable=true

hadoop@kylin:/etc/kafka/config$ mkdir /opt/kafka/current/kafka-logs-1
hadoop@kylin:/etc/kafka/config$ mkdir /opt/kafka/current/kafka-logs-2
hadoop@kylin:/etc/kafka/config$ cd -
/opt/kafka/current
hadoop@kylin:/opt/kafka/current$ ll
total 76
drwxr-xr-x 10 hadoop hadoop  4096 Jun  2 01:29 ./
drwxr-xr-x  3 hadoop hadoop  4096 Jun  1 22:19 ../
-rw-r--r--  1 hadoop hadoop 32216 Apr  8 09:13 LICENSE
-rw-r--r--  1 hadoop hadoop   337 Apr  8 09:13 NOTICE
drwxr-xr-x  3 hadoop hadoop  4096 Jun  1 22:47 bin/
drwxr-xr-x  2 hadoop hadoop  4096 Jun  1 23:35 config/
drwxrwxr-x 53 hadoop hadoop  4096 Jun  2 01:29 kafka-logs/
drwxrwxr-x  2 hadoop hadoop  4096 Jun  2 01:29 kafka-logs-1/
drwxrwxr-x  2 hadoop hadoop  4096 Jun  2 01:29 kafka-logs-2/
drwxr-xr-x  2 hadoop hadoop  4096 Jun  1 22:18 libs/
drwxrwxr-x  2 hadoop hadoop  4096 Jun  2 01:07 logs/
drwxr-xr-x  2 hadoop hadoop  4096 Apr  8 09:16 site-docs/

hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
[2] 28303
hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &
[3] 28724

hadoop@kylin:~$ jps | grep Kafka
28303 Kafka
28724 Kafka
24277 Kafka

nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &
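# Note: kafka-server-stop.sh typically ignores the properties-file argument and signals every broker
# process on the host. To stop only one broker, a sketch (assuming the grep pattern below uniquely
# matches that broker, as in the ps lookup further down) is to kill its PID directly:
# kill $(ps aux | grep server-1.properties | grep -v grep | awk '{print $2}')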

# Create a topic with a replication factor of three:
hadoop@kylin:~/scripts$ cat 1-kafka_CreateTopic3f.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 3 --partitions 1 --topic $TopicName

# Now that we have a cluster, how do we know which broker is doing what? Run the "describe topics" command to find out:
hadoop@kylin:~/scripts$ cat 2-kafka_DescribeTopic.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-topics.sh --describe --bootstrap-server localhost:9092 --topic $TopicName

hadoop@kylin:~/scripts$ ./2-kafka_DescribeTopic.sh
Please input a topic name:test
Topic: test     PartitionCount: 1       ReplicationFactor: 1    Configs: segment.bytes=1073741824
        Topic: test     Partition: 0    Leader: 0       Replicas: 0     Isr: 0

# Create the topic my-replicated-topic
hadoop@kylin:~/scripts$ ./1-kafka_CreateTopic3f.sh
Please input a topic name:my-replicated-topic
Created topic my-replicated-topic.

hadoop@kylin:~/scripts$ ./2-kafka_DescribeTopic.sh
Please input a topic name:my-replicated-topic
Topic: my-replicated-topic      PartitionCount: 1       ReplicationFactor: 3    Configs: segment.bytes=1073741824
        Topic: my-replicated-topic      Partition: 0    Leader: 1       Replicas: 1,0,2 Isr: 1,0,2


hadoop@kylin:~/scripts$ ps aux | grep server-1.properties | awk 'NR==1{print}' | awk '{print $2}'
28303
hadoop@kylin:~/scripts$ jps | grep Kafka
28303 Kafka
28724 Kafka
24277 Kafka

# Kafka Connect
# Kafka Connect is a tool shipped with Kafka for importing data into and exporting data out of Kafka.
# It is an extensible tool that runs connectors, which implement the custom logic for interacting with external systems.
# In this quickstart we run Kafka Connect with simple connectors that import data from a file into a Kafka topic and export data from a Kafka topic back out to a file.
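# For reference, the connector files shipped with Kafka under config/ look roughly like the sketch
# below (the source reads test.txt into the connect-test topic, the sink writes it back out to
# test.sink.txt); verify against your own $KAFKA_CONF_DIR copies:
#
# connect-file-source.properties
#   name=local-file-source
#   connector.class=FileStreamSource
#   tasks.max=1
#   file=test.txt
#   topic=connect-test
#
# connect-file-sink.properties
#   name=local-file-sink
#   connector.class=FileStreamSink
#   tasks.max=1
#   file=test.sink.txt
#   topics=connect-test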

#nohup $KAFKA_HOME/bin/connect-standalone.sh $KAFKA_CONF_DIR/connect-standalone.properties $KAFKA_CONF_DIR/connect-file-source.properties $KAFKA_CONF_DIR/connect-file-sink.properties > $KAFKA_HOME/logs/nohup.out-Connect 2>&1 &
hadoop@kylin:~/scripts$ cd $KAFKA_HOME
hadoop@kylin:/opt/kafka/current$ echo -e "foo\nbar" > test.txt
hadoop@kylin:/opt/kafka/current$ ll
total 80
drwxr-xr-x 10 hadoop hadoop  4096 Jun  2 02:31 ./
drwxr-xr-x  3 hadoop hadoop  4096 Jun  1 22:19 ../
-rw-r--r--  1 hadoop hadoop 32216 Apr  8 09:13 LICENSE
-rw-r--r--  1 hadoop hadoop   337 Apr  8 09:13 NOTICE
drwxr-xr-x  3 hadoop hadoop  4096 Jun  1 22:47 bin/
drwxr-xr-x  2 hadoop hadoop  4096 Jun  1 23:35 config/
drwxrwxr-x 54 hadoop hadoop  4096 Jun  2 02:31 kafka-logs/
drwxrwxr-x  3 hadoop hadoop  4096 Jun  2 02:31 kafka-logs-1/
drwxrwxr-x  3 hadoop hadoop  4096 Jun  2 02:31 kafka-logs-2/
drwxr-xr-x  2 hadoop hadoop  4096 Jun  1 22:18 libs/
drwxrwxr-x  2 hadoop hadoop  4096 Jun  2 02:07 logs/
drwxr-xr-x  2 hadoop hadoop  4096 Apr  8 09:16 site-docs/
-rw-rw-r--  1 hadoop hadoop     8 Jun  2 02:31 test.txt
hadoop@kylin:/opt/kafka/current$ nohup $KAFKA_HOME/bin/connect-standalone.sh $KAFKA_CONF_DIR/connect-standalone.properties $KAFKA_CONF_DIR/connect-file-source.properties $KAFKA_CONF_DIR/connect-file-sink.properties > $KAFKA_HOME/logs/nohup.out-Connect 2>&1 &
[4] 31488
hadoop@kylin:/opt/kafka/current$ jps
17568 RunJar
31488 ConnectStandalone
17792 HMaster
17154 ResourceManager
17444 JobHistoryServer
17608 QuorumPeerMain
17898 HRegionServer
28303 Kafka
28724 Kafka
24277 Kafka
17239 NodeManager
16953 NameNode
31867 Jps
17019 SecondaryNameNode
17084 DataNode
19903 RunJar
17567 RunJar
hadoop@kylin:/opt/kafka/current$ ll
total 84
drwxr-xr-x 10 hadoop hadoop  4096 Jun  2 02:36 ./
drwxr-xr-x  3 hadoop hadoop  4096 Jun  1 22:19 ../
-rw-r--r--  1 hadoop hadoop 32216 Apr  8 09:13 LICENSE
-rw-r--r--  1 hadoop hadoop   337 Apr  8 09:13 NOTICE
drwxr-xr-x  3 hadoop hadoop  4096 Jun  1 22:47 bin/
drwxr-xr-x  2 hadoop hadoop  4096 Jun  1 23:35 config/
drwxrwxr-x 54 hadoop hadoop  4096 Jun  2 02:36 kafka-logs/
drwxrwxr-x  3 hadoop hadoop  4096 Jun  2 02:36 kafka-logs-1/
drwxrwxr-x  4 hadoop hadoop  4096 Jun  2 02:36 kafka-logs-2/
drwxr-xr-x  2 hadoop hadoop  4096 Jun  1 22:18 libs/
drwxrwxr-x  2 hadoop hadoop  4096 Jun  2 02:36 logs/
drwxr-xr-x  2 hadoop hadoop  4096 Apr  8 09:16 site-docs/
-rw-rw-r--  1 hadoop hadoop     8 Jun  2 02:36 test.sink.txt
-rw-rw-r--  1 hadoop hadoop     8 Jun  2 02:31 test.txt
hadoop@kylin:/opt/kafka/current$ more test.sink.txt
foo
bar
hadoop@kylin:/opt/kafka/current$ /home/hadoop/scripts/4-kafka_Consumer.sh
Please input a topic name:connect-test
{"schema":{"type":"string","optional":false},"payload":"foo"}
{"schema":{"type":"string","optional":false},"payload":"bar"}
# In another shell, run: hadoop@kylin:/opt/kafka/current$ echo Another line>> test.txt
{"schema":{"type":"string","optional":false},"payload":"Another line"}

# Kafka documentation
# http://kafka.apache.org/documentation/

Maxwells

# maxwells
# Maxwell extracts insert, update and other operations from the MySQL binlog and publishes them as JSON, acting as a Kafka producer.
# 
# http://maxwells-daemon.io/
# root@kylin:~/wget# wget https://github.com/zendesk/maxwell/releases/download/v1.26.3/maxwell-1.26.3.tar.gz 
# 

# maxwell quickstart
# http://maxwells-daemon.io/quickstart/

root@kylin:~/wget# mkdir /opt/maxwell
root@kylin:~/wget# tar -zxf maxwell-1.26.3.tar.gz -C /opt/maxwell/
root@kylin:~/wget# cd /opt/maxwell/
root@kylin:/opt/maxwell# ll
total 12
drwxr-xr-x  3 root root 4096 Jun  2 20:45 ./
drwxr-xr-x 11 root root 4096 Jun  2 20:44 ../
drwxr-xr-x  4 root root 4096 Jun  2 20:45 maxwell-1.26.3/
root@kylin:/opt/maxwell# ln -s maxwell-1.26.3 current
root@kylin:/opt/maxwell# ll
total 12
drwxr-xr-x  3 root root 4096 Jun  2 20:45 ./
drwxr-xr-x 11 root root 4096 Jun  2 20:44 ../
lrwxrwxrwx  1 root root   14 Jun  2 20:45 current -> maxwell-1.26.3/
drwxr-xr-x  4 root root 4096 Jun  2 20:45 maxwell-1.26.3/
root@kylin:/opt/maxwell# ll current/
total 76
drwxr-xr-x 4 root root   4096 Jun  2 20:45 ./
drwxr-xr-x 3 root root   4096 Jun  2 20:45 ../
-rw-r--r-- 1  501 staff   548 Apr 23 02:50 LICENSE
-rw-r--r-- 1  501 staff  1429 May 26 13:16 README.md
drwxr-xr-x 2 root root   4096 Jun  2 20:45 bin/
-rw-r--r-- 1  501 staff 20113 May  6 22:47 config.md
-rw-r--r-- 1  501 staff 11574 May  6 22:47 config.properties.example
-rw-r--r-- 1  501 staff 10259 Apr 23 02:50 kinesis-producer-library.properties.example
drwxr-xr-x 3  501 staff  4096 May 26 13:18 lib/
-rw-r--r-- 1  501 staff   470 Apr 23 02:50 log4j2.xml
-rw-r--r-- 1  501 staff  3466 May 26 13:16 quickstart.md
root@kylin:/opt/maxwell# vim current/quickstart.md
root@kylin:/opt/maxwell# chown -R hadoop:hadoop /opt/maxwell/

root@kylin:/opt/maxwell/current# vim /home/hadoop/.bashrc
# Maxwell
export MAXWELL_HOME=/opt/maxwell/current
export MAXWELL_CONF_DIR=/opt/maxwell/current
export PATH=$PATH:$MAXWELL_HOME/bin

root@kylin:/opt/maxwell/current# source /home/hadoop/.bashrc

#Configure MySQL
# vim /etc/mysql/my.cnf
root@kylin:/opt/maxwell# vim /etc/mysql/mysql.conf.d/mysqld.cnf
# maxwell
server_id=1
log-bin=master
binlog_format=row

#Or, on a running server:
root@kylin:/opt/maxwell# mysql -u root -p
Enter password:
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 604
Server version: 5.7.30-0ubuntu0.18.04.1-log (Ubuntu)

Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
#Enable row-based replication
#Note: binlog_format is a session-based property; all active connections must be closed to fully switch over to row-based replication.
mysql> set global binlog_format=ROW;
mysql> set global binlog_row_image=FULL;
#Permissions: Maxwell needs permission to store its state in the database specified by the schema_database option (default: maxwell).
mysql> CREATE USER 'maxwell'@'%' IDENTIFIED BY '123456';
mysql> GRANT ALL ON maxwell.* TO 'maxwell'@'%';
mysql> GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE ON *.* TO 'maxwell'@'%';
# or for running maxwell locally:
mysql> CREATE USER 'maxwell'@'localhost' IDENTIFIED BY '123456';
mysql> GRANT ALL ON maxwell.* TO 'maxwell'@'localhost';
mysql> GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE ON *.* TO 'maxwell'@'localhost';

mysql> flush privileges;

# Run Maxwell
root@kylin:/opt/maxwell/current# ./bin/maxwell --user='maxwell' --password='123456' --host='127.0.0.1' --producer=stdout
Using kafka version: 1.0.0
21:28:46,334 WARN  MaxwellMetrics - Metrics will not be exposed: metricsReportingType not configured.
21:28:48,529 INFO  Maxwell - Maxwell v1.26.3 is booting (StdoutProducer), starting at Position[BinlogPosition[mysql_binary_log.000016:60802], lastHeartbeat=1591104258877]
21:28:49,059 INFO  MysqlSavedSchema - Restoring schema id 1 (last modified at Position[BinlogPosition[mysql_binary_log.000016:6119], lastHeartbeat=0])
21:28:49,678 INFO  BinlogConnectorReplicator - Setting initial binlog pos to: mysql_binary_log.000016:60802
21:28:49,770 INFO  BinaryLogClient - Connected to 127.0.0.1:3306 at mysql_binary_log.000016/60802 (sid:6379, cid:14)
21:28:49,771 INFO  BinlogConnectorReplicator - Binlog connected.
21:37:04,068 INFO  AbstractSchemaStore - storing schema @Position[BinlogPosition[mysql_binary_log.000017:34269], lastHeartbeat=1591105013625] after applying "create table students(     id int unsigned not null auto_increment primary key,     name char(8) not null,     sex char(4) not null,     age int unsigned not null,     tel char(13) null default '-'   )" to maxwell, new schema id is 2

# Statement used for testing
mysql> create table students(
    ->     id int unsigned not null auto_increment primary key,
    ->     name char(8) not null,
    ->     sex char(4) not null,
    ->     age tinyint unsigned not null,
    ->     tel char(13) null default '-'
    ->   );
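# A hypothetical insert into the table above, e.g.
#   mysql> insert into students (name, sex, age) values ('tom', 'm', 18);
# would be emitted by Maxwell as a JSON event roughly of this shape (field values are illustrative):
#   {"database":"maxwell","table":"students","type":"insert","ts":1591105200,"xid":12345,
#    "commit":true,"data":{"id":1,"name":"tom","sex":"m","age":18,"tel":"-"}}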

# Maxwell configuration
root@kylin:/opt/maxwell/current# cp config.properties.example config.properties

root@kylin:/opt/maxwell/current# cat config.properties | grep -v ^# | grep -v ^$
log_level=info
producer=kafka
kafka.bootstrap.servers=localhost:9092
host=localhost
user=maxwell
password=maxwell
kafka.compression.type=snappy
kafka.retries=0
kafka.acks=1
root@kylin:/opt/maxwell/current# vim config.properties
root@kylin:/opt/maxwell/current# cat config.properties | grep -v ^# | grep -v ^$
log_level=info
producer=kafka
kafka.bootstrap.servers=localhost:9092
host=localhost
user=maxwell
password=maxwell
kafka_topic=test
kafka.compression.type=snappy
kafka.retries=0
kafka.acks=all
kinesis_stream=test

# Start Maxwell with the default configuration

root@kylin:/home/hadoop/scripts# cat maxwell_start.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$MAXWELL_HOME/bin/maxwell --user='maxwell' --password='123456' --host='127.0.0.1' --producer=kafka --kafka.bootstrap.servers=localhost:9092 --kafka_topic=$TopicName

hadoop@kylin:~/scripts$ ./maxwell_start.sh
Please input a topic name:test
Using kafka version: 1.0.0
22:44:41,907 WARN  MaxwellMetrics - Metrics will not be exposed: metricsReportingType not configured.
22:44:43,764 INFO  ProducerConfig - ProducerConfig values:
        acks = 1
        batch.size = 16384
        bootstrap.servers = [localhost:9092]
        buffer.memory = 33554432
        client.id =
        compression.type = none
        connections.max.idle.ms = 540000
        enable.idempotence = false
        interceptor.classes = null
        key.serializer = class org.apache.kafka.common.serialization.StringSerializer
        linger.ms = 0
        max.block.ms = 60000
        max.in.flight.requests.per.connection = 5
        max.request.size = 1048576
        metadata.max.age.ms = 300000
        metric.reporters = []
        metrics.num.samples = 2
        metrics.recording.level = INFO
        metrics.sample.window.ms = 30000
        partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner
        receive.buffer.bytes = 32768
        reconnect.backoff.max.ms = 1000
        reconnect.backoff.ms = 50
        request.timeout.ms = 30000
        retries = 0
        retry.backoff.ms = 100
        sasl.jaas.config = null
        sasl.kerberos.kinit.cmd = /usr/bin/kinit
        sasl.kerberos.min.time.before.relogin = 60000
        sasl.kerberos.service.name = null
        sasl.kerberos.ticket.renew.jitter = 0.05
        sasl.kerberos.ticket.renew.window.factor = 0.8
        sasl.mechanism = GSSAPI
        security.protocol = PLAINTEXT
        send.buffer.bytes = 131072
        ssl.cipher.suites = null
        ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]
        ssl.endpoint.identification.algorithm = null
        ssl.key.password = null
        ssl.keymanager.algorithm = SunX509
        ssl.keystore.location = null
        ssl.keystore.password = null
        ssl.keystore.type = JKS
        ssl.protocol = TLS
        ssl.provider = null
        ssl.secure.random.implementation = null
        ssl.trustmanager.algorithm = PKIX
        ssl.truststore.location = null
        ssl.truststore.password = null
        ssl.truststore.type = JKS
        transaction.timeout.ms = 60000
        transactional.id = null
        value.serializer = class org.apache.kafka.common.serialization.StringSerializer

22:44:43,993 INFO  AppInfoParser - Kafka version : 1.0.0
22:44:43,994 INFO  AppInfoParser - Kafka commitId : aaa7af6d4a11b29d
22:44:44,079 INFO  Maxwell - Maxwell v1.26.3 is booting (MaxwellKafkaProducer), starting at Position[BinlogPosition[mysql_binary_log.000017:61598], lastHeartbeat=1591109008809]
22:44:44,684 INFO  MysqlSavedSchema - Restoring schema id 2 (last modified at Position[BinlogPosition[mysql_binary_log.000017:34269], lastHeartbeat=1591105013625])
22:44:45,223 INFO  MysqlSavedSchema - Restoring schema id 1 (last modified at Position[BinlogPosition[mysql_binary_log.000016:6119], lastHeartbeat=0])
22:44:45,494 INFO  MysqlSavedSchema - beginning to play deltas...
22:44:45,508 INFO  MysqlSavedSchema - played 1 deltas in 13ms
22:44:45,788 INFO  BinlogConnectorReplicator - Setting initial binlog pos to: mysql_binary_log.000017:61598
22:44:45,905 INFO  BinaryLogClient - Connected to 127.0.0.1:3306 at mysql_binary_log.000017/61598 (sid:6379, cid:154)
22:44:45,906 INFO  BinlogConnectorReplicator - Binlog connected.
^C22:45:07,513 INFO  MaxwellContext - Sending final heartbeat: 1591109107513
22:45:07,536 INFO  BinlogConnectorReplicator - received final heartbeat 1591109107513; stopping replicator
22:45:07,539 INFO  BinlogConnectorReplicator - Binlog disconnected.
22:45:07,640 INFO  TaskManager - Stopping 4 tasks
22:45:07,641 INFO  TaskManager - Stopping: com.zendesk.maxwell.schema.PositionStoreThread@27eb9248
22:45:07,642 INFO  TaskManager - Stopping: com.zendesk.maxwell.producer.MaxwellKafkaProducerWorker@292c68c5
22:45:07,642 INFO  PositionStoreThread - Storing final position: Position[BinlogPosition[mysql_binary_log.000017:65003], lastHeartbeat=1591109107513]
22:45:07,653 INFO  KafkaProducer - [Producer clientId=producer-1] Closing the Kafka producer with timeoutMillis = 9223372036854775807 ms.
22:45:07,691 INFO  TaskManager - Stopping: com.zendesk.maxwell.bootstrap.BootstrapController@6b5b04b6
22:45:07,691 INFO  TaskManager - Stopping: com.zendesk.maxwell.replication.BinlogConnectorReplicator@83e9db1
22:45:09,127 INFO  TaskManager - Stopped all tasks


# binlog
# 
#Enable the binlog
vi /etc/mysql/mysql.conf.d/mysqld.cnf
---
log_bin=/var/lib/mysql/mysql_binary_log
---
# Check whether the binlog is enabled
# Inside the mysql client:
mysql> show variables like '%log_bin%';
+---------------------------------+---------------------------------------+
| Variable_name                   | Value                                 |
+---------------------------------+---------------------------------------+
| log_bin                         | ON                                    |
| log_bin_basename                | /var/lib/mysql/mysql_binary_log       |
| log_bin_index                   | /var/lib/mysql/mysql_binary_log.index |
| log_bin_trust_function_creators | OFF                                   |
| log_bin_use_v1_row_events       | OFF                                   |
| sql_log_bin                     | ON                                    |
+---------------------------------+---------------------------------------+
6 rows in set (0.01 sec)

mysql> show variables like 'binlog_format';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| binlog_format | ROW   |
+---------------+-------+
1 row in set (0.00 sec)

# The MySQL binlog comes in three formats: ROW, STATEMENT and MIXED;
# set global binlog_format='ROW/STATEMENT/MIXED'

# View the binlog files

sudo /usr/bin/mysqlbinlog  /var/lib/mysql/mysql_binary_log.00000x  --base64-output=decode-rows -v
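# The binlog can also be inspected from inside the mysql client (file name illustrative):
# mysql> show binary logs;
# mysql> show binlog events in 'mysql_binary_log.000017' limit 5;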
 

Scala 2.12.11

# Scala 2.12
# https://www.scala-lang.org/download
# https://www.scala-lang.org/download/2.12.11.html

root@kylin:~/wget# wget https://downloads.lightbend.com/scala/2.12.11/scala-2.12.11.tgz
--2020-06-02 03:02:11--  https://downloads.lightbend.com/scala/2.12.11/scala-2.12.11.tgz
Resolving downloads.lightbend.com (downloads.lightbend.com)... 13.35.50.87, 13.35.50.28, 13.35.50.10, ...
Connecting to downloads.lightbend.com (downloads.lightbend.com)|13.35.50.87|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20795989 (20M) [application/octet-stream]
Saving to: ‘scala-2.12.11.tgz’

scala-2.12.11.tgz                    100%[===================================================================>]  19.83M  1.91MB/s    in 12s

2020-06-02 03:02:23 (1.72 MB/s) - ‘scala-2.12.11.tgz’ saved [20795989/20795989]

root@kylin:~/wget# mkdir /usr/scala
root@kylin:~/wget# tar -zxf scala-2.12.11.tgz -C /usr/scala/
root@kylin:~/wget# cd /usr/scala/
root@kylin:/usr/scala# ll scala-2.12.11/
total 40
drwxrwxr-x 6 2000 2000  4096 Mar 16 17:41 ./
drwxr-xr-x 3 root root  4096 Jun  2 12:53 ../
-rw-rw-r-- 1 2000 2000 11357 Mar 16 17:41 LICENSE
-rw-rw-r-- 1 2000 2000   646 Mar 16 17:41 NOTICE
drwxrwxr-x 2 2000 2000  4096 Mar 16 17:41 bin/
drwxrwxr-x 4 2000 2000  4096 Mar 16 17:41 doc/
drwxrwxr-x 2 2000 2000  4096 Mar 16 17:41 lib/
drwxrwxr-x 3 2000 2000  4096 Mar 16 17:41 man/

root@kylin:/usr/scala# ln -s scala-2.12.11 default
root@kylin:/usr/scala# vim /etc/profile
---
export SCALA_HOME=/usr/scala/default
export PATH=$SCALA_HOME/bin:$PATH
---
root@kylin:/usr/scala# source /etc/profile
root@kylin:/usr/scala# scala -version
Scala code runner version 2.12.11 -- Copyright 2002-2020, LAMP/EPFL and Lightbend, Inc.
root@kylin:/usr/scala#
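# A quick smoke test of the install (expression and output shown are just an example):
# scala -e 'println((1 to 5).map(_ * 2).sum)'    # prints 30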

Spark 3.0.0

# Spark (3.0.0)
# https://spark.apache.org/downloads.html
root@kylin:~/wget# wget https://mirror.bit.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2-bin-hadoop3.2.tgz
--2020-05-31 22:16:56--  https://mirror.bit.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2-bin-hadoop3.2.tgz
Resolving mirror.bit.edu.cn (mirror.bit.edu.cn)... 202.204.80.77, 219.143.204.117, 2001:da8:204:1205::22
Connecting to mirror.bit.edu.cn (mirror.bit.edu.cn)|202.204.80.77|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 239947422 (229M) [application/octet-stream]
Saving to: ‘spark-3.0.0-preview2-bin-hadoop3.2.tgz’

spark-3.0.0-preview2-bin-hadoop3.   1%[                                                            ]   2.48M  76.0KB/s    eta 36m 14s

root@kylin:~/wget# mkdir /opt/spark
root@kylin:~/wget# tar -zxf spark-3.0.0-preview2-bin-hadoop3.2.tgz -C /opt/spark/
root@kylin:~/wget# cd /opt/spark/
root@kylin:/opt/spark# ll
total 12
drwxr-xr-x  3 root      root   4096 May 31 23:29 ./
drwxr-xr-x  9 root      root   4096 May 31 23:29 ../
drwxr-xr-x 13 110302528 ubuntu 4096 Dec 17 12:59 spark-3.0.0-preview2-bin-hadoop3.2/
root@kylin:/opt/spark# ln -s spark-3.0.0-preview2-bin-hadoop3.2 current
root@kylin:/opt/spark# chown -R hadoop:hadoop /opt/spark/
root@kylin:/opt/spark# ll current/
total 164
drwxr-xr-x 13 hadoop hadoop  4096 Dec 17 12:59 ./
drwxr-xr-x  3 hadoop hadoop  4096 May 31 23:30 ../
-rw-r--r--  1 hadoop hadoop 23311 Dec 17 12:59 LICENSE
-rw-r--r--  1 hadoop hadoop 57677 Dec 17 12:59 NOTICE
drwxr-xr-x  3 hadoop hadoop  4096 Dec 17 12:59 R/
-rw-r--r--  1 hadoop hadoop  4666 Dec 17 12:59 README.md
-rw-r--r--  1 hadoop hadoop   192 Dec 17 12:59 RELEASE
drwxr-xr-x  2 hadoop hadoop  4096 Dec 17 12:59 bin/
drwxr-xr-x  2 hadoop hadoop  4096 Dec 17 12:59 conf/
drwxr-xr-x  5 hadoop hadoop  4096 Dec 17 12:59 data/
drwxr-xr-x  4 hadoop hadoop  4096 Dec 17 12:59 examples/
drwxr-xr-x  2 hadoop hadoop 20480 Dec 17 12:59 jars/
drwxr-xr-x  4 hadoop hadoop  4096 Dec 17 12:59 kubernetes/
drwxr-xr-x  2 hadoop hadoop  4096 Dec 17 12:59 licenses/
drwxr-xr-x  7 hadoop hadoop  4096 Dec 17 12:59 python/
drwxr-xr-x  2 hadoop hadoop  4096 Dec 17 12:59 sbin/
drwxr-xr-x  2 hadoop hadoop  4096 Dec 17 12:59 yarn/
root@kylin:/opt/spark# mkdir /etc/spark
root@kylin:/opt/spark# cp -r /opt/spark/current/conf /etc/spark/
root@kylin:/opt/spark# chown -R hadoop:hadoop /etc/spark/
root@kylin:/opt/spark# vim /home/hadoop/.bashrc
---
#Spark
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf
export PATH=$PATH:$SPARK_HOME/bin
---
root@kylin:/opt/spark# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop

hadoop@kylin:~$ cd /etc/spark/conf/
hadoop@kylin:/etc/spark/conf$ ll
total 44
drwxr-xr-x 2 hadoop hadoop 4096 May 31 23:38 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 31 23:38 ../
-rw-r--r-- 1 hadoop hadoop 1105 May 31 23:38 fairscheduler.xml.template
-rw-r--r-- 1 hadoop hadoop 2023 May 31 23:38 log4j.properties.template
-rw-r--r-- 1 hadoop hadoop 9141 May 31 23:38 metrics.properties.template
-rw-r--r-- 1 hadoop hadoop  865 May 31 23:38 slaves.template
-rw-r--r-- 1 hadoop hadoop 1292 May 31 23:38 spark-defaults.conf.template
-rwxr-xr-x 1 hadoop hadoop 4344 May 31 23:38 spark-env.sh.template*
hadoop@kylin:/etc/spark/conf$ mv spark-env.sh.template spark-env.sh
hadoop@kylin:/etc/spark/conf$ vim spark-env.sh
hadoop@kylin:/etc/spark/conf$ cat spark-env.sh | grep -v ^#|grep -v ^$
export JAVA_HOME=/usr/java/default
export SCALA_HOME=/usr/scala/default
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_CONF_DIR=/etc/hadoop/conf
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf


hadoop@kylin:/etc/spark/conf$ mv slaves.template slaves
hadoop@kylin:/etc/spark/conf$ cat slaves | grep -v ^#|grep -v ^$
localhost

hadoop@kylin:/etc/spark/conf$ cd /$SPARK_HOME
hadoop@kylin://opt/spark/current$ cd conf/
hadoop@kylin://opt/spark/current/conf$ ll
total 44
drwxr-xr-x  2 hadoop hadoop 4096 Dec 17 12:59 ./
drwxr-xr-x 13 hadoop hadoop 4096 Dec 17 12:59 ../
-rw-r--r--  1 hadoop hadoop 1105 Dec 17 12:59 fairscheduler.xml.template
-rw-r--r--  1 hadoop hadoop 2023 Dec 17 12:59 log4j.properties.template
-rw-r--r--  1 hadoop hadoop 9141 Dec 17 12:59 metrics.properties.template
-rw-r--r--  1 hadoop hadoop  865 Dec 17 12:59 slaves.template
-rw-r--r--  1 hadoop hadoop 1292 Dec 17 12:59 spark-defaults.conf.template
-rwxr-xr-x  1 hadoop hadoop 4344 Dec 17 12:59 spark-env.sh.template*
hadoop@kylin://opt/spark/current/conf$ ln -s /etc/spark/conf/spark-env.sh spark-env.sh
hadoop@kylin://opt/spark/current/conf$ ln -s /etc/spark/conf/slaves slaves
hadoop@kylin://opt/spark/current/conf$ ll
total 44
drwxr-xr-x  2 hadoop hadoop 4096 Jun  2 13:56 ./
drwxr-xr-x 13 hadoop hadoop 4096 Dec 17 12:59 ../
-rw-r--r--  1 hadoop hadoop 1105 Dec 17 12:59 fairscheduler.xml.template
-rw-r--r--  1 hadoop hadoop 2023 Dec 17 12:59 log4j.properties.template
-rw-r--r--  1 hadoop hadoop 9141 Dec 17 12:59 metrics.properties.template
lrwxrwxrwx  1 hadoop hadoop   22 Jun  2 13:56 slaves -> /etc/spark/conf/slaves
-rw-r--r--  1 hadoop hadoop  865 Dec 17 12:59 slaves.template
-rw-r--r--  1 hadoop hadoop 1292 Dec 17 12:59 spark-defaults.conf.template
lrwxrwxrwx  1 hadoop hadoop   28 Jun  2 13:56 spark-env.sh -> /etc/spark/conf/spark-env.sh*
-rwxr-xr-x  1 hadoop hadoop 4344 Dec 17 12:59 spark-env.sh.template*

# https://spark.apache.org/docs/latest/spark-standalone.html
# Start the master
hadoop@kylin:~$ $SPARK_HOME/sbin/start-master.sh
starting org.apache.spark.deploy.master.Master, logging to /opt/spark/current/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-kylin.out

# Stop the master
hadoop@kylin:~$ $SPARK_HOME/sbin/stop-master.sh
stopping org.apache.spark.deploy.master.Master

# Start a worker (without a master URL the script only prints its usage)
hadoop@kylin:~$ $SPARK_HOME/sbin/start-slave.sh
Usage: ./sbin/start-slave.sh <master> [options]
2020-06-02 14:10:50,173 WARN util.Utils: Your hostname, kylin resolves to a loopback address: 127.0.1.1; using 172.26.10.48 instead (on interface eth0)
2020-06-02 14:10:50,175 WARN util.Utils: Set SPARK_LOCAL_IP if you need to bind to another address

Master must be a URL of the form spark://hostname:port

Options:
  -c CORES, --cores CORES  Number of cores to use
  -m MEM, --memory MEM     Amount of memory to use (e.g. 1000M, 2G)
  -d DIR, --work-dir DIR   Directory to run apps in (default: SPARK_HOME/work)
  -i HOST, --ip IP         Hostname to listen on (deprecated, please use --host or -h)
  -h HOST, --host HOST     Hostname to listen on
  -p PORT, --port PORT     Port to listen on (default: random)
  --webui-port PORT        Port for web UI (default: 8081)
  --properties-file FILE   Path to a custom Spark properties file.
                           Default is conf/spark-defaults.conf.
# Start a worker
hadoop@kylin:~$ $SPARK_HOME/sbin/start-slave.sh spark://kylin.localdomain:7077
starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark/current/logs/spark-hadoop-org.apache.spark.deploy.worker.Worker-1-kylin.out

# Stop the worker
hadoop@kylin:~$ $SPARK_HOME/sbin/stop-slave.sh spark://kylin.localdomain:7077
stopping org.apache.spark.deploy.worker.Worker
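# The master and the workers listed in conf/slaves can also be started/stopped together
# (a sketch; assumes passwordless ssh to localhost, which was configured earlier):
# $SPARK_HOME/sbin/start-all.sh
# $SPARK_HOME/sbin/stop-all.sh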

# ./bin/spark-shell --master spark://IP:PORT
# 
hadoop@kylin:~$ $SPARK_HOME/bin/spark-shell
2020-06-02 15:04:30,279 WARN util.Utils: Your hostname, kylin resolves to a loopback address: 127.0.1.1; using 172.26.10.48 instead (on interface eth0)
2020-06-02 15:04:30,283 WARN util.Utils: Set SPARK_LOCAL_IP if you need to bind to another address
2020-06-02 15:04:31,882 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://hadoop:4040
Spark context available as 'sc' (master = local[*], app id = local-1591081501036).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ ''_/
   /___/ .__/\_,_/_/ /_/\_\   version 3.0.0-preview2
      /_/

Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_181)
Type in expressions to have them evaluated.
Type :help for more information.

scala>^C

hadoop@kylin:~$ $SPARK_HOME/bin/spark-submit  --class  org.apache.spark.examples.SparkPi  --master local   /opt/spark/current/examples/jars/spark-examples_2.12-3.0.0-preview2.jar
'
......
2020-06-02 15:22:02,628 INFO scheduler.DAGScheduler: Job 0 finished: reduce at SparkPi.scala:38, took 3.606569 s
Pi is roughly 3.1397756988784944
......
'

hadoop@kylin:~$ $SPARK_HOME/bin/spark-submit  --class  org.apache.spark.examples.SparkPi  --master yarn /opt/spark/current/examples/jars/spark-examples_2.12-3.0.0-preview2.jar
'
......
2020-06-02 15:26:52,090 INFO scheduler.DAGScheduler: Job 0 finished: reduce at SparkPi.scala:38, took 21.626012 s
Pi is roughly 3.1376956884784426
......
'
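# The same example can also be submitted to the standalone master started above
# (assuming the master and a worker are running; URL as used earlier):
# $SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://kylin.localdomain:7077 /opt/spark/current/examples/jars/spark-examples_2.12-3.0.0-preview2.jar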

# hadoop@kylin:~$ mv /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar.bak

#Configure Spark to read Hive tables:

hadoop@kylin:~$ vim /etc/hive/conf/hive-site.xml
<property>
<name>hive.metastore.uris</name>
  <value>thrift://localhost:9083</value>
</property>
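# The thrift URI above requires the Hive metastore service to be listening on port 9083; if it is
# not already running (it may have been started in an earlier section), a sketch of launching it:
# nohup hive --service metastore > /tmp/hive-metastore.log 2>&1 &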


hadoop@kylin:~$ cp /etc/hive/conf/hive-site.xml /etc/spark/conf/
hadoop@kylin:~$ cd $SPARK_CONF_DIR
hadoop@kylin:/etc/spark/conf$ chmod 777 hive-site.xml

hadoop@kylin:~$ cp /opt/hive/current/lib/mysql-connector-java-5.1.45.jar /opt/spark/current/jars/

# Spark SQL against Hive
# 
# Running $SPARK_HOME/bin/spark-sql produces a flood of INFO logging; reduce it as follows:
hadoop@kylin:/opt/spark/current/bin$ cd /etc/spark/conf/
hadoop@kylin:/etc/spark/conf$ cp log4j.properties.template log4j.properties
hadoop@kylin:/etc/spark/conf$ sed -i 's/INFO/ERROR/' log4j.properties

hadoop@kylin:/opt/spark/current/bin$ spark-sql
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
Spark master: local[*], Application Id: local-1591270878565
spark-sql (default)> show databases;
namespace
default
test_bdw
Time taken: 9.809 seconds, Fetched 2 row(s)
spark-sql (default)> use default;
Response code
Time taken: 0.344 seconds
spark-sql (default)> show tables;
database        tableName       isTemporary
default kylin_account   false
default kylin_cal_dt    false
default kylin_category_groupings        false
default kylin_country   false
default kylin_sales     false
Time taken: 0.459 seconds, Fetched 5 row(s)
spark-sql (default)> select * from kylin_country limit 3;
country latitude        longitude       name
AD      42.546245       1.601554        Andorra
AE      23.424076       53.847818       United Arab Emirates
AF      33.93911        67.709953       Afghanistan
Time taken: 9.221 seconds, Fetched 3 row(s)
spark-sql (default)> quit;

hadoop@kylin:/opt/spark/current/bin$ mv /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar.bak

# pyspark
hadoop@kylin:/opt/spark/current/bin$ pyspark
Python 2.7.17 (default, Apr 15 2020, 17:20:14)
[GCC 7.5.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
/opt/spark/current/python/pyspark/context.py:219: DeprecationWarning: Support for Python 2 and Python 3 prior to version 3.6 is deprecated as of Spark 3.0. See also the plan for dropping Python 2 support at https://spark.apache.org/news/plan-for-dropping-python-2-support.html.
  DeprecationWarning)
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ ''_/
   /__ / .__/\_,_/_/ /_/\_\   version 3.0.0-preview2
      /_/

Using Python version 2.7.17 (default, Apr 15 2020 17:20:14)
SparkSession available as 'spark'.
>>>



# spark shell 
hadoop@kylin:~$ $SPARK_HOME/bin/spark-shell
2020-06-02 16:36:23,933 WARN util.Utils: Your hostname, kylin resolves to a loopback address: 127.0.1.1; using 172.26.10.48 instead (on interface eth0)
2020-06-02 16:36:23,939 WARN util.Utils: Set SPARK_LOCAL_IP if you need to bind to another address
2020-06-02 16:36:26,612 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://hadoop:4040
Spark context available as 'sc' (master = local[*], app id = local-1591087011628).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ ''_/
   /___/ .__/\_,_/_/ /_/\_\   version 3.0.0-preview2
      /_/

Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_181)
Type in expressions to have them evaluated.
Type :help for more information.

scala> import org.apache.spark.sql.hive.HiveContext
<console>:23: error: object HiveContext is not a member of package org.apache.spark.sql.hive
       import org.apache.spark.sql.hive.HiveContext
              ^
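# In Spark 3 the old HiveContext entry point has been removed; Hive access goes through SparkSession
# instead. A minimal sketch (assuming a Hive-enabled Spark build and a running metastore) inside spark-shell:
# scala> spark.sql("show databases").show()
# scala> val hiveSpark = org.apache.spark.sql.SparkSession.builder().enableHiveSupport().getOrCreate()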


hadoop@kylin:/etc/spark/conf$ vim spark-env.sh
hadoop@kylin:/etc/spark/conf$ cat spark-env.sh | grep -v ^#|grep -v ^$
export JAVA_HOME=/usr/java/default
export SCALA_HOME=/usr/scala/default
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_CONF_DIR=/etc/hadoop/conf
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf
export CLASSPATH=$CLASSPATH:/opt/hive/current/lib
export HIVE_CONF_DIR=/etc/hive/conf
export SPARK_CLASSPATH=$SPARK_CLASSPATH:/opt/hive/current/lib/mysql-connector-java-5.1.45.jar
export SPARK_DIST_CLASSPATH=$(/opt/hadoop/current/bin/hadoop classpath)


# This Spark build does not include Hive support.
# One option is to build Spark from source.
#
root@kylin:~/wget# wget https://mirrors.bfsu.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2.tgz
--2020-06-02 22:12:03--  https://mirrors.bfsu.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2.tgz
Resolving mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)... 39.155.141.16, 2001:da8:20f:4435:4adf:37ff:fe55:2840
Connecting to mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)|39.155.141.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21811982 (21M) [application/octet-stream]
Saving to: ‘spark-3.0.0-preview2.tgz’

spark-3.0.0-preview2.tgz             100%[===================================================================>]  20.80M  5.30MB/s    in 4.3s

2020-06-02 22:12:08 (4.87 MB/s) - ‘spark-3.0.0-preview2.tgz’ saved [21811982/21811982]

root@kylin:~/wget# tar -zxf spark-3.0.0-preview2.tgz -C /opt/spark/
root@kylin:~/wget# cd /opt/spark/
root@kylin:/opt/spark# mv spark-3.0.0-preview2 spark-3.0.0-preview2-sourcecode
root@kylin:/opt/spark# chown -R hadoop:hadoop spark-3.0.0-preview2-sourcecode/


root@kylin:~# apt install maven
root@kylin:~# mvn -version
Apache Maven 3.6.0
Maven home: /usr/share/maven
Java version: 1.8.0_181, vendor: Oracle Corporation, runtime: /usr/java/jdk1.8.0_181/jre
Default locale: en, platform encoding: UTF-8
OS name: "linux", version: "4.19.104-microsoft-standard", arch: "amd64", family: "unix"

root@kylin:~# whereis maven
maven: /etc/maven /usr/share/maven
root@kylin:~# cd /usr/share/maven
root@kylin:/usr/share/maven# ll
total 24
drwxr-xr-x   6 root root 4096 Jun  2 17:41 ./
drwxr-xr-x 118 root root 4096 Jun  2 17:41 ../
drwxr-xr-x   2 root root 4096 Jun  2 17:41 bin/
drwxr-xr-x   2 root root 4096 Jun  2 17:41 boot/
lrwxrwxrwx   1 root root   10 Apr 10  2019 conf -> /etc/maven/
drwxr-xr-x   2 root root 4096 Jun  2 17:41 lib/
drwxr-xr-x   2 root root 4096 Jun  2 17:41 man/
root@kylin:/usr/share/maven# vim /root/.bashrc
---
export MAVEN_HOME=/usr/share/maven
export PATH=$PATH:$MAVEN_HOME/bin
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"
---
root@kylin:/usr/share/maven# source /root/.bashrc

root@kylin:/opt/spark# cd spark-3.0.0-preview2-sourcecode/
root@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode# ./dev/change-scala-version.sh 2.12
./dev/../graph/graph/pom.xml
./dev/../graph/cypher/pom.xml
./dev/../graph/api/pom.xml
./dev/../mllib-local/pom.xml
./dev/../repl/pom.xml
./dev/../launcher/pom.xml
./dev/../examples/pom.xml
./dev/../sql/catalyst/pom.xml
./dev/../sql/core/pom.xml
./dev/../sql/hive/pom.xml
./dev/../sql/hive-thriftserver/pom.xml
./dev/../graphx/pom.xml
./dev/../resource-managers/yarn/pom.xml
./dev/../resource-managers/kubernetes/integration-tests/pom.xml
./dev/../resource-managers/kubernetes/core/pom.xml
./dev/../resource-managers/mesos/pom.xml
./dev/../common/unsafe/pom.xml
./dev/../common/tags/pom.xml
./dev/../common/kvstore/pom.xml
./dev/../common/network-shuffle/pom.xml
./dev/../common/network-yarn/pom.xml
./dev/../common/sketch/pom.xml
./dev/../common/network-common/pom.xml
./dev/../mllib/pom.xml
./dev/../hadoop-cloud/pom.xml
./dev/../core/pom.xml
./dev/../assembly/pom.xml
./dev/../external/kafka-0-10/pom.xml
./dev/../external/avro/pom.xml
./dev/../external/kinesis-asl-assembly/pom.xml
./dev/../external/spark-ganglia-lgpl/pom.xml
./dev/../external/docker-integration-tests/pom.xml
./dev/../external/kafka-0-10-assembly/pom.xml
./dev/../external/kafka-0-10-token-provider/pom.xml
./dev/../external/kafka-0-10-sql/pom.xml
./dev/../external/kinesis-asl/pom.xml
./dev/../tools/pom.xml
./dev/../pom.xml
./dev/../streaming/pom.xml
./dev/../docs/_plugins/copy_api_dirs.rb

root@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode# mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests clean package
[INFO] Scanning for projects...
......


#mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests clean package
hadoop@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode$ mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests clean package
......

[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary for Spark Project Parent POM 3.0.0-preview2:
[INFO]
[INFO] Spark Project Parent POM ........................... SUCCESS [  5.716 s]
[INFO] Spark Project Tags ................................. SUCCESS [ 17.282 s]
[INFO] Spark Project Sketch ............................... SUCCESS [ 20.009 s]
[INFO] Spark Project Local DB ............................. SUCCESS [  3.199 s]
[INFO] Spark Project Networking ........................... SUCCESS [  6.765 s]
[INFO] Spark Project Shuffle Streaming Service ............ SUCCESS [  7.790 s]
[INFO] Spark Project Unsafe ............................... SUCCESS [ 29.087 s]
[INFO] Spark Project Launcher ............................. SUCCESS [  4.287 s]
[INFO] Spark Project Core ................................. SUCCESS [07:57 min]
[INFO] Spark Project ML Local Library ..................... SUCCESS [01:34 min]
[INFO] Spark Project GraphX ............................... SUCCESS [02:14 min]
[INFO] Spark Project Streaming ............................ SUCCESS [04:30 min]
[INFO] Spark Project Catalyst ............................. SUCCESS [22:20 min]
[INFO] Spark Project SQL .................................. SUCCESS [19:29 min]
[INFO] Spark Project ML Library ........................... SUCCESS [09:54 min]
[INFO] Spark Project Tools ................................ SUCCESS [ 42.535 s]
[INFO] Spark Project Hive ................................. SUCCESS [06:23 min]
[INFO] Spark Project Graph API ............................ SUCCESS [  5.058 s]
[INFO] Spark Project Cypher ............................... SUCCESS [  3.054 s]
[INFO] Spark Project Graph ................................ SUCCESS [  5.758 s]
[INFO] Spark Project REPL ................................. SUCCESS [02:23 min]
[INFO] Spark Project YARN Shuffle Service ................. SUCCESS [ 38.680 s]
[INFO] Spark Project YARN ................................. SUCCESS [07:10 min]
[INFO] Spark Project Hive Thrift Server ................... SUCCESS [16:51 min]
[INFO] Spark Project Assembly ............................. SUCCESS [ 44.521 s]
[INFO] Kafka 0.10+ Token Provider for Streaming ........... SUCCESS [12:00 min]
[INFO] Spark Integration for Kafka 0.10 ................... SUCCESS [21:40 min]
[INFO] Kafka 0.10+ Source for Structured Streaming ........ SUCCESS [12:49 min]
[INFO] Spark Project Examples ............................. SUCCESS [06:29 min]
[INFO] Spark Integration for Kafka 0.10 Assembly .......... SUCCESS [ 55.229 s]
[INFO] Spark Avro ......................................... SUCCESS [03:43 min]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time:  02:42 h
[INFO] Finished at: 2020-06-03T23:54:05+08:00
[INFO] ------------------------------------------------------------------------

#./dev/./make-distribution.sh --name hadoop3.2.1-hive --tgz -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests
hadoop@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode$ ./dev/./make-distribution.sh --name hadoop3.2.1-hive --tgz -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests

[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary for Spark Project Parent POM 3.0.0-preview2:
[INFO]
[INFO] Spark Project Parent POM ........................... SUCCESS [ 13.513 s]
[INFO] Spark Project Tags ................................. SUCCESS [ 23.672 s]
[INFO] Spark Project Sketch ............................... SUCCESS [ 25.063 s]
[INFO] Spark Project Local DB ............................. SUCCESS [  4.005 s]
[INFO] Spark Project Networking ........................... SUCCESS [  7.310 s]
[INFO] Spark Project Shuffle Streaming Service ............ SUCCESS [  3.006 s]
[INFO] Spark Project Unsafe ............................... SUCCESS [ 24.651 s]
[INFO] Spark Project Launcher ............................. SUCCESS [  4.874 s]
[INFO] Spark Project Core ................................. SUCCESS [06:04 min]
[INFO] Spark Project ML Local Library ..................... SUCCESS [01:19 min]
[INFO] Spark Project GraphX ............................... SUCCESS [03:00 min]
[INFO] Spark Project Streaming ............................ SUCCESS [04:25 min]
[INFO] Spark Project Catalyst ............................. SUCCESS [11:58 min]
[INFO] Spark Project SQL .................................. SUCCESS [17:14 min]
[INFO] Spark Project ML Library ........................... SUCCESS [09:33 min]
[INFO] Spark Project Tools ................................ SUCCESS [ 36.058 s]
[INFO] Spark Project Hive ................................. SUCCESS [08:19 min]
[INFO] Spark Project Graph API ............................ SUCCESS [  4.513 s]
[INFO] Spark Project Cypher ............................... SUCCESS [  2.620 s]
[INFO] Spark Project Graph ................................ SUCCESS [  6.191 s]
[INFO] Spark Project REPL ................................. SUCCESS [02:11 min]
[INFO] Spark Project YARN Shuffle Service ................. SUCCESS [ 43.326 s]
[INFO] Spark Project YARN ................................. SUCCESS [04:02 min]
[INFO] Spark Project Hive Thrift Server ................... SUCCESS [03:56 min]
[INFO] Spark Project Assembly ............................. SUCCESS [ 23.387 s]
[INFO] Kafka 0.10+ Token Provider for Streaming ........... SUCCESS [01:27 min]
[INFO] Spark Integration for Kafka 0.10 ................... SUCCESS [02:27 min]
[INFO] Kafka 0.10+ Source for Structured Streaming ........ SUCCESS [04:18 min]
[INFO] Spark Project Examples ............................. SUCCESS [01:53 min]
[INFO] Spark Integration for Kafka 0.10 Assembly .......... SUCCESS [12:11 min]
[INFO] Spark Avro ......................................... SUCCESS [02:37 min]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time:  01:40 h
[INFO] Finished at: 2020-06-04T02:00:15+08:00
[INFO] ------------------------------------------------------------------------

Flink 1.10.1

# Flink 1.10.1
# https://flink.apache.org/zh/
root@kylin:~/wget# wget https://mirrors.bfsu.edu.cn/apache/flink/flink-1.10.1/flink-1.10.1-bin-scala_2.12.tgz
--2020-06-03 01:15:39--  https://mirrors.bfsu.edu.cn/apache/flink/flink-1.10.1/flink-1.10.1-bin-scala_2.12.tgz
Resolving mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)... 39.155.141.16, 2001:da8:20f:4435:4adf:37ff:fe55:2840
Connecting to mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)|39.155.141.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 279396579 (266M) [application/octet-stream]
Saving to: ‘flink-1.10.1-bin-scala_2.12.tgz’

flink-1.10.1-bin-scala_2.12.tgz      100%[===================================================================>] 266.45M  4.03MB/s    in 2m 39s

2020-06-03 01:18:21 (1.67 MB/s) - ‘flink-1.10.1-bin-scala_2.12.tgz’ saved [279396579/279396579]


root@kylin:~/wget# mkdir /opt/flink
root@kylin:~/wget# tar -zxf flink-1.10.1-bin-scala_2.12.tgz -C /opt/flink/
root@kylin:~/wget# cd /opt/flink/
root@kylin:/opt/flink# ll
total 12
drwxr-xr-x  3 root root 4096 Jun  5 00:29 ./
drwxr-xr-x 14 root root 4096 Jun  5 00:28 ../
drwxr-xr-x 10 root root 4096 May  7 18:09 flink-1.10.1/
root@kylin:/opt/flink# ln -s flink-1.10.1 current
root@kylin:/opt/flink# ll
total 12
drwxr-xr-x  3 root root 4096 Jun  5 00:30 ./
drwxr-xr-x 14 root root 4096 Jun  5 00:28 ../
lrwxrwxrwx  1 root root   12 Jun  5 00:30 current -> flink-1.10.1/
drwxr-xr-x 10 root root 4096 May  7 18:09 flink-1.10.1/
root@kylin:/opt/flink# cd current
root@kylin:/opt/flink/current# ll
total 596
drwxr-xr-x 10 root root   4096 May  7 18:09 ./
drwxr-xr-x  3 root root   4096 Jun  5 00:30 ../
drwxr-xr-x  2 root root   4096 May  7 18:09 bin/
drwxr-xr-x  2 root root   4096 May  7 18:09 conf/
drwxr-xr-x  7 root root   4096 May  7 18:09 examples/
drwxr-xr-x  2 root root   4096 May  7 18:09 lib/
-rw-r--r--  1 root root  11357 May  7 13:45 LICENSE
drwxr-xr-x  2 root root   4096 May  7 18:09 licenses/
drwxr-xr-x  2 root root   4096 May  7 13:45 log/
-rw-r--r--  1 root root 550465 May  7 18:09 NOTICE
drwxr-xr-x  2 root root   4096 May  7 18:09 opt/
drwxr-xr-x  2 root root   4096 May  7 13:45 plugins/
-rw-r--r--  1 root root   1309 May  7 13:45 README.txt
root@kylin:/opt/flink/current# cd ..
root@kylin:/opt/flink# ll current/
total 596
drwxr-xr-x 10 root root   4096 May  7 18:09 ./
drwxr-xr-x  3 root root   4096 Jun  5 00:30 ../
drwxr-xr-x  2 root root   4096 May  7 18:09 bin/
drwxr-xr-x  2 root root   4096 May  7 18:09 conf/
drwxr-xr-x  7 root root   4096 May  7 18:09 examples/
drwxr-xr-x  2 root root   4096 May  7 18:09 lib/
-rw-r--r--  1 root root  11357 May  7 13:45 LICENSE
drwxr-xr-x  2 root root   4096 May  7 18:09 licenses/
drwxr-xr-x  2 root root   4096 May  7 13:45 log/
-rw-r--r--  1 root root 550465 May  7 18:09 NOTICE
drwxr-xr-x  2 root root   4096 May  7 18:09 opt/
drwxr-xr-x  2 root root   4096 May  7 13:45 plugins/
-rw-r--r--  1 root root   1309 May  7 13:45 README.txt
root@kylin:/opt/flink# chown -R hadoop:hadoop /opt/flink/
root@kylin:/opt/flink# mkdir /etc/flink
root@kylin:/opt/flink# cp -r /opt/flink/current/conf /etc/flink/
root@kylin:/opt/flink# chown -R hadoop:hadoop /etc/flink/
root@kylin:/opt/flink# vim /home/hadoop/.bashrc

# Flink
export FLINK_HOME=/opt/flink/current
export FLINK_CONF_DIR=/etc/flink/conf
export PATH=$PATH:$FLINK_HOME/bin

root@kylin:/opt/flink# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$ cd /etc/flink/conf/

# Flink standalone mode configuration
# https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/deployment/cluster_setup.html#configuring-flink
hadoop@kylin:/etc/flink/conf$ vim flink-conf.yaml
hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep -v ^# | grep -v ^$

jobmanager.rpc.address: localhost
jobmanager.rpc.port: 6123
jobmanager.heap.size: 1024m
taskmanager.memory.process.size: 1728m
taskmanager.numberOfTaskSlots: 1
parallelism.default: 1
jobmanager.execution.failover-strategy: region
rest.port: 6061
io.tmp.dirs: /tmp

hadoop@kylin:/etc/flink/conf$ cat masters
localhost:6061
hadoop@kylin:/etc/flink/conf$ vim masters
hadoop@kylin:/etc/flink/conf$ cat masters
localhost
hadoop@kylin:/etc/flink/conf$ cat slaves
localhost

hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/start-cluster.sh
Starting cluster.
Starting standalonesession daemon on host kylin.
Starting taskexecutor daemon on host kylin.

hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/stop-cluster.sh
Stopping taskexecutor daemon (pid: 28874) on host kylin.
No standalonesession daemon (pid: 29287) is running anymore on kylin.

hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/jobmanager.sh start
Starting standalonesession daemon on host kylin.

hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/taskmanager.sh start
Starting taskexecutor daemon on host kylin.

hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/jobmanager.sh stop
Stopping standalonesession daemon (pid: 31453) on host kylin.
hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/taskmanager.sh stop
Stopping taskexecutor daemon (pid: 31841) on host kylin.

hadoop@kylin:/opt/flink/current$ ./bin/flink run examples/batch/WordCount.jar
Executing WordCount example with default input data set.
Use --input to specify file input.
Printing result to stdout. Use --output to specify output path.
Job has been submitted with JobID 13dc9137f65ff788b594f8d0911ddbf7
Program execution finished
Job with JobID 13dc9137f65ff788b594f8d0911ddbf7 has finished.
Job Runtime: 3323 ms
Accumulator Results:
- ee1e51903121e33fcc0251615b5a2841 (java.util.ArrayList) [170 elements]


(a,5)
(action,1)
(after,1)
(against,1)
(all,2)
(and,12)
......


# YARN mode and HA configuration
# https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/jobmanager_high_availability.html
# HADOOP_CONF_DIR has already been configured earlier!

hadoop@kylin:/opt/flink$ wget https://mirrors.tuna.tsinghua.edu.cn/apache/flink/flink-shaded-11.0/flink-shaded-11.0-src.tgz
--2020-06-05 02:13:37--  https://mirrors.tuna.tsinghua.edu.cn/apache/flink/flink-shaded-11.0/flink-shaded-11.0-src.tgz
Resolving mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)... 101.6.8.193, 2402:f000:1:408:8100::1
Connecting to mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)|101.6.8.193|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 14529 (14K) [application/octet-stream]
Saving to: ‘flink-shaded-11.0-src.tgz’

flink-shaded-11.0-src.tgz               100%[============================================================================>]  14.19K  --.-KB/s    in 0s

2020-06-05 02:13:37 (97.8 MB/s) - ‘flink-shaded-11.0-src.tgz’ saved [14529/14529]


hadoop@kylin:/opt/flink$ ll
total 28
drwxr-xr-x  3 hadoop hadoop  4096 Jun  5 02:16 ./
drwxr-xr-x 14 root   root    4096 Jun  5 00:28 ../
lrwxrwxrwx  1 hadoop hadoop    12 Jun  5 00:30 current -> flink-1.10.1/
drwxr-xr-x 10 hadoop hadoop  4096 May  7 18:09 flink-1.10.1/
-rw-rw-r--  1 hadoop hadoop 14529 May 26 01:03 flink-shaded-11.0-src.tgz
hadoop@kylin:/opt/flink$ tar -zxf flink-shaded-11.0-src.tgz
hadoop@kylin:/opt/flink$ ll
total 32
drwxr-xr-x  4 hadoop hadoop  4096 Jun  5 02:18 ./
drwxr-xr-x 14 root   root    4096 Jun  5 00:28 ../
lrwxrwxrwx  1 hadoop hadoop    12 Jun  5 00:30 current -> flink-1.10.1/
drwxr-xr-x 10 hadoop hadoop  4096 May  7 18:09 flink-1.10.1/
drwxrwxr-x 11 hadoop hadoop  4096 May 26 01:00 flink-shaded-11.0/
-rw-rw-r--  1 hadoop hadoop 14529 May 26 01:03 flink-shaded-11.0-src.tgz
hadoop@kylin:/opt/flink$ cd flink-shaded-11.0/
hadoop@kylin:/opt/flink/flink-shaded-11.0$ ll
total 80
drwxrwxr-x 11 hadoop hadoop  4096 May 26 01:00 ./
drwxr-xr-x  4 hadoop hadoop  4096 Jun  5 02:18 ../
drwxrwxr-x  3 hadoop hadoop  4096 May 26 01:00 flink-shaded-asm-7/
drwxrwxr-x  2 hadoop hadoop  4096 May 26 01:00 flink-shaded-force-shading/
drwxrwxr-x  3 hadoop hadoop  4096 May 26 01:00 flink-shaded-guava-18/
drwxrwxr-x  4 hadoop hadoop  4096 May 26 01:00 flink-shaded-jackson-parent/
drwxrwxr-x  3 hadoop hadoop  4096 May 26 01:00 flink-shaded-netty-4/
drwxrwxr-x  3 hadoop hadoop  4096 May 26 01:00 flink-shaded-netty-tcnative-dynamic/
drwxrwxr-x  3 hadoop hadoop  4096 May 26 01:00 flink-shaded-netty-tcnative-static/
drwxrwxr-x  4 hadoop hadoop  4096 May 26 01:00 flink-shaded-zookeeper-parent/
-rwxrwxr-x  1 hadoop hadoop 11357 May 26 01:00 LICENSE*
-rwxrwxr-x  1 hadoop hadoop   166 May 26 01:00 NOTICE*
-rwxrwxr-x  1 hadoop hadoop 13791 May 26 01:00 pom.xml*
-rwxrwxr-x  1 hadoop hadoop  1720 May 26 01:00 README.md*
drwxrwxr-x  3 hadoop hadoop  4096 May 26 01:00 tools/

# Hadoop 3.2.1 ships with guava 27 by default
# 
# https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/deployment/hadoop.html
hadoop@kylin:/opt/flink/flink-shaded-11.0$ mvn clean install -Dhadoop.version=3.2.1
# The build did not succeed
# Downloading the 2.x shaded package also works, as long as no Hadoop 3-specific APIs are needed
hadoop@kylin:/opt/flink/current/lib$ wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-9.0/flink-shaded-hadoop-2-uber-2.8.3-9.0.jar



hadoop@kylin:~$ cd /etc/flink/conf/
hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep high-availability
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
# high-availability: zookeeper
# high-availability.storageDir: hdfs:///flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
# high-availability.zookeeper.quorum: localhost:2181
# high-availability.zookeeper.client.acl: open
# 
hadoop@kylin:/etc/flink/conf$ sed -i 's/# high-availability/high-availability/' flink-conf.yaml

hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep high-availability
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
high-availability: zookeeper
high-availability.storageDir: hdfs://localhost:9000/flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
high-availability.zookeeper.quorum: localhost:2181
high-availability.zookeeper.client.acl: open


hadoop@kylin:/etc/flink/conf$ hadoop fs -mkdir -p /flink/ha/
hadoop@kylin:/etc/flink/conf$ hadoop fs -ls /
Found 6 items
drwxr-xr-x   - hadoop supergroup          0 2020-06-05 02:35 /flink
drwxrwxrwx   - hadoop supergroup          0 2020-06-04 19:27 /hbase
drwxrwxrwx   - hadoop supergroup          0 2020-06-04 17:56 /kylin
drwxrwxrwx   - hadoop supergroup          0 2020-06-03 19:25 /tez
drwxrwxrwx   - hadoop supergroup          0 2020-06-01 02:52 /tmp
drwxrwxrwx   - hadoop supergroup          0 2020-06-04 00:37 /user
hadoop@kylin:/etc/flink/conf$ hadoop fs -chmod -R go+w /flink
hadoop@kylin:/etc/flink/conf$ hadoop fs -ls /flink
Found 1 items
drwxrwxrwx   - hadoop supergroup          0 2020-06-05 02:35 /flink/ha

hadoop@kylin:/etc/flink/conf$ cat masters
localhost
hadoop@kylin:/etc/flink/conf$ vim masters
hadoop@kylin:/etc/flink/conf$ cat masters
localhost:6061
hadoop@kylin:/etc/flink/conf$ cat zoo.cfg | grep -v ^# | grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
clientPort=2181
server.1=localhost:2888:3888

hadoop@kylin:/etc/flink/conf$ cat /etc/zookeeper/conf/zoo.cfg | grep -v ^# | grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181

hadoop@kylin:/etc/flink/conf$ echo "server.1=localhost:2888:3888" >> /etc/zookeeper/conf/zoo.cfg

hadoop@kylin:/etc/flink/conf$ cat /etc/zookeeper/conf/zoo.cfg | grep -v ^# | grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181
server.1=localhost:2888:3888

hadoop@kylin:/etc/flink/conf$ cd /opt/flink/current/lib
hadoop@kylin:/opt/flink/current/lib$ wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar

hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep -v ^# | grep -v ^$
jobmanager.rpc.address: localhost
jobmanager.rpc.port: 6123
jobmanager.heap.size: 1024m
taskmanager.memory.process.size: 1728m
taskmanager.numberOfTaskSlots: 1
parallelism.default: 1
high-availability: zookeeper
high-availability.storageDir: hdfs:///flink/ha/
high-availability.zookeeper.quorum: localhost:2181
high-availability.zookeeper.path.root: /flink
yarn.application-attempts: 10
high-availability.zookeeper.client.acl: open
jobmanager.execution.failover-strategy: region
rest.port: 6061
io.tmp.dirs: /tmp

hadoop@kylin:~$ $FLINK_HOME/bin/start-cluster.sh
Starting HA cluster with 1 masters.
Starting standalonesession daemon on host kylin.
Starting taskexecutor daemon on host kylin.
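With high availability enabled, the JobManager registers itself under the configured ZooKeeper root. A quick illustrative check, assuming the zkCli.sh shipped with ZooKeeper 3.6.1:

hadoop@kylin:~$ $ZK_HOME/bin/zkCli.sh -server localhost:2181 ls /flink
hadoop@kylin:~$ hadoop fs -ls /flink/ha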

hadoop@kylin:/opt/flink/current$ ./bin/flink run -yjm 1024m -ytm 4096m -ys 2  ./examples/batch/WordCount.jar
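The -yjm/-ytm/-ys flags are YARN shortcut options and only take effect for YARN submissions; against the standalone HA session started above, the same example can simply be run as:

hadoop@kylin:/opt/flink/current$ ./bin/flink run ./examples/batch/WordCount.jar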

hadoop@kylin:/opt/flink/current$ $FLINK_HOME/bin/yarn-session.sh -help
Usage:
   Optional
     -at,--applicationType <arg>     Set a custom application type for the application on YARN
     -D <property=value>             use value for given property
     -d,--detached                   If present, runs the job in detached mode
     -h,--help                       Help for the Yarn session CLI.
     -id,--applicationId <arg>       Attach to running YARN session
     -j,--jar <arg>                  Path to Flink jar file
     -jm,--jobManagerMemory <arg>    Memory for JobManager Container with optional unit (default: MB)
     -m,--jobmanager <arg>           Address of the JobManager (master) to which to connect. Use this flag to connect to a different JobManager than the one specified in the configuration.
     -nl,--nodeLabel <arg>           Specify YARN node label for the YARN application
     -nm,--name <arg>                Set a custom name for the application on YARN
     -q,--query                      Display available YARN resources (memory, cores)
     -qu,--queue <arg>               Specify YARN queue.
     -s,--slots <arg>                Number of slots per TaskManager
     -t,--ship <arg>                 Ship files in the specified directory (t for transfer)
     -tm,--taskManagerMemory <arg>   Memory per TaskManager Container with optional unit (default: MB)
     -yd,--yarndetached              If present, runs the job in detached mode (deprecated; use non-YARN specific option instead)
     -z,--zookeeperNamespace <arg>   Namespace to create the Zookeeper sub-paths for high availability mode
---
yarn-session parameter reference
  -n : number of TaskManagers to allocate (no longer listed in the 1.10 help above);
  -d : run in detached mode;
  -id : attach to a running YARN application by ID;
  -j : path to the Flink jar file;
  -jm : memory for the JobManager container (default unit: MB);
  -nl : YARN node label for the application;
  -nm : custom name for the application on YARN;
  -q : display available YARN resources (memory, cores);
  -qu : YARN queue to submit to;
  -s : number of slots per TaskManager;
  -st : start Flink in streaming mode;
  -tm : memory per TaskManager container (default unit: MB);
  -z : namespace for the ZooKeeper sub-paths in high-availability mode;
---
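As a hedged usage sketch (the memory sizes and session name are illustrative), a detached Flink session on the local YARN can be started and later torn down like this:

hadoop@kylin:~$ $FLINK_HOME/bin/yarn-session.sh -d -nm flink-session -jm 1024m -tm 2048m -s 2
# jobs submitted afterwards with 'flink run' attach to this session via the YARN properties file
hadoop@kylin:~$ yarn application -list
hadoop@kylin:~$ yarn application -kill <applicationId>   # stop the session by killing its YARN application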

Tez 0.10.1

# Tez (0.10.1) supports hadoop 3.2.1
# http://tez.apache.org/
root@kylin:~/wget# mkdir Tez-0.10.1
root@kylin:~/wget# cd Tez-0.10.1/
root@kylin:~/wget/Tez-0.10.1# git --version
git version 2.17.1

root@kylin:~/wget/Tez-0.10.1# git clone https://github.com/apache/tez.git
Cloning into 'tez'...
remote: Enumerating objects: 64, done.
remote: Counting objects: 100% (64/64), done.
remote: Compressing objects: 100% (42/42), done.
remote: Total 97137 (delta 11), reused 42 (delta 5), pack-reused 97073
Receiving objects: 100% (97137/97137), 27.03 MiB | 795.00 KiB/s, done.
Resolving deltas: 100% (42789/42789), done.
Checking out files: 100% (1862/1862), done.
# Build the Tez source with maven
root@kylin:~/wget/Tez/Tez-0.10.1# mvn install -Dhadoop.version=3.2.1 -DskipTests -Dmaven.javadoc.skip=true
......
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary for tez 0.10.1-SNAPSHOT:
[INFO]
[INFO] tez ................................................ SUCCESS [  4.663 s]
[INFO] hadoop-shim ........................................ SUCCESS [  2.121 s]
[INFO] tez-api ............................................ SUCCESS [  2.736 s]
[INFO] tez-build-tools .................................... SUCCESS [  0.188 s]
[INFO] tez-common ......................................... SUCCESS [  0.385 s]
[INFO] tez-runtime-internals .............................. SUCCESS [  0.404 s]
[INFO] tez-runtime-library ................................ SUCCESS [  0.878 s]
[INFO] tez-mapreduce ...................................... SUCCESS [  0.676 s]
[INFO] tez-examples ....................................... SUCCESS [  0.328 s]
[INFO] tez-dag ............................................ SUCCESS [  2.846 s]
[INFO] tez-tests .......................................... SUCCESS [  0.727 s]
[INFO] tez-ext-service-tests .............................. SUCCESS [  0.447 s]
[INFO] tez-ui ............................................. SUCCESS [38:56 min]
[INFO] tez-plugins ........................................ SUCCESS [  0.112 s]
[INFO] tez-protobuf-history-plugin ........................ SUCCESS [  8.387 s]
[INFO] tez-yarn-timeline-history .......................... SUCCESS [  2.745 s]
[INFO] tez-yarn-timeline-history-with-acls ................ SUCCESS [  0.931 s]
[INFO] tez-yarn-timeline-cache-plugin ..................... SUCCESS [02:56 min]
[INFO] tez-yarn-timeline-history-with-fs .................. SUCCESS [  0.976 s]
[INFO] tez-history-parser ................................. SUCCESS [ 37.990 s]
[INFO] tez-aux-services ................................... SUCCESS [06:13 min]
[INFO] tez-tools .......................................... SUCCESS [  0.048 s]
[INFO] tez-perf-analyzer .................................. SUCCESS [  0.043 s]
[INFO] tez-job-analyzer ................................... SUCCESS [  2.030 s]
[INFO] tez-javadoc-tools .................................. SUCCESS [  0.693 s]
[INFO] hadoop-shim-impls .................................. SUCCESS [  0.092 s]
[INFO] hadoop-shim-2.8 .................................... SUCCESS [  0.600 s]
[INFO] tez-dist ........................................... SUCCESS [01:45 min]
[INFO] Tez ................................................ SUCCESS [  0.069 s]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time:  51:05 min
[INFO] Finished at: 2020-06-03T17:02:22+08:00
[INFO] ------------------------------------------------------------------------


root@kylin:~/wget/Tez/Tez-0.10.1# ll
total 216
drwxr-xr-x 22 root root  4096 Jun  3 16:06 ./
drwxr-xr-x  3 root root  4096 Jun  3 04:22 ../
drwxr-xr-x  8 root root  4096 Jun  3 03:26 .git/
-rw-r--r--  1 root root   100 Jun  3 03:25 .gitignore
-rw-r--r--  1 root root  1021 Jun  3 03:25 .travis.yml
-rw-r--r--  1 root root  7160 Jun  3 03:25 BUILDING.txt
lrwxrwxrwx  1 root root    33 Jun  3 03:25 INSTALL.md -> docs/src/site/markdown/install.md
-rw-r--r--  1 root root 35592 Jun  3 03:25 KEYS
-rw-r--r--  1 root root 11358 Jun  3 03:25 LICENSE.txt
-rw-r--r--  1 root root   164 Jun  3 03:25 NOTICE.txt
-rw-r--r--  1 root root  1432 Jun  3 03:25 README.md
-rw-r--r--  1 root root  5819 Jun  3 03:25 Tez_DOAP.rdf
drwxr-xr-x  3 root root  4096 Jun  3 03:25 build-tools/
drwxr-xr-x  4 root root  4096 Jun  3 17:02 docs/
drwxr-xr-x  4 root root  4096 Jun  3 12:01 hadoop-shim/
drwxr-xr-x  5 root root  4096 Jun  3 17:00 hadoop-shim-impls/
-rw-r--r--  1 root root 47609 Jun  3 12:29 pom.xml
drwxr-xr-x  3 root root  4096 Jun  3 04:35 target/
drwxr-xr-x  4 root root  4096 Jun  3 12:32 tez-api/
drwxr-xr-x  4 root root  4096 Jun  3 12:32 tez-build-tools/
drwxr-xr-x  4 root root  4096 Jun  3 12:32 tez-common/
drwxr-xr-x  4 root root  4096 Jun  3 13:36 tez-dag/
drwxr-xr-x  5 root root  4096 Jun  3 17:00 tez-dist/
drwxr-xr-x  4 root root  4096 Jun  3 12:35 tez-examples/
drwxr-xr-x  4 root root  4096 Jun  3 14:13 tez-ext-service-tests/
drwxr-xr-x  4 root root  4096 Jun  3 12:34 tez-mapreduce/
drwxr-xr-x 10 root root  4096 Jun  3 16:50 tez-plugins/
drwxr-xr-x  4 root root  4096 Jun  3 12:32 tez-runtime-internals/
drwxr-xr-x  4 root root  4096 Jun  3 12:33 tez-runtime-library/
drwxr-xr-x  4 root root  4096 Jun  3 14:13 tez-tests/
drwxr-xr-x  9 root root  4096 Jun  3 17:00 tez-tools/
drwxr-xr-x  4 root root  4096 Jun  3 16:26 tez-ui/

# After the build completes, the compiled Tez artifacts can be found under tez-dist/target/ in the source tree.
# Two variants are produced, tez-0.10.1-SNAPSHOT-minimal.tar.gz and tez-0.10.1-SNAPSHOT.tar.gz; these are the Tez packages we need.
root@kylin:~/wget/Tez/Tez-0.10.1# cd tez-dist/target/
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# ll
total 74124
drwxr-xr-x 6 root root     4096 Jun  3 17:02 ./
drwxr-xr-x 5 root root     4096 Jun  3 17:00 ../
drwxr-xr-x 2 root root     4096 Jun  3 17:00 archive-tmp/
drwxr-xr-x 2 root root     4096 Jun  3 17:02 maven-archiver/
drwxr-xr-x 3 root root     4096 Jun  3 17:01 tez-0.10.1-SNAPSHOT/
drwxr-xr-x 3 root root     4096 Jun  3 17:02 tez-0.10.1-SNAPSHOT-minimal/
-rw-r--r-- 1 root root 19097221 Jun  3 17:02 tez-0.10.1-SNAPSHOT-minimal.tar.gz
-rw-r--r-- 1 root root 56772909 Jun  3 17:02 tez-0.10.1-SNAPSHOT.tar.gz
-rw-r--r-- 1 root root     2924 Jun  3 17:02 tez-dist-0.10.1-SNAPSHOT-tests.jar
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# pwd
/root/wget/Tez/Tez-0.10.1/tez-dist/target

root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# mkdir -p /opt/tez/tez-0.10.1
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# tar -zxf tez-0.10.1-SNAPSHOT.tar.gz -C /opt/tez/tez-0.10.1/
#root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# tar -zxf tez-0.10.1-SNAPSHOT-minimal.tar.gz -C /opt/tez/
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# cd /opt/tez/
root@kylin:/opt/tez# ll
total 4508
drwxr-xr-x  4 root root    4096 Jun  3 19:12 ./
drwxr-xr-x 13 root root    4096 Jun  3 19:10 ../
-rw-r--r--  1 root root   15021 Jun  3 03:26 LICENSE
-rw-r--r--  1 root root    1465 Jun  3 03:26 LICENSE-BSD-3clause
-rw-r--r--  1 root root   36261 Jun  3 03:26 LICENSE-CDDLv1.1-GPLv2_withCPE
-rw-r--r--  1 root root    1045 Jun  3 03:26 LICENSE-MIT
-rw-r--r--  1 root root    4128 Jun  3 03:26 LICENSE-SIL_OpenFontLicense-v1.1
-rw-r--r--  1 root root     804 Jun  3 03:26 NOTICE
-rw-r--r--  1 root root    8863 Jun  3 12:02 hadoop-shim-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root    6415 Jun  3 17:00 hadoop-shim-2.8-0.10.1-SNAPSHOT.jar
drwxr-xr-x  2 root root    4096 Jun  3 19:12 lib/
drwxr-xr-x  3 root root    4096 Jun  3 19:11 tez-0.10.1/
-rw-r--r--  1 root root 1090694 Jun  3 16:11 tez-api-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root    5468 Jun  3 12:32 tez-build-tools-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   87490 Jun  3 12:32 tez-common-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root 1452967 Jun  3 16:11 tez-dag-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   56861 Jun  3 12:35 tez-examples-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   79220 Jun  3 16:53 tez-history-parser-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   15328 Jun  3 17:00 tez-javadoc-tools-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   78996 Jun  3 17:00 tez-job-analyzer-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root  299473 Jun  3 12:35 tez-mapreduce-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   80514 Jun  3 16:50 tez-protobuf-history-plugin-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root  201472 Jun  3 12:32 tez-runtime-internals-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root  797815 Jun  3 12:33 tez-runtime-library-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root  159136 Jun  3 14:13 tez-tests-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   11533 Jun  3 16:51 tez-yarn-timeline-cache-plugin-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   28229 Jun  3 16:50 tez-yarn-timeline-history-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root    7816 Jun  3 16:50 tez-yarn-timeline-history-with-acls-0.10.1-SNAPSHOT.jar
-rw-r--r--  1 root root   23670 Jun  3 16:53 tez-yarn-timeline-history-with-fs-0.10.1-SNAPSHOT.jar
root@kylin:/opt/tez# chown -R hadoop:hadoop /opt/tez/

root@kylin:/opt/tez# su hadoop
hadoop@kylin:~$ cd /opt/tez/
hadoop@kylin:/opt/tez$ hadoop fs -mkdir /tez
hadoop@kylin:/opt/tez$ hadoop fs -put tez-0.10.1 /tez
hadoop@kylin:/opt/tez$ hadoop fs -chmod -R 777 /tez

hadoop@kylin:/opt/tez$ cd /etc/hive/conf/
hadoop@kylin:/etc/hive/conf$ vim tez-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
        <property>
                <name>tez.lib.uris</name>
                <value>${fs.defaultFS}/tez/tez-0.10.1,${fs.defaultFS}/tez/tez-0.10.1/lib</value>
        </property>
        <property>
                <name>tez.lib.uris.classpath</name>
                <value>${fs.defaultFS}/tez/tez-0.10.1,${fs.defaultFS}/tez/tez-0.10.1/lib</value>
        </property>
        <property>
                <name>tez.use.cluster.hadoop-libs</name>
                <value>true</value>
        </property>
        <property>
                <name>tez.am.resource.memory.mb</name>
                <value>2048</value>
        </property>
        <property>
                <name>tez.am.resource.cpu.vcores</name>
                <value>2</value>
        </property>
</configuration>

# Set Hive's default execution engine to tez
hadoop@kylin:/etc/hive/conf$ vim hive-site.xml
---
        <property>
                <name>hive.execution.engine</name>
                <value>tez</value>
        </property>
---

root@kylin:~/wget# tar -zxf tez-0.10.1-SNAPSHOT-minimal.tar.gz -C /opt/tez/tez-0.10.1/
root@kylin:~/wget# chown -R hadoop:hadoop /opt/tez/
hadoop@kylin:/opt/tez$ ln -s tez-0.10.1 current

hadoop@kylin:/opt/tez$ vim /home/hadoop/.bashrc
---
# Tez
export TEZ_HOME=/opt/tez/current
---
hadoop@kylin:/opt/tez$ source /home/hadoop/.bashrc

hadoop@kylin:/opt/tez$ cd /etc/hive/conf/
hadoop@kylin:/etc/hive/conf$ vim hive-env.sh
---
# Tez
export TEZ_HOME=/opt/tez/current
export TEZ_JARS=""
for jar in `ls $TEZ_HOME |grep jar`; do
        export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/$jar
done
for jar in `ls $TEZ_HOME/lib`; do
        export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/lib/$jar
done
# TEZ_JARS already starts with ':' from the loops above, so append it to the existing classpath
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}${TEZ_JARS}
---

hadoop@kylin:/etc/hive/conf$ cd /opt/hive/current/conf/
hadoop@kylin:/opt/hive/current/conf$ ln -s /etc/hive/conf/tez-site.xml tez-site.xml
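At this point the Tez runtime can be smoke-tested independently of Hive using the bundled example job. A hedged sketch, assuming the example jar produced by the build sits under $TEZ_HOME and that the Tez configuration and jars are put on the Hadoop classpath for the client:

hadoop@kylin:/opt/tez$ export HADOOP_CLASSPATH=/etc/hive/conf:$TEZ_HOME/*:$TEZ_HOME/lib/*
hadoop@kylin:/opt/tez$ hadoop fs -mkdir -p /tmp/tez-test
hadoop@kylin:/opt/tez$ hadoop fs -put /etc/hosts /tmp/tez-test/input.txt
hadoop@kylin:/opt/tez$ hadoop jar $TEZ_HOME/tez-examples-0.10.1-SNAPSHOT.jar orderedwordcount /tmp/tez-test/input.txt /tmp/tez-test/output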

---
hadoop@kylin:~$ beeline
Beeline version 3.1.2 by Apache Hive
beeline> !connect jdbc:hive2://hadoop:10000/;auth=noSasl
Connecting to jdbc:hive2://hadoop:10000/;auth=noSasl
Enter username for jdbc:hive2://hadoop:10000/: user2
Enter password for jdbc:hive2://hadoop:10000/: *******
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://hadoop:10000/> show databases;
+----------------+
| database_name  |
+----------------+
| default        |
| test_bdw       |
+----------------+
2 rows selected (0.865 seconds)
0: jdbc:hive2://hadoop:10000/> use default;
No rows affected (0.155 seconds)
0: jdbc:hive2://hadoop:10000/> show tables;
+---------------------------+
|         tab_name          |
+---------------------------+
| kylin_account             |
| kylin_cal_dt              |
| kylin_category_groupings  |
| kylin_country             |
| kylin_sales               |
+---------------------------+
5 rows selected (0.178 seconds)
0: jdbc:hive2://hadoop:10000/> select count(*) from kylin_sales;
Error: Error while processing statement: FAILED: Execution Error, return code -101 from org.apache.hadoop.hive.ql.exec.tez.TezTask. tried to access field com.google.protobuf.AbstractMessage.memoizedSize from class org.apache.tez.dag.api.records.DAGProtos$ConfigurationProto (state=08S01,code=-101)
0: jdbc:hive2://hadoop:10000/> set hive.execution.engine;
+----------------------------+
|            set             |
+----------------------------+
| hive.execution.engine=tez  |
+----------------------------+
1 row selected (0.023 seconds)
0: jdbc:hive2://hadoop:10000/> set hive.execution.engine=mr;
No rows affected (0.024 seconds)
0: jdbc:hive2://hadoop:10000/> select count(*) from kylin_sales;
+--------+
|  _c0   |
+--------+
| 10000  |
+--------+
1 row selected (114.402 seconds)
0: jdbc:hive2://hadoop:10000/> !quit
Closing: 0: jdbc:hive2://hadoop:10000/;auth=noSasl
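The TezTask failure above ('tried to access field com.google.protobuf.AbstractMessage.memoizedSize') is a protobuf version mismatch: the generated DAGProtos classes meet a different protobuf-java release at runtime than the one they were compiled against (with tez.use.cluster.hadoop-libs=true, Hadoop's and Hive's own jars are also on the classpath). A hedged way to narrow it down is to compare the protobuf jars on each side and align them on the version Tez was built with:

hadoop@kylin:~$ hadoop fs -ls /tez/tez-0.10.1/lib | grep protobuf
hadoop@kylin:~$ ls $HADOOP_HOME/share/hadoop/common/lib | grep protobuf
hadoop@kylin:~$ ls $HIVE_HOME/lib | grep protobuf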

Start services

hadoop@kylin:~$ cat hadoopstart.sh
#!/bin/bash

##############################
#Hadoopstart.sh
#Author kylin
##############################
# Start Hadoop
echo "Start Hadoop..."
echo "[ namenode;secondarynamenode;datanode;resourcemanager;nodemanager;proxyserver;historyserver ]"
$HADOOP_HOME/bin/hdfs --daemon start namenode
$HADOOP_HOME/bin/hdfs --daemon start secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon start datanode
$HADOOP_HOME/bin/yarn --daemon start resourcemanager
$HADOOP_HOME/bin/yarn --daemon start nodemanager
$HADOOP_HOME/bin/yarn --daemon start proxyserver
$HADOOP_HOME/bin/mapred --daemon start historyserver

# Start Hive
echo "Start Hive..."
echo "[ metastore;hiveserver2 ]"
# cd $HIVE_HOME
# $HIVE_HOME/bin/hiveserver2 >/dev/null 2>&1 &
# $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
# $HIVE_HOME/hcatalog/bin/hcat
# $HIVE_HOME/hcatalog/sbin/webhcat_server.sh start
nohup hive --service metastore > $HIVE_HOME/logs/metastore.log 2>&1 &
nohup hive --service hiveserver2 > $HIVE_HOME/logs/hiveserver2.log 2>&1 &

# Start Zookeeper
echo "Start Zookeeper..."
$ZK_HOME/bin/zkServer.sh start

# Start HBase
echo "Start HBase..."
echo "[ master;regionserver ]"
$HBASE_HOME/bin/hbase-daemon.sh start master
$HBASE_HOME/bin/hbase-daemon.sh start regionserver

# Start Kafka
echo "Start Kafka..."
echo "[ kafka0;kafka1;kafka2 ]"
nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &

# Start Spark
echo "Start Spark..."
echo "[ master;worker ]"
$SPARK_HOME/sbin/start-master.sh
$SPARK_HOME/sbin/start-slave.sh spark://kylin.localdomain:7077

# Start Flink
echo "Start Flink..."
echo "[ jobmanager;taskmanager ]"
$FLINK_HOME/bin/start-cluster.sh
# $FLINK_HOME/bin/jobmanager.sh start
# $FLINK_HOME/bin/taskmanager.sh start

# Start Kylin
echo "Start Kylin..."
#$KYLIN_HOME/bin/check-env.sh
nohup $KYLIN_HOME/bin/kylin.sh start > $KYLIN_HOME/logs/nohup.out-startkylin.log 2>&1 &
sleep 3m
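# Kylin start-up is slow and sometimes fails on the first attempt: read the first word of the last log line,
# report success if it is 'Web', and retry once if it is 'ERROR:'.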
Info=`cat /opt/kylin/current/logs/nohup.out-startkylin.log | awk 'END{print}' | awk '{print $1}'`
if [[ $Info = Web ]];then
        echo "A new Kylin instance is started by hadoop. To stop it, run '$KYLIN_HOME/bin/kylin.sh stop'"
        echo "Check the log at /opt/kylin/current/logs/nohup.out-startkylin.log"
        echo "Web UI is at http://kylin.localdomain:7070/kylin"
else
        if [[ $Info = ERROR: ]];then
                nohup $KYLIN_HOME/bin/kylin.sh start > $KYLIN_HOME/logs/nohup.out-startkylin.log 2>&1 &
                sleep 3m
                END=`cat /opt/kylin/current/logs/nohup.out-startkylin.log | awk 'END{print}'`
                echo "$END"
        else
                echo "$Info"
        fi
fi

hadoop@kylin:~$ /bin/bash /home/hadoop/hadoopstart.sh
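After the start script finishes, a quick illustrative check is jps; on this setup one would expect the HDFS/YARN daemons, QuorumPeerMain, HMaster and HRegionServer, the Kafka brokers, the Spark Master and Worker, the Flink StandaloneSessionClusterEntrypoint and TaskManagerRunner, plus RunJar entries for the Hive metastore and HiveServer2:

hadoop@kylin:~$ jps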

Stop services

hadoop@kylin:~$ cat hadoopstop.sh
#!/bin/bash

##############################
#Hadoopstop.sh
#Author kylin
##############################
echo "Stop Kylin..."
# Stop Kylin
$KYLIN_HOME/bin/kylin.sh stop

# Stop Flink
echo "Stop Flink..."
$FLINK_HOME/bin/stop-cluster.sh
# $FLINK_HOME/bin/jobmanager.sh stop
# $FLINK_HOME/bin/taskmanager.sh stop

echo "Stop Spark..."
# Stop Spark
$SPARK_HOME/sbin/stop-slave.sh spark://kylin.localdomain:7077
$SPARK_HOME/sbin/stop-master.sh

echo "Stop Kafka..."
# Stop Kafka
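# Note: kafka-server-stop.sh signals every local Kafka broker process; the properties-file arguments below
# are kept only for symmetry with the start script.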
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &
sleep 3s
#jps | grep Kafka | awk '{print $1}' | xargs kill -9

echo "Stop HBase..."
# Stop HBase
$HBASE_HOME/bin/hbase-daemon.sh stop master
$HBASE_HOME/bin/hbase-daemon.sh stop regionserver

echo "Stop Zookeeper..."
# Stop Zookeeper
$ZK_HOME/bin/zkServer.sh stop

echo "Stop Hive..."
# Kill Hiveserver
cd $HIVE_HOME
# $HIVE_HOME/hcatalog/sbin/webhcat_server.sh stop
# $HIVE_HOME/hcatalog/sbin/hcat_server.sh stop
jps | grep RunJar | awk '{print $1}' | xargs kill -9

echo "Stop Hadoop..."
# Stop Hadoop
$HADOOP_HOME/bin/mapred --daemon stop historyserver
$HADOOP_HOME/bin/yarn --daemon stop proxyserver
$HADOOP_HOME/bin/yarn --daemon stop nodemanager
$HADOOP_HOME/bin/yarn --daemon stop resourcemanager
$HADOOP_HOME/bin/hdfs --daemon stop datanode
$HADOOP_HOME/bin/hdfs --daemon stop secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon stop namenode

hadoop@kylin:~$ /bin/bash /home/hadoop/hadoopstop.sh
