Windows Subsystem for Linux (WSL 2) Deployment and Usage Notes
root@kylin:~# sudo apt update && sudo apt upgrade
root@kylin:~# cd /root
root@kylin:~# vim /etc/profile
root@kylin:~# sudo apt install ssh
root@kylin:~# service ssh start
root@kylin:~# sshd -T
root@kylin:~# sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config
root@kylin:~# sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
root@kylin:~# service ssh restart
root@kylin:~# ssh-keygen -t dsa -f /etc/ssh/ssh_host_dsa_key
root@kylin:~# ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key
root@kylin:~# ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key
root@kylin:~# service ssh restart
root@kylin:~# ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key
root@kylin:~# service ssh restart
root@kylin:~# service ssh status
# Configure passwordless SSH login to localhost
root@kylin:~# ssh-keygen -t rsa
root@kylin:~# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
root@kylin:~# chmod 0600 ~/.ssh/authorized_keys
root@kylin:~# ssh localhost
# Check the SELinux status
root@kylin:~# getenforce
Command 'getenforce' not found, but can be installed with:
apt install selinux-utils
# Disable Transparent Huge Pages (THP)
root@kylin:~# echo never > /sys/kernel/mm/transparent_hugepage/enabled
root@kylin:~# echo never > /sys/kernel/mm/transparent_hugepage/defrag
root@kylin:~# echo 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' >> /etc/rc.local
root@kylin:~# echo 'echo never > /sys/kernel/mm/transparent_hugepage/defrag' >> /etc/rc.local
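# Note: on Ubuntu 18.04 (including under WSL) /etc/rc.local does not exist by default and is only executed if it has a shebang and is executable.
# A minimal sketch of writing the equivalent rc.local as a whole file instead of the two appends above (assuming rc.local is honored at all in your WSL environment):
cat > /etc/rc.local <<'EOF'
#!/bin/sh -e
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
exit 0
EOF
chmod +x /etc/rc.local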
# Minimize swapping (vm.swappiness)
root@kylin:~# sudo sysctl vm.swappiness=1
vm.swappiness = 1
root@kylin:~# echo 'vm.swappiness=1' >> /etc/sysctl.conf
# MySQL setup
root@kylin:~# sudo apt-get install mysql-server mysql-client libmysql-java
root@kylin:~# cat /etc/mysql/my.cnf
#
# The MySQL database server configuration file.
#
# You can copy this to one of:
# - "/etc/mysql/my.cnf" to set global options,
# - "~/.my.cnf" to set user-specific options.
#
# One can use all long options that the program supports.
# Run program with --help to get a list of available options and with
# --print-defaults to see which it would actually understand and use.
#
# For explanations see
# http://dev.mysql.com/doc/mysql/en/server-system-variables.html
#
# * IMPORTANT: Additional settings that can override those from this file!
# The files must end with '.cnf', otherwise they'll be ignored.
#
!includedir /etc/mysql/conf.d/
!includedir /etc/mysql/mysql.conf.d/
root@kylin:~# ll /etc/mysql/mysql.conf.d/
total 24
drwxr-xr-x 2 root root 4096 May 28 18:52 ./
drwxr-xr-x 4 root root 4096 May 28 18:41 ../
-rw-r--r-- 1 root root 4097 May 28 18:52 mysqld.cnf
-rw-r--r-- 1 root root 3054 May 28 18:44 mysqld.cnf.bak
-rw-r--r-- 1 root root 21 Jan 12 2018 mysqld_safe_syslog.cnf
root@kylin:~# cat /etc/mysql/mysql.conf.d/mysqld.cnf
#
# The MySQL database server configuration file.
#
# You can copy this to one of:
# - "/etc/mysql/my.cnf" to set global options,
# - "~/.my.cnf" to set user-specific options.
#
# One can use all long options that the program supports.
# Run program with --help to get a list of available options and with
# --print-defaults to see which it would actually understand and use.
#
# For explanations see
# http://dev.mysql.com/doc/mysql/en/server-system-variables.html
# This will be passed to all mysql clients
# It has been reported that passwords should be enclosed with ticks/quotes
# escpecially if they contain "#" chars...
# Remember to edit /etc/mysql/debian.cnf when changing the socket location.
# Here is entries for some specific programs
# The following values assume you have at least 32M ram
[mysqld_safe]
socket = /var/run/mysqld/mysqld.sock
nice = 0
#log-error=/var/log/mysqld.log
#pid-file=/var/run/mysqld/mysqld.pid
sql_mode=STRICT_ALL_TABLES
[mysqld]
#
# * Basic Settings
#
user = mysql
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
port = 3306
basedir = /usr
datadir = /var/lib/mysql
tmpdir = /tmp
lc-messages-dir = /usr/share/mysql
skip-external-locking
# Here is an option file with Cloudera recommended settings:
#datadir=/var/lib/mysql
#socket=/var/lib/mysql/mysql.sock
transaction-isolation = READ-COMMITTED
symbolic-links = 0
key_buffer_size = 32M
#max_allowed_packet = 32M
thread_stack = 256K
thread_cache_size = 64
query_cache_limit = 8M
query_cache_size = 64M
query_cache_type = 1
max_connections = 550
log_bin=/var/lib/mysql/mysql_binary_log
server_id=1
binlog_format = mixed
read_buffer_size = 2M
read_rnd_buffer_size = 16M
sort_buffer_size = 8M
join_buffer_size = 8M
# InnoDB settings
innodb_file_per_table = 1
innodb_flush_log_at_trx_commit = 2
innodb_log_buffer_size = 64M
innodb_buffer_pool_size = 4G
innodb_thread_concurrency = 8
innodb_flush_method = O_DIRECT
innodb_log_file_size = 512M
#
# Instead of skip-networking the default is now to listen only on
# localhost which is more compatible and is not less secure.
#bind-address = 127.0.0.1
#
# * Fine Tuning
#
#key_buffer_size = 16M
#max_allowed_packet = 16M
#thread_stack = 192K
#thread_cache_size = 8
# This replaces the startup script and checks MyISAM tables if needed
# the first time they are touched
myisam-recover-options = BACKUP
#max_connections = 100
#table_open_cache = 64
#thread_concurrency = 10
#
# * Query Cache Configuration
#
#query_cache_limit = 1M
#query_cache_size = 16M
#
# * Logging and Replication
#
# Both location gets rotated by the cronjob.
# Be aware that this log type is a performance killer.
# As of 5.1 you can enable the log at runtime!
#general_log_file = /var/log/mysql/mysql.log
#general_log = 1
#
# Error log - should be very few entries.
#
log_error = /var/log/mysql/error.log
#
# Here you can see queries with especially long duration
#slow_query_log = 1
#slow_query_log_file = /var/log/mysql/mysql-slow.log
#long_query_time = 2
#log-queries-not-using-indexes
#
# The following can be used as easy to replay backup logs or for replication.
# note: if you are setting up a replication slave, see README.Debian about
# other settings you may need to change.
#server-id = 1
#log_bin = /var/log/mysql/mysql-bin.log
expire_logs_days = 10
max_binlog_size = 100M
#binlog_do_db = include_database_name
#binlog_ignore_db = include_database_name
#
# * InnoDB
#
# InnoDB is enabled by default with a 10MB datafile in /var/lib/mysql/.
# Read the manual for more InnoDB related options. There are many!
#
# * Security Features
#
# Read the manual, too, if you want chroot!
# chroot = /var/lib/mysql/
#
# For generating SSL certificates I recommend the OpenSSL GUI "tinyca".
#
# ssl-ca=/etc/mysql/cacert.pem
# ssl-cert=/etc/mysql/server-cert.pem
# ssl-key=/etc/mysql/server-key.pem
root@kylin:~# service mysql start
* Starting MySQL database server mysqld
No directory, logging in with HOME=/
mkdir: cannot create directory ‘//.cache’: Permission denied
-su: 19: /etc/profile.d/wsl-integration.sh: cannot create //.cache/wslu/integration: Directory nonexistent
# Fix for the errors above: in the MySQL init script, start mysqld as root instead of the mysql user.
root@kylin:/# vim /etc/init.d/mysql
---
# Start MySQL!
#su - mysql -s /bin/sh -c "/usr/bin/mysqld_safe > /dev/null 2>&1 &"
su - root -s /bin/sh -c "/usr/bin/mysqld_safe > /dev/null 2>&1 &"
---
root@kylin:~# service mysql stop
* Stopping MySQL database server mysqld [ OK ]
root@kylin:~# service mysql start
* Starting MySQL database server mysqld [ OK ]
[ OK ]
root@kylin:~# mysql
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 4
Server version: 5.7.30-0ubuntu0.18.04.1-log (Ubuntu)
Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| ib_logfile |
| mysql |
| performance_schema |
| sys |
+--------------------+
5 rows in set (0.01 sec)
mysql> use mysql;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> show tables;
+---------------------------+
| Tables_in_mysql |
+---------------------------+
| columns_priv |
| db |
| engine_cost |
| event |
| func |
| general_log |
| gtid_executed |
| help_category |
| help_keyword |
| help_relation |
| help_topic |
| innodb_index_stats |
| innodb_table_stats |
| ndb_binlog_index |
| plugin |
| proc |
| procs_priv |
| proxies_priv |
| server_cost |
| servers |
| slave_master_info |
| slave_relay_log_info |
| slave_worker_info |
| slow_log |
| tables_priv |
| time_zone |
| time_zone_leap_second |
| time_zone_name |
| time_zone_transition |
| time_zone_transition_type |
| user |
+---------------------------+
31 rows in set (0.00 sec)
mysql> select host,user,authentication_string,plugin from mysql.user;
+-----------+------------------+-------------------------------------------+-----------------------+
| host | user | authentication_string | plugin |
+-----------+------------------+-------------------------------------------+-----------------------+
| localhost | root | | auth_socket |
| localhost | mysql.session | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | mysql.sys | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | debian-sys-maint | *B886BE6360B8C1E19C15E8FF311E2588171905BE | mysql_native_password |
+-----------+------------------+-------------------------------------------+-----------------------+
4 rows in set (0.00 sec)
mysql> update mysql.user set authentication_string=password('123456'),plugin="mysql_native_password" where user='root' and host ='localhost';
Query OK, 1 row affected, 1 warning (0.01 sec)
Rows matched: 1 Changed: 1 Warnings: 1
mysql> select host,user,authentication_string,plugin from mysql.user;
+-----------+------------------+-------------------------------------------+-----------------------+
| host | user | authentication_string | plugin |
+-----------+------------------+-------------------------------------------+-----------------------+
| localhost | root | *6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9 | mysql_native_password |
| localhost | mysql.session | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | mysql.sys | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | debian-sys-maint | *B886BE6360B8C1E19C15E8FF311E2588171905BE | mysql_native_password |
+-----------+------------------+-------------------------------------------+-----------------------+
4 rows in set (0.00 sec)
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456' WITH GRANT OPTION;
Query OK, 0 rows affected, 1 warning (0.01 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.02 sec)
mysql> SHOW GRANTS FOR 'root'@'%';
+-------------------------------------------------------------+
| Grants for root@% |
+-------------------------------------------------------------+
| GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION |
+-------------------------------------------------------------+
1 row in set (0.00 sec)
mysql> exit;
Bye
root@kylin:~# netstat -an | grep 3306
tcp6 0 0 :::3306 :::* LISTEN
root@kylin:~# mysql -u root --password='123456' -e "select host,user,authentication_string,plugin from mysql.user"
mysql: [Warning] Using a password on the command line interface can be insecure.
+-----------+------------------+-------------------------------------------+-----------------------+
| host | user | authentication_string | plugin |
+-----------+------------------+-------------------------------------------+-----------------------+
| localhost | root | *6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9 | mysql_native_password |
| localhost | mysql.session | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | mysql.sys | *THISISNOTAVALIDPASSWORDTHATCANBEUSEDHERE | mysql_native_password |
| localhost | debian-sys-maint | *B886BE6360B8C1E19C15E8FF311E2588171905BE | mysql_native_password |
| % | root | *6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9 | mysql_native_password |
+-----------+------------------+-------------------------------------------+-----------------------+
root@kylin:~# systemctl status mysql
System has not been booted with systemd as init system (PID 1). Can't operate.
# This is because WSL is a subsystem of Windows and does not boot with systemd as PID 1.
# So we cannot use reboot or the systemctl command to manage systemd services.
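# Without systemd, services are managed through the SysV-style service wrapper instead; a quick way to see what is running (standard sysvinit commands, listed here as a reference):
service --status-all
service ssh status
service mysql status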
Run the following on every system restart; it makes managing the services and the IP address easier.
hadoop@kylin:/etc/hadoop/conf$ ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"
echo "$newip hadoop" >> /mnt/c/Windows/System32/drivers/etc/hosts
echo "$newip hadoop" >> /etc/hosts
------------------------------------------------------------------------------------------------------------------------
root@kylin:~# cat getip
#!/bin/bash
#getip
##############################
#WSL(Ubuntu) Ip Change
#Author kylin
##############################
newip=`ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"`
oldip=`cat /root/oldip | grep -o "[0-9.]*"`
if [[ $newip = $oldip ]];then
sudo echo "Same IP address."
sudo echo "Old ip is $oldip"
else # On the Windows side, write access to the hosts file was granted, but creating new files under the etc\ folder was not, so sed -i (which writes a temporary file there) fails; hence the workaround below.
sudo cp /mnt/c/Windows/System32/drivers/etc/hosts /tmp/
sudo sed -i "s/${oldip}/$newip/" /tmp/hosts
sudo cat /tmp/hosts > /mnt/c/Windows/System32/drivers/etc/hosts
sudo sed -i "s/${oldip}/$newip/" /etc/hosts
sudo echo "New ip is $newip"
sudo echo "$newip" > /root/oldip
fi
------------------------------------------------------------------------------------------------------------------------
# Improved version of the getip shell script
#!/bin/bash
#getip
##############################
#WSL(Ubuntu) Ip Change
#Author kylin
##############################
newip=`ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"`
oldip=`cat /root/oldip | grep -o "[0-9.]*"`
hostname=`cat /etc/hosts | grep hadoop | grep -o "[a-z]*"`
if [[ $hostname = hadoop ]];then
if [[ $newip = $oldip ]];then
sudo echo "Same IP address."
sudo echo "Old ip is $oldip"
else
sudo cp /mnt/c/Windows/System32/drivers/etc/hosts /tmp/
sudo sed -i "s/${oldip}/$newip/" /tmp/hosts
sudo cat /tmp/hosts > /mnt/c/Windows/System32/drivers/etc/hosts
sudo sed -i "s/${oldip}/$newip/" /etc/hosts
sudo echo "New ip is $newip"
sudo echo "$newip" > /root/oldip
fi
else
sudo echo "$newip hadoop" >> /mnt/c/Windows/System32/drivers/etc/hosts
sudo echo "$newip hadoop" >> /etc/hosts
sudo echo "Add IP $newip is Succeed!"
fi
------------------------------------------------------------------------------------------------------------------------
root@kylin:~# cat initservice.sh
#!/bin/bash
#InitService
##############################
#Linux(Ubuntu) Service Enable
#Author kylin
##############################
WORK_DIR=$(pwd)
#Only root
[[ $EUID -ne 0 ]] && echo 'Error: This script must be run as root!' && exit 1
#IP Addr.
#ip=`ifconfig eth0 | egrep -o "inet [^ ]*" | grep -o "[0-9.]*"`
#echo "$ip cloudera.kylin.com cloudera" >> /etc/hosts
cd ${WORK_DIR}
/bin/bash ./getip.sh
#Enable ssh
service ssh start
#Disable ufw
service ufw stop
#Enable Httpd
#service apache2 restart > /dev/null 2>&1
#service apache2 start
#Enable mysql service
service mysql start
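# Since there is no systemd to enable these services at boot, one simple workaround (an assumption, not something shown above) is to invoke the script from root's shell profile so it runs whenever a new WSL session starts:
chmod +x /root/getip.sh /root/initservice.sh
echo '/bin/bash /root/initservice.sh > /dev/null 2>&1' >> /root/.bashrc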
root@kylin:~# mkdir scpfile
root@kylin:~# scp [email protected]:/root/scpfile/packages/jdk-8u181-linux-x64.tar.gz ./scpfile
root@kylin:~# cd scpfile/
root@kylin:~/scpfile# mkdir -p /usr/java
root@kylin:~/scpfile# tar -zxf jdk-8u181-linux-x64.tar.gz -C /usr/java/
root@kylin:~/scpfile# cd /usr/java/
root@kylin:/usr/java# chown -R root:root jdk1.8.0_181/
root@kylin:/usr/java# ln -sf /usr/java/jdk1.8.0_181 /usr/java/latest
root@kylin:/usr/java# ln -sf /usr/java/latest /usr/java/default
root@kylin:/usr/java# sudo vim /etc/profile
---
export JAVA_HOME=/usr/java/default
export PATH=$JAVA_HOME/bin:$PATH
# save and exit vim with Shift+ZZ
---
root@kylin:/usr/java# source /etc/profile
root@kylin:/usr/java# java -version
# hadoop 3.2.1
root@kylin:/usr/java# cd /root
root@kylin:~# mkdir wget
root@kylin:~# cd wget/
root@kylin:~/wget# sudo wget https://mirror.bit.edu.cn/apache/hadoop/core/hadoop-3.2.1/hadoop-3.2.1.tar.gz
root@kylin:~/wget# mkdir /opt/hadoop
root@kylin:~/wget# tar -zxf hadoop-3.2.1.tar.gz -C /opt/hadoop/
root@kylin:~/wget# cd /root
root@kylin:~# sudo groupadd hadoop
root@kylin:~# sudo useradd -s /bin/bash -g hadoop -d /home/hadoop -m hadoop
root@kylin:~# id hadoop
uid=1001(hadoop) gid=1001(hadoop) groups=1001(hadoop)
root@kylin:~# cd /opt/hadoop
root@kylin:/opt/hadoop# ln -s hadoop-3.2.1 current
root@kylin:/opt/hadoop# chown -R hadoop:hadoop /opt/hadoop
root@kylin:/opt/hadoop# cd /root/
root@kylin:~# mkdir /etc/hadoop
root@kylin:~# cp -r /opt/hadoop/current/etc/hadoop /etc/hadoop/conf
root@kylin:~# chown -R hadoop:hadoop /etc/hadoop
root@kylin:~# vim /home/hadoop/.bashrc
---
# User specific aliases and functions
export JAVA_HOME=/usr/java/default
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HTTPFS_CATALINA_HOME=${HADOOP_HOME}/share/hadoop/httpfs/tomcat
export CATALINA_BASE=${HTTPFS_CATALINA_HOME}
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HTTPFS_CONFIG=/etc/hadoop/conf
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
---
root@kylin:~# vim /etc/hadoop/conf/core-site.xml
---
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
---
root@kylin:~# vim /etc/hadoop/conf/hdfs-site.xml
---
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
---
root@kylin:~# su hadoop
hadoop@kylin:/root$ cd /home/hadoop/
hadoop@kylin:~$ cd /opt/hadoop/current
hadoop@kylin:/opt/hadoop/current$ bin/hdfs namenode -format
# Fix for the DataNode failing to start after the NameNode has been formatted multiple times:
# check the DataNode log, then delete the DataNode's current directory:
# hadoop@kylin:/tmp/hadoop-hadoop/dfs/data$ rm -rf current/
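# The usual root cause is a clusterID mismatch between NameNode and DataNode after the re-format; it can be confirmed before deleting anything (paths assume the default /tmp/hadoop-<user> storage directories used in this setup):
grep clusterID /tmp/hadoop-hadoop/dfs/name/current/VERSION
grep clusterID /tmp/hadoop-hadoop/dfs/data/current/VERSION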
hadoop@kylin:/opt/hadoop/current$ cd /etc/hadoop/conf/
hadoop@kylin:~$ hdfs --daemon start namenode
hadoop@kylin:~$ jps
4629 NameNode
hadoop@kylin:~$ hdfs --daemon start secondarynamenode
hadoop@kylin:~$ hdfs --daemon start datanode
hadoop@kylin:~$ vim /etc/hadoop/conf/mapred-site.xml
---
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
</configuration>
---
hadoop@kylin:~$ vim /etc/hadoop/conf/yarn-site.xml
---
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
</configuration>
---
hadoop@kylin:~$ yarn --daemon start resourcemanager
hadoop@kylin:~$ yarn --daemon start nodemanager
hadoop@kylin:~$ mapred --daemon start historyserver
hadoop@kylin:~$ jps
7091 Jps
4629 NameNode
5946 DataNode
6829 JobHistoryServer
6653 NodeManager
5823 SecondaryNameNode
6335 ResourceManager
hadoop@kylin:~$ touch hadoopstart.sh
hadoop@kylin:~$ chmod +x hadoopstart.sh
hadoop@kylin:~$ vim hadoopstart.sh
hadoop@kylin:~$ cat hadoopstart.sh
#!/bin/bash
##############################
#Hadoopstart.sh
#Author kylin
##############################
$HADOOP_HOME/bin/hdfs --daemon start namenode
$HADOOP_HOME/bin/hdfs --daemon start secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon start datanode
$HADOOP_HOME/bin/yarn --daemon start resourcemanager
$HADOOP_HOME/bin/yarn --daemon start nodemanager
$HADOOP_HOME/bin/yarn --daemon start proxyserver
$HADOOP_HOME/bin/mapred --daemon start historyserver
hadoop@kylin:~$ hadoop fs -ls /
Found 1 items
drwxrwx--- - hadoop supergroup 0 2020-05-29 02:06 /tmp
hadoop@kylin:~$ mkdir data
hadoop@kylin:~$ vim /home/hadoop/data/demo.txt
hadoop@kylin:~$ cat /home/hadoop/data/demo.txt
Linux Unix windows
hadoop Linux spark
hive hadoop Unix
MapReduce hadoop Linux hive
windows hadoop spark
hadoop@kylin:~$ hadoop fs -mkdir /demo
hadoop@kylin:~$ hadoop fs -put /home/hadoop/data/demo.txt /demo
2020-05-29 03:01:21,722 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
hadoop@kylin:~$ hadoop fs -ls /demo
Found 1 items
-rw-r--r-- 1 hadoop supergroup 105 2020-05-29 03:01 /demo/demo.txt
hadoop@kylin:~$ hadoop fs -cat /demo/demo.txt
2020-05-29 03:02:42,682 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
Linux Unix windows
hadoop Linux spark
hive hadoop Unix
MapReduce hadoop Linux hive
windows hadoop spark
hadoop@kylin:~$ hadoop jar /opt/hadoop/current/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar wordcount /demo /output
2020-05-29 03:05:15,225 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
2020-05-29 03:05:17,213 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/hadoop/.staging/job_1590688933468_0001
2020-05-29 03:05:17,614 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,072 INFO input.FileInputFormat: Total input files to process : 1
2020-05-29 03:05:18,197 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,291 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,332 INFO mapreduce.JobSubmitter: number of splits:1
2020-05-29 03:05:18,739 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2020-05-29 03:05:18,816 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1590688933468_0001
2020-05-29 03:05:18,816 INFO mapreduce.JobSubmitter: Executing with tokens: []
2020-05-29 03:05:19,612 INFO conf.Configuration: resource-types.xml not found
2020-05-29 03:05:19,613 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
2020-05-29 03:05:20,619 INFO impl.YarnClientImpl: Submitted application application_1590688933468_0001
2020-05-29 03:05:20,810 INFO mapreduce.Job: The url to track the job: http://kylin.localdomain:8088/proxy/application_1590688933468_0001/
2020-05-29 03:05:20,813 INFO mapreduce.Job: Running job: job_1590688933468_0001
2020-05-29 03:05:43,515 INFO mapreduce.Job: Job job_1590688933468_0001 running in uber mode : false
2020-05-29 03:05:43,519 INFO mapreduce.Job: map 0% reduce 0%
2020-05-29 03:05:53,818 INFO mapreduce.Job: map 100% reduce 0%
2020-05-29 03:06:03,986 INFO mapreduce.Job: map 100% reduce 100%
2020-05-29 03:06:05,030 INFO mapreduce.Job: Job job_1590688933468_0001 completed successfully
2020-05-29 03:06:05,337 INFO mapreduce.Job: Counters: 54
File System Counters
FILE: Number of bytes read=95
FILE: Number of bytes written=451609
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=205
HDFS: Number of bytes written=61
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
HDFS: Number of bytes read erasure-coded=0
Job Counters
Launched map tasks=1
Launched reduce tasks=1
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=7398
Total time spent by all reduces in occupied slots (ms)=8407
Total time spent by all map tasks (ms)=7398
Total time spent by all reduce tasks (ms)=8407
Total vcore-milliseconds taken by all map tasks=7398
Total vcore-milliseconds taken by all reduce tasks=8407
Total megabyte-milliseconds taken by all map tasks=7575552
Total megabyte-milliseconds taken by all reduce tasks=8608768
Map-Reduce Framework
Map input records=5
Map output records=16
Map output bytes=168
Map output materialized bytes=95
Input split bytes=100
Combine input records=16
Combine output records=7
Reduce input groups=7
Reduce shuffle bytes=95
Reduce input records=7
Reduce output records=7
Spilled Records=14
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=303
CPU time spent (ms)=4620
Physical memory (bytes) snapshot=487563264
Virtual memory (bytes) snapshot=5311295488
Total committed heap usage (bytes)=446693376
Peak Map Physical memory (bytes)=297992192
Peak Map Virtual memory (bytes)=2652413952
Peak Reduce Physical memory (bytes)=189571072
Peak Reduce Virtual memory (bytes)=2658881536
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=105
File Output Format Counters
Bytes Written=61
hadoop@kylin:~$ hadoop fs -ls /output
Found 2 items
-rw-r--r-- 1 hadoop supergroup 0 2020-05-29 03:06 /output/_SUCCESS
-rw-r--r-- 1 hadoop supergroup 61 2020-05-29 03:06 /output/part-r-00000
hadoop@kylin:~$ hadoop fs -text /output/part-r-00000
2020-05-29 03:10:44,893 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
Linux 3
MapReduce 1
Unix 2
hadoop 4
hive 2
spark 2
windows 2
hadoop@kylin:~$ touch hadoopstop.sh
hadoop@kylin:~$ chmod +x hadoopstop.sh
hadoop@kylin:~$ vim hadoopstop.sh
hadoop@kylin:~$ cat hadoopstop.sh
#!/bin/bash
##############################
#Hadoopstop.sh
#Author kylin
##############################
$HADOOP_HOME/bin/mapred --daemon stop historyserver
$HADOOP_HOME/bin/yarn --daemon stop proxyserver
$HADOOP_HOME/bin/yarn --daemon stop nodemanager
$HADOOP_HOME/bin/yarn --daemon stop resourcemanager
$HADOOP_HOME/bin/hdfs --daemon stop datanode
$HADOOP_HOME/bin/hdfs --daemon stop secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon stop namenode
hadoop@kylin:~$ hdfs dfsadmin -report
Configured Capacity: 269490393088 (250.98 GB)
Present Capacity: 251389489152 (234.12 GB)
DFS Remaining: 251389173760 (234.12 GB)
DFS Used: 315392 (308 KB)
DFS Used%: 0.00%
Replicated Blocks:
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
Missing blocks (with replication factor 1): 0
Low redundancy blocks with highest priority to recover: 0
Pending deletion blocks: 0
Erasure Coded Block Groups:
Low redundancy block groups: 0
Block groups with corrupt internal blocks: 0
Missing block groups: 0
Low redundancy blocks with highest priority to recover: 0
Pending deletion blocks: 0
-------------------------------------------------
Live datanodes (1):
Name: 127.0.0.1:9866 (localhost)
Hostname: kylin.localdomain
Decommission Status : Normal
Configured Capacity: 269490393088 (250.98 GB)
DFS Used: 315392 (308 KB)
Non DFS Used: 4340232192 (4.04 GB)
DFS Remaining: 251389173760 (234.12 GB)
DFS Used%: 0.00%
DFS Remaining%: 93.28%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Fri May 29 09:35:35 CST 2020
Last Block Report: Fri May 29 08:58:44 CST 2020
Num of Blocks: 4
#https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/ClusterSetup.html
# hive 3.1.2
# https://www.cnblogs.com/weavepub/p/11130869.html
root@kylin:~/wget# sudo wget https://mirror.bit.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
# Use MySQL as the metastore database
# Create the metastore (metadata) database
root@kylin:~# mysql -u root --password='123456' -e "create database hive default character set utf8 DEFAULT COLLATE utf8_general_ci"
mysql: [Warning] Using a password on the command line interface can be insecure.
root@kylin:~# mysql -u root --password='123456' -e "GRANT ALL ON hive.* TO 'hive'@'%' IDENTIFIED BY '123456'"
mysql: [Warning] Using a password on the command line interface can be insecure.
root@kylin:~# mysql -u root --password='123456' -e "flush privileges"
mysql: [Warning] Using a password on the command line interface can be insecure.
root@kylin:~# mysql -u root --password='123456' -e "SHOW GRANTS FOR 'hive'@'%'"
mysql: [Warning] Using a password on the command line interface can be insecure.
+------------------------------------------------+
| Grants for hive@% |
+------------------------------------------------+
| GRANT USAGE ON *.* TO 'hive'@'%' |
| GRANT ALL PRIVILEGES ON `hive`.* TO 'hive'@'%' |
+------------------------------------------------+
root@kylin:~#
root@kylin:~/wget# mkdir /opt/hive
root@kylin:~/wget# tar -zxf apache-hive-3.1.2-bin.tar.gz -C /opt/hive
root@kylin:~/wget# cd /opt/hive/
root@kylin:/opt/hive# ll
total 12
drwxr-xr-x 3 root root 4096 May 29 10:47 ./
drwxr-xr-x 4 root root 4096 May 29 10:47 ../
drwxr-xr-x 10 root root 4096 May 29 10:47 apache-hive-3.1.2-bin/
root@kylin:/opt/hive# mv apache-hive-3.1.2-bin hive-3.1.2
root@kylin:/opt/hive# ln -s hive-3.1.2 current
root@kylin:/opt/hive# chown -R hadoop:hadoop /opt/hive/
root@kylin:/opt/hive# ll
total 12
drwxr-xr-x 3 hadoop hadoop 4096 May 29 11:00 ./
drwxr-xr-x 4 root root 4096 May 29 10:47 ../
lrwxrwxrwx 1 hadoop hadoop 10 May 29 11:00 current -> hive-3.1.2/
drwxr-xr-x 10 hadoop hadoop 4096 May 29 10:47 hive-3.1.2/
root@kylin:~# mkdir /etc/hive
root@kylin:~# cp -r /opt/hive/current/conf /etc/hive/conf
root@kylin:~# chown -R hadoop:hadoop /etc/hive/
root@kylin:~# vim /home/hadoop/.bashrc
---
# Hive
export HIVE_HOME=/opt/hive/current
export HCAT_HOME=$HIVE_HOME/hcatalog
export HIVE_CONF_DIR=/etc/hive/conf
export PATH=$PATH:$HIVE_HOME/bin
---
hadoop@kylin:/etc/hive/conf$ source /home/hadoop/.bashrc
root@kylin:~# cd /usr/share/java/
root@kylin:/usr/share/java# ll
total 992
drwxr-xr-x 2 root root 4096 May 28 18:41 ./
drwxr-xr-x 113 root root 4096 May 28 18:41 ../
-rw-r--r-- 1 root root 2613 Feb 21 2019 libintl.jar
-rw-r--r-- 1 root root 1002812 Feb 24 2018 mysql-connector-java-5.1.45.jar
lrwxrwxrwx 1 root root 31 Feb 24 2018 mysql-connector-java.jar -> mysql-connector-java-5.1.45.jar
lrwxrwxrwx 1 root root 24 Feb 24 2018 mysql.jar -> mysql-connector-java.jar
root@kylin:/usr/share/java# cp mysql-connector-java-5.1.45.jar /opt/hive/current/lib/
root@kylin:/usr/share/java# cd /opt/hive/current/lib/
root@kylin:/opt/hive/current/lib# chown hadoop:hadoop mysql-connector-java-5.1.45.jar
root@kylin:/opt/hive/current/lib# su hadoop
hadoop@kylin:/opt/hive/current/lib$ ln -s mysql-connector-java-5.1.45.jar mysql-connector-java.jar
hadoop@kylin:/opt/hive/current/lib$ cd ..
hadoop@kylin:/opt/hive/current$ hadoop fs -mkdir -p /user/hive/warehouse
hadoop@kylin:/opt/hive/current$ hadoop fs -chmod g+w /user/hive/warehouse
hadoop@kylin:/opt/hive/current$ hadoop fs -ls /user/hive/warehouse
hadoop@kylin:/opt/hive/current$ hadoop fs -ls /user/hive
Found 1 items
drwxrwxrwx - hadoop supergroup 0 2020-05-29 11:43 /user/hive/warehouse
# HDFS already has a /tmp directory once Hadoop is running; otherwise it would have to be created and granted group write permission:
#$HADOOP_HOME/bin/hadoop fs -mkdir /tmp
#$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp
#
hadoop@kylin:/opt/hive/current$ cd /etc/hive/conf/
hadoop@kylin:/etc/hive/conf$ pwd
/etc/hive/conf
hadoop@kylin:/etc/hive/conf$ mv hive-exec-log4j2.properties.template hive-exec-log4j2.properties
hadoop@kylin:/etc/hive/conf$ mv hive-log4j2.properties.template hive-log4j2.properties
hadoop@kylin:/etc/hive/conf$ mv hive-default.xml.template hive-default.xml
hadoop@kylin:/etc/hive/conf$ cp hive-default.xml hive-site.xml
hadoop@kylin:/etc/hive/conf$ mv hive-env.sh.template hive-env.sh
hadoop@kylin:/etc/hive/conf$ vim /etc/hive/conf/hive-env.sh
---
export HADOOP_HOME=/opt/hadoop/current
export HIVE_CONF_DIR=/etc/hive/conf
export HIVE_HOME=/opt/hive/current
export HIVE_AUX_JARS_PATH=$HIVE_HOME/lib
---
hadoop@kylin:/etc/hive/conf$ vim hive-site.xml
hadoop@kylin:/etc/hive/conf$ cat hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive?allowMultiQueries=true&amp;useSSL=false&amp;verifyServerCertificate=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>datanucleus.readOnlyDatastore</name>
<value>false</value>
</property>
<property>
<name>datanucleus.fixedDatastore</name>
<value>false</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateColumns</name>
<value>true</value>
</property>
</configuration>
hadoop@kylin:~$ $HIVE_HOME/bin/schematool -initSchema -dbType mysql
Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V
# Handling the error above
# Solution: Hive 3.1.2 ships guava-19.0.jar by default in $HIVE_HOME/lib/; replace it with a newer guava jar, e.g. one downloaded from https://mvnrepository.com/artifact/com.google.guava/guava.
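# One common way to do the swap (a sketch; guava-27.0-jre.jar is the version bundled with Hadoop 3.2.1, check the exact file names in your installation):
rm /opt/hive/current/lib/guava-19.0.jar
cp /opt/hadoop/current/share/hadoop/common/lib/guava-27.0-jre.jar /opt/hive/current/lib/
chown hadoop:hadoop /opt/hive/current/lib/guava-27.0-jre.jar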
# To drop every table in the hive metastore database, first generate the DROP statements:
SELECT concat('DROP TABLE IF EXISTS ', table_name, ';')
FROM information_schema.tables
WHERE table_schema = 'hive';
mysql> DROP TABLE IF EXISTS WM_RESOURCEPLAN;
ERROR 1217 (23000): Cannot delete or update a parent row: a foreign key constraint fails
# Working around the foreign-key error
mysql> SET FOREIGN_KEY_CHECKS = 0;
Query OK, 0 rows affected (0.00 sec)
mysql> DROP TABLE IF EXISTS WM_RESOURCEPLAN;
Query OK, 0 rows affected (0.03 sec)
mysql> SET FOREIGN_KEY_CHECKS = 1;
Query OK, 0 rows affected (0.00 sec)
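# Putting the cleanup together, the generated DROP statements can be fed straight back into mysql in one pass (a sketch, reusing the credentials from above):
mysql -u root --password='123456' -N -e "SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') FROM information_schema.tables WHERE table_schema = 'hive'" | mysql -u root --password='123456' --init-command='SET FOREIGN_KEY_CHECKS=0' hive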
hadoop@kylin:~$ $HIVE_HOME/bin/schematool -initSchema -dbType mysql
Metastore connection URL: jdbc:mysql://localhost:3306/hive?allowMultiQueries=true&useSSL=false&verifyServerCertificate=false
Metastore Connection Driver : com.mysql.jdbc.Driver
Metastore connection User: hive
Starting metastore schema initialization to 3.1.0
Initialization script hive-schema-3.1.0.mysql.sql
Initialization script completed
schemaTool completed
# Start Hive
hadoop@kylin:~$ $HIVE_HOME/bin/hive
Hive Session ID = 65b6e98e-b1df-4dbc-a8cc-1b3732893512
Logging initialized using configuration in file:/etc/hive/conf/hive-log4j2.properties Async: true
Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
Hive Session ID = 75771681-679c-4676-ab8a-c1cb1bd9e3d1
hive> show databases;
OK
default
Time taken: 0.129 seconds, Fetched: 1 row(s)
hive> quit;
hadoop@kylin:~$ $HIVE_HOME/bin/hiveserver2 &
[1] 8659
hadoop@kylin:~$ 2020-05-29 15:54:43: Starting HiveServer2
Hive Session ID = 2ec73789-52dc-4254-86a0-5a2346eca8c6
Hive Session ID = be6f9dcf-cdd2-459c-9d57-5d71158af1a7
Hive Session ID = 461cc314-6292-4776-9883-ba92a9b78dba
Hive Session ID = 4c4a5b6c-636a-45d0-98a0-2d43c3c7fe33
hadoop@kylin:~$ jps
8659 RunJar
309 SecondaryNameNode
520 NodeManager
376 DataNode
249 NameNode
604 JobHistoryServer
444 ResourceManager
8846 Jps
---
hadoop@kylin:/opt/hive/current/logs$ touch metastore.log
hadoop@kylin:/opt/hive/current/logs$ touch hiveserver2.log
hadoop@kylin:/opt/hive/current/logs$ chmod o+w hiveserver2.log
hadoop@kylin:/opt/hive/current/logs$ chmod o+w metastore.log
hadoop@kylin:~$ nohup hive --service metastore > $HIVE_HOME/logs/metastore.log 2>&1 &
[1] 12022
hadoop@kylin:~$ nohup hive --service hiveserver2 > $HIVE_HOME/logs/hiveserver2.log 2>&1 &
[2] 12172
hadoop@kylin:~$ jps | grep RunJar
12022 RunJar
12172 RunJar
---
hadoop@kylin:~$ netstat -tulnp | grep 10000
(Not all processes could be identified, non-owned process info
will not be shown, you would have to be root to see it all.)
tcp6 0 0 :::10000 :::* LISTEN 8659/java
hadoop@kylin:~$ vim /etc/hive/conf/hive-site.xml
---
<property>
<name>hive.server2.authentication</name>
<value>CUSTOM</value>
</property>
<property>
<name>hive.server2.custom.authentication.class</name>
<value>com.weiming.SampleAuth.SampleAuthenticator</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>TCP port number to listen on, default 10000</description>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property>
<!-- Show the current database in the CLI prompt and print column headers in query results -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
---
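# HiveServer2 only reads hive-site.xml at startup, so restart it for the authentication changes to take effect (same nohup invocation as earlier; <pid> is a placeholder for the RunJar process id of the running hiveserver2):
jps | grep RunJar
kill <pid>
nohup hive --service hiveserver2 > $HIVE_HOME/logs/hiveserver2.log 2>&1 &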
hadoop@kylin:~$ mkdir /opt/hive/current/SampleAuth && cd /opt/hive/current/SampleAuth
hadoop@kylin:/opt/hive/current/SampleAuth$ vim SampleAuthenticator.java
---
package com.weiming.SampleAuth;
import java.util.Hashtable;
import javax.security.sasl.AuthenticationException;
import org.apache.hive.service.auth.PasswdAuthenticationProvider;
/*
* javac -cp $HIVE_HOME/lib/hive-service-0.12.0-cdh5.0.0-beta-2.jar SampleAuthenticator.java -d .
* jar cf sampleauth.jar hive
* cp sampleauth.jar $HIVE_HOME/lib/.
* */
public class SampleAuthenticator implements PasswdAuthenticationProvider {
Hashtable<String, String> store = null;
public SampleAuthenticator () {
store = new Hashtable<String, String>();
store.put("user1", "passwd1");
store.put("user2", "passwd2");
}
@Override
public void Authenticate(String user, String password)
throws AuthenticationException {
String storedPasswd = store.get(user);
if (storedPasswd != null && storedPasswd.equals(password))
return;
throw new AuthenticationException("SampleAuthenticator: Error validating user");
}
}
---
hadoop@kylin:/opt/hive/current/SampleAuth$ javac -cp /opt/hive/current/lib/hive-service-3.1.2.jar SampleAuthenticator.java -d .
hadoop@kylin:/opt/hive/current/SampleAuth$ jar cf sampleauth.jar com
hadoop@kylin:/opt/hive/current/SampleAuth$ ll
total 20
drwxrwxr-x 3 hadoop hadoop 4096 May 29 16:37 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
-rw-rw-r-- 1 hadoop hadoop 960 May 29 16:16 SampleAuthenticator.java
drwxrwxr-x 3 hadoop hadoop 4096 May 29 16:36 com/
-rw-rw-r-- 1 hadoop hadoop 342 May 29 16:41 sampleauth.jar
hadoop@kylin:/opt/hive/current/SampleAuth$ cp sampleauth.jar /opt/hive/current/lib/
hadoop@kylin:~$ vim /etc/hadoop/conf/core-site.xml
---
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
---
# Because the user and group used to manage and start Hive here are both hadoop, the proxyuser properties are hadoop.proxyuser.hadoop.hosts and hadoop.proxyuser.hadoop.groups.
hadoop@kylin:~$ hdfs dfsadmin -refreshSuperUserGroupsConfiguration
Refresh super user groups configuration successful
hadoop@kylin:~$ yarn rmadmin -refreshSuperUserGroupsConfiguration
2020-05-29 20:36:59,284 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8033
hadoop@kylin:~$ $HIVE_HOME/bin/beeline
Beeline version 3.1.2 by Apache Hive
beeline> !connect jdbc:hive2://hadoop:10000/default
Connecting to jdbc:hive2://hadoop:10000/default
Enter username for jdbc:hive2://hadoop:10000/default: user2
Enter password for jdbc:hive2://hadoop:10000/default: *******
20/05/29 22:51:20 [main]: WARN jdbc.HiveConnection: Failed to connect to hadoop:10000
Unexpected end of file when reading from HS2 server. The root cause might be too many concurrent connections. Please ask the administrator to check the number of active connections, and adjust hive.server2.thrift.max.worker.threads if applicable.
Error: Could not open client transport with JDBC Uri: jdbc:hive2://hadoop:10000/default: null (state=08S01,code=0)
beeline> !connect jdbc:hive2://hadoop:10000/default;auth=noSasl
Connecting to jdbc:hive2://hadoop:10000/default;auth=noSasl
Enter username for jdbc:hive2://hadoop:10000/default: user2
Enter password for jdbc:hive2://hadoop:10000/default: *******
20/05/29 22:51:39 [main]: WARN jdbc.HiveConnection: Failed to connect to hadoop:10000
Error: Could not open client transport with JDBC Uri: jdbc:hive2://hadoop:10000/default;auth=noSasl: Failed to open new session: java.lang.RuntimeException: org.apache.hadoop.security.AccessControlException: Permission denied: user=user2, access=EXECUTE, inode="/tmp":hadoop:supergroup:drwxrwx---
hadoop@kylin:~$ hdfs dfs -chmod o+rwx /tmp
beeline> !connect jdbc:hive2://hadoop:10000/default;auth=noSasl
Connecting to jdbc:hive2://hadoop:10000/default;auth=noSasl
Enter username for jdbc:hive2://hadoop:10000/default: user2
Enter password for jdbc:hive2://hadoop:10000/default: *******
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://hadoop:10000/default>
#At this point you can connect to Hive from clients such as DBeaver or DataGrip.
#When specifying the schema, just append the string ;auth=noSasl to the JDBC URL to set the connection property.
#jdbc:hive2://hadoop:10000/default;auth=noSasl
#
# hive -S (silent mode) suppresses the execution-step logging
hadoop@kylin:~$ hive
Hive Session ID = e5823ff3-9557-4fc7-9a8d-0f77f5df3511
Logging initialized using configuration in file:/etc/hive/conf/hive-log4j2.properties Async: true
Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
Hive Session ID = dca74a09-3e5a-45ce-83ed-1fe8506211b7
# If HBase is also installed and Hive floods the console with INFO logs, run the following in HBase's conf directory:
# sed -i 's/INFO/ERROR/' /etc/hbase/conf/log4j.properties
hive> desc function sum;
OK
sum(x) - Returns the sum of a set of numbers
Time taken: 4.3 seconds, Fetched: 1 row(s)
hive> create database test_bdw;
OK
Time taken: 0.97 seconds
hive> use test_bdw;
OK
Time taken: 0.178 seconds
hive> create table student(id int, name string) row format delimited fields terminated by '\t';
OK
Time taken: 2.69 seconds
hive> desc student;
OK
id int
name string
Time taken: 0.481 seconds, Fetched: 2 row(s)
hive>
hadoop@kylin:/opt/hive/current/SampleAuth$ cd $HIVE_HOME
hadoop@kylin:/opt/hive/current$ mkdir warehouse/test_bdw
hadoop@kylin:/opt/hive/current$ touch warehouse/test_bdw/student.dat
hadoop@kylin:/opt/hive/current$ vim warehouse/test_bdw/student.dat
hadoop@kylin:/opt/hive/current$ cat warehouse/test_bdw/student.dat
001 david
002 fab
003 kaishen
004 josen
005 arvin
006 wada
007 weda
008 banana
009 arnold
010 simon
011 scott
hive> load data local inpath '/opt/hive/current/warehouse/test_bdw/student.dat' into table test_bdw.student;
Loading data to table test_bdw.student
OK
Time taken: 3.21 seconds
hive> select * from student;
OK
1 david
2 fab
3 kaishen
4 josen
5 arvin
6 wada
7 weda
8 banana
9 arnold
10 simon
11 scott
NULL NULL
Time taken: 7.677 seconds, Fetched: 12 row(s)
hive>
hadoop@kylin:~$ hadoop fs -ls /
Found 4 items
drwxr-xr-x - hadoop supergroup 0 2020-05-29 03:01 /demo
drwxr-xr-x - hadoop supergroup 0 2020-05-29 03:06 /output
drwxrwxrwx - hadoop supergroup 0 2020-05-29 15:47 /tmp
drwxr-xr-x - hadoop supergroup 0 2020-05-29 14:05 /user
hadoop@kylin:~$ hadoop fs -ls /user
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2020-05-29 14:05 /user/hive
hadoop@kylin:~$ hadoop fs -ls /user/hive
Found 1 items
drwxrwxrwx - hadoop supergroup 0 2020-05-29 23:51 /user/hive/warehouse
hadoop@kylin:~$ hadoop fs -ls /user/hive/warehouse
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2020-05-29 23:51 /user/hive/warehouse/test_bdw.db
hadoop@kylin:~$ hadoop fs -ls /user/hive/warehouse/test_bdw.db
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2020-05-29 23:59 /user/hive/warehouse/test_bdw.db/student
hadoop@kylin:~$ hadoop fs -ls /user/hive/warehouse/test_bdw.db/student
Found 1 items
-rw-r--r-- 1 hadoop supergroup 111 2020-05-30 00:00 /user/hive/warehouse/test_bdw.db/student/student.dat
hadoop@kylin:~$ hadoop fs -cat /user/hive/warehouse/test_bdw.db/student/student.dat
2020-05-30 00:09:04,738 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
001 david
002 fab
003 kaishen
004 josen
005 arvin
006 wada
007 weda
008 banana
009 arnold
010 simon
011 scott
hadoop@kylin:~$ hadoop fs -text /user/hive/warehouse/test_bdw.db/student/student.dat
2020-05-30 00:09:28,852 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
001 david
002 fab
003 kaishen
004 josen
005 arvin
006 wada
007 weda
008 banana
009 arnold
010 simon
011 scott
hadoop@kylin:~$
#Running HCatalog
#https://cwiki.apache.org/confluence/display/Hive/GettingStarted
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh
Usage: /opt/hive/current/hcatalog/sbin/hcat_server.sh [--config confdir] COMMAND
start Start HCatalog Server
stop Stop HCatalog Server
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
Missing hive-site.xml, expected at [/opt/hive/current/conf/hive-site.xml]
hadoop@kylin:~$ cd /opt/hive/current/conf/
hadoop@kylin:/opt/hive/current/conf$ ll
total 340
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
-rw-r--r-- 1 hadoop hadoop 1596 Aug 23 2019 beeline-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 300482 Aug 23 2019 hive-default.xml.template
-rw-r--r-- 1 hadoop hadoop 2365 Aug 23 2019 hive-env.sh.template
-rw-r--r-- 1 hadoop hadoop 2274 Aug 23 2019 hive-exec-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 3086 Aug 23 2019 hive-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 2060 Aug 23 2019 ivysettings.xml
-rw-r--r-- 1 hadoop hadoop 3558 Aug 23 2019 llap-cli-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 7163 Aug 23 2019 llap-daemon-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 2662 Aug 23 2019 parquet-logging.properties
hadoop@kylin:/opt/hive/current/conf$ ln -s $HIVE_CONF_DIR/hive-site.xml hive-site.xml
hadoop@kylin:/opt/hive/current/conf$ ll
total 340
drwxr-xr-x 2 hadoop hadoop 4096 May 30 14:13 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
-rw-r--r-- 1 hadoop hadoop 1596 Aug 23 2019 beeline-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 300482 Aug 23 2019 hive-default.xml.template
-rw-r--r-- 1 hadoop hadoop 2365 Aug 23 2019 hive-env.sh.template
-rw-r--r-- 1 hadoop hadoop 2274 Aug 23 2019 hive-exec-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 3086 Aug 23 2019 hive-log4j2.properties.template
lrwxrwxrwx 1 hadoop hadoop 28 May 30 14:13 hive-site.xml -> /etc/hive/conf/hive-site.xml
-rw-r--r-- 1 hadoop hadoop 2060 Aug 23 2019 ivysettings.xml
-rw-r--r-- 1 hadoop hadoop 3558 Aug 23 2019 llap-cli-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 7163 Aug 23 2019 llap-daemon-log4j2.properties.template
-rw-r--r-- 1 hadoop hadoop 2662 Aug 23 2019 parquet-logging.properties
hadoop@kylin:/opt/hive/current/conf$ cd -
/home/hadoop
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
Started metastore server init, testing if initialized correctly...
/opt/hive/current/hcatalog/sbin/hcat_server.sh: line 91: /opt/hive/current/hcatalog/sbin/../var/log/hcat.out: No such file or directory
Metastore startup failed, see /opt/hive/current/hcatalog/sbin/../var/log/hcat.err
hadoop@kylin:~$ cd /opt/hive/current/hcatalog/sbin/../
hadoop@kylin:/opt/hive/current/hcatalog$ ll
total 28
drwxr-xr-x 7 hadoop hadoop 4096 May 29 10:47 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 bin/
drwxr-xr-x 4 hadoop hadoop 4096 May 29 10:47 etc/
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 libexec/
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 sbin/
drwxr-xr-x 5 hadoop hadoop 4096 May 29 10:47 share/
hadoop@kylin:/opt/hive/current/hcatalog$ mkdir -p var/log
hadoop@kylin:/opt/hive/current/hcatalog$ cd /home/hadoop/
hadoop@kylin:~$ $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
Started metastore server init, testing if initialized correctly...
Metastore initialized successfully on port[9083].
hadoop@kylin:~$ jps
1585 RunJar
1218 NodeManager
2466 Jps
995 SecondaryNameNode
932 NameNode
1431 JobHistoryServer
1065 DataNode
2281 RunJar
1147 ResourceManager
hadoop@kylin:~$ netstat -an | grep 9083
tcp6 0 0 :::9083 :::* LISTEN
hadoop@kylin:~$ cd /opt/hive/current/hcatalog/sbin/../
hadoop@kylin:/opt/hive/current/hcatalog$ ll
total 32
drwxr-xr-x 8 hadoop hadoop 4096 May 30 14:18 ./
drwxr-xr-x 12 hadoop hadoop 4096 May 29 16:08 ../
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 bin/
drwxr-xr-x 4 hadoop hadoop 4096 May 29 10:47 etc/
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 libexec/
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 sbin/
drwxr-xr-x 5 hadoop hadoop 4096 May 29 10:47 share/
drwxrwxr-x 3 hadoop hadoop 4096 May 30 14:18 var/
hadoop@kylin:/opt/hive/current/hcatalog$ cd var/log/
hadoop@kylin:/opt/hive/current/hcatalog/var/log$ ll
total 24
drwxrwxr-x 2 hadoop hadoop 4096 May 30 14:18 ./
drwxrwxr-x 3 hadoop hadoop 4096 May 30 14:18 ../
-rw-rw-r-- 1 hadoop hadoop 0 May 30 14:18 hcat.err
-rw-rw-r-- 1 hadoop hadoop 52 May 30 14:18 hcat.out
-rw-rw-r-- 1 hadoop hadoop 5 May 30 14:18 hcat.pid
-rw-rw-r-- 1 hadoop hadoop 7061 May 30 14:18 hcat_gc.log-202005301418
hadoop@kylin:~$ $HIVE_HOME/hcatalog/bin/hcat
2020-05-30 14:56:26,045 INFO conf.HiveConf: Found configuration file file:/etc/hive/conf/hive-site.xml
Hive Session ID = 8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:33,250 INFO SessionState: Hive Session ID = 8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:37,131 INFO session.SessionState: Created HDFS directory: /tmp/hive/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:37,187 INFO session.SessionState: Created local directory: /tmp/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0
2020-05-30 14:56:37,204 INFO session.SessionState: Created HDFS directory: /tmp/hive/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0/_tmp_space.db
usage: hcat { -e "" | -f "" } [ -g "" ] [ -p "" ] [ -D"=" ]
-D <property=value> use hadoop value for given property
-e <exec> hcat command given from command line
-f <file> hcat commands in file
-g <group> group for the db/table specified in CREATE statement
-h,--help Print help information
-p <perms> permissions for the db/table specified in CREATE statement
2020-05-30 14:56:37,287 INFO session.SessionState: Deleted directory: /tmp/hive/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0 on fs with scheme hdfs
2020-05-30 14:56:37,300 INFO session.SessionState: Deleted directory: /tmp/hadoop/8871291c-bf98-40ae-a314-1ec5f27045b0 on fs with scheme file
2020-05-30 14:56:37,593 INFO conf.MetastoreConf: Found configuration file file:/etc/hive/conf/hive-site.xml
2020-05-30 14:56:37,596 INFO conf.MetastoreConf: Unable to find config file hivemetastore-site.xml
2020-05-30 14:56:37,596 INFO conf.MetastoreConf: Found configuration file null
2020-05-30 14:56:37,599 INFO conf.MetastoreConf: Unable to find config file metastore-site.xml
2020-05-30 14:56:37,599 INFO conf.MetastoreConf: Found configuration file null
2020-05-30 14:56:37,758 INFO session.SessionState: Failed to remove classloaders from DataNucleus
java.lang.NullPointerException
。。。。。
hadoop@kylin:~$ cd $HIVE_HOME
# The log files are produced in whichever directory the command is run from.
hadoop@kylin:/opt/hive/current$ $HIVE_HOME/hcatalog/sbin/webhcat_server.sh
Lenght of string is non zero
usage: /opt/hive/current/hcatalog/sbin/webhcat_server.sh [start|startDebug|stop|foreground]
start Start the Webhcat Server
startDebug Start the Webhcat Server listening for debugger on port 5005
stop Stop the Webhcat Server
foreground Run the Webhcat Server in the foreground
hadoop@kylin:/opt/hive/current$ $HIVE_HOME/hcatalog/sbin/webhcat_server.sh start
Lenght of string is non zero
webhcat: starting ...
webhcat: /opt/hadoop/current/bin/hadoop jar /opt/hive/hive-3.1.2/hcatalog/sbin/../share/webhcat/svr/lib/hive-webhcat-3.1.2.jar org.apache.hive.hcatalog.templeton.Main
webhcat: starting ... started.
webhcat: done
hadoop@kylin:/opt/hive/current$ ll
total 564
drwxr-xr-x 12 hadoop hadoop 4096 May 30 15:00 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 29 11:00 ../
-rw-r--r-- 1 hadoop hadoop 20798 Aug 23 2019 LICENSE
-rw-r--r-- 1 hadoop hadoop 230 Aug 23 2019 NOTICE
-rw-r--r-- 1 hadoop hadoop 2469 Aug 23 2019 RELEASE_NOTES.txt
drwxrwxr-x 3 hadoop hadoop 4096 May 29 23:32 SampleAuth/
drwxr-xr-x 3 hadoop hadoop 4096 May 29 10:47 bin/
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 binary-package-licenses/
drwxr-xr-x 2 hadoop hadoop 4096 May 30 14:48 conf/
drwxr-xr-x 4 hadoop hadoop 4096 May 29 10:47 examples/
drwxr-xr-x 8 hadoop hadoop 4096 May 30 14:18 hcatalog/
drwxr-xr-x 2 hadoop hadoop 4096 May 29 10:47 jdbc/
drwxr-xr-x 4 hadoop hadoop 16384 May 29 16:42 lib/
drwxr-xr-x 4 hadoop hadoop 4096 May 29 10:47 scripts/
drwxrwxr-x 3 hadoop hadoop 4096 May 29 23:54 warehouse/
-rw-rw-r-- 1 hadoop hadoop 0 May 30 15:00 webhcat-console-error.log
-rw-rw-r-- 1 hadoop hadoop 35 May 30 15:00 webhcat-console.log
-rw-rw-r-- 1 hadoop hadoop 472856 May 30 15:02 webhcat.log
-rw-rw-r-- 1 hadoop hadoop 5 May 30 15:02 webhcat.pid
hadoop@kylin:/opt/hive/current$ $HIVE_HOME/hcatalog/sbin/webhcat_server.sh stop
Lenght of string is non zero
webhcat: stopping ...
webhcat: stopping ... stopped
webhcat: done
hadoop@kylin:/opt/hive/current$
# HiveSQL operations
# https://cwiki.apache.org/confluence/display/Hive/GettingStarted
#
#Simple Example Use Cases
CREATE TABLE u_data (
userid INT,
movieid INT,
rating INT,
unixtime STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
hadoop@kylin:~/data$ wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
hadoop@kylin:~/data$ unzip ml-100k.zip
hive> load data local inpath '/home/hadoop/data/ml-100k/u.data' overwrite into table u_data;
hive> SELECT * FROM u_data limit 5;
OK
196 242 3 881250949
186 302 3 891717742
22 377 1 878887116
244 51 2 880606923
166 346 1 886397596
Time taken: 0.549 seconds, Fetched: 5 row(s)
hive> desc u_data;
OK
userid int
movieid int
rating int
unixtime string
Time taken: 0.171 seconds, Fetched: 4 row(s)
---
hadoop@kylin:~/data$ cat weekday_mapper.py
import sys
import datetime
for line in sys.stdin:
    line = line.strip()
    userid, movieid, rating, unixtime = line.split('\t')
    weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
    print('\t'.join([userid, movieid, rating, str(weekday)]))
---
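# Before the INSERT below, the target table must exist and the mapper script must be registered in the Hive session
# (these steps are omitted above; per the GettingStarted guide linked earlier, using this session's local path):
CREATE TABLE u_data_new (
  userid INT,
  movieid INT,
  rating INT,
  weekday INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
add FILE /home/hadoop/data/weekday_mapper.py;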
hive> INSERT OVERWRITE TABLE u_data_new
> SELECT
> TRANSFORM (userid, movieid, rating, unixtime)
> USING 'python3 weekday_mapper.py'
> AS (userid, movieid, rating, weekday)
> FROM u_data;
hive> SELECT weekday, COUNT(*)
> FROM u_data_new
> GROUP BY weekday;
Query ID = hadoop_20200530192253_3d5afdf4-0d16-4ac1-800b-0d612d099beb
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1590818128923_0009, Tracking URL = http://kylin.localdomain:8088/proxy/application_1590818128923_0009/
Kill Command = /opt/hadoop/current/bin/mapred job -kill job_1590818128923_0009
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2020-05-30 19:23:50,575 Stage-1 map = 0%, reduce = 0%
2020-05-30 19:24:08,948 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 13.7 sec
2020-05-30 19:24:20,636 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 20.63 sec
MapReduce Total cumulative CPU time: 20 seconds 630 msec
Ended Job = job_1590818128923_0009
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 20.63 sec HDFS Read: 1192395 HDFS Write: 227 SUCCESS
Total MapReduce CPU Time Spent: 20 seconds 630 msec
OK
1 12254
2 13579
3 14430
4 15114
5 14743
6 18229
7 11651
Time taken: 89.704 seconds, Fetched: 7 row(s)
hive> select * from u_data_new limit 5;
OK
196 242 3 4
186 302 3 7
22 377 1 5
244 51 2 4
166 346 1 1
Time taken: 0.496 seconds, Fetched: 5 row(s)
hive>
#HBase 2.2.4
#root@kylin:~/wget# wget https://mirrors.tuna.tsinghua.edu.cn/apache/hbase/stable/hbase-2.2.4-bin.tar.gz
#
root@kylin:~/wget# ll
total 841436
drwxr-xr-x 2 root root 4096 May 30 20:02 ./
drwx------ 8 root root 4096 May 30 20:06 ../
-rw-r--r-- 1 root root 278813748 Aug 27 2019 apache-hive-3.1.2-bin.tar.gz
-rw-r--r-- 1 root root 359196911 Sep 23 2019 hadoop-3.2.1.tar.gz
-rw-r--r-- 1 root root 223600848 Mar 20 16:25 hbase-2.2.4-bin.tar.gz
root@kylin:~/wget# mkdir /opt/hbase
root@kylin:~/wget# tar -zxf hbase-2.2.4-bin.tar.gz -C /opt/hbase
root@kylin:~/wget# cd /opt/hbase/
root@kylin:/opt/hbase# ll
total 12
drwxr-xr-x 3 root root 4096 May 30 20:23 ./
drwxr-xr-x 5 root root 4096 May 30 20:23 ../
drwxr-xr-x 6 root root 4096 May 30 20:23 hbase-2.2.4/
root@kylin:/opt/hbase# ln -s hbase-2.2.4 current
root@kylin:/opt/hbase# chown -R hadoop:hadoop /opt/hbase/
root@kylin:/opt/hbase# ll
total 12
drwxr-xr-x 3 hadoop hadoop 4096 May 30 20:25 ./
drwxr-xr-x 5 root root 4096 May 30 20:23 ../
lrwxrwxrwx 1 hadoop hadoop 11 May 30 20:25 current -> hbase-2.2.4/
drwxr-xr-x 6 hadoop hadoop 4096 May 30 20:23 hbase-2.2.4/
root@kylin:/opt/hbase# ll hbase-2.2.4/
total 944
drwxr-xr-x 6 hadoop hadoop 4096 May 30 20:23 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 30 20:25 ../
-rw-r--r-- 1 hadoop hadoop 164157 Mar 11 12:25 CHANGES.md
-rw-rw-r-- 1 hadoop hadoop 262 May 2 2018 LEGAL
-rw-rw-r-- 1 hadoop hadoop 129312 Mar 11 13:01 LICENSE.txt
-rw-rw-r-- 1 hadoop hadoop 520601 Mar 11 13:01 NOTICE.txt
-rw-r--r-- 1 hadoop hadoop 1477 Jan 20 13:23 README.txt
-rw-r--r-- 1 hadoop hadoop 101401 Mar 11 12:25 RELEASENOTES.md
drwxr-xr-x 4 hadoop hadoop 4096 Mar 11 11:17 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Jan 20 13:46 conf/
drwxr-xr-x 7 hadoop hadoop 4096 Mar 11 12:58 hbase-webapps/
drwxr-xr-x 6 hadoop hadoop 12288 May 30 20:24 lib/
root@kylin:/opt/hbase# mkdir /etc/hbase
root@kylin:/opt/hbase# cp -r /opt/hbase/current/conf /etc/hbase/
root@kylin:/opt/hbase# chown -R hadoop:hadoop /etc/hbase/
root@kylin:~# vim /home/hadoop/.bashrc
---
# HBase
export HBASE_HOME=/opt/hbase/current
export HBASE_CONF_DIR=/etc/hbase/conf
export PATH=$PATH:$HBASE_HOME/bin
---
root@kylin:~# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$ vim /etc/hbase/conf/hbase-env.sh
---
export JAVA_HOME=/usr/java/default/
---
# Pseudo-distributed setup
#http://hbase.apache.org/book.html#quickstart
#
hadoop@kylin:~$ vim /etc/hbase/conf/hbase-site.xml
---
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://localhost:9000/hbase</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/opt/hbase/current/data/zookeeper</value>
</property>
<property>
<name>hbase.master.maxclockskew</name>
<value>180000</value>
<description>Time difference of regionserver from master (NTP)</description>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
#originally localhost:2181
#after changing the value to just "localhost", Kylin later started successfully
<value>localhost</value>
<description>For multiple ZooKeeper hosts, separate them with commas</description>
</property>
---
hadoop@kylin:~$ cat /etc/hbase/conf/regionservers
localhost
#hadoop@kylin:~$ $HBASE_HOME/bin/start-hbase.sh
# Start
#$HBASE_HOME/bin/hbase-daemon.sh start zookeeper
$ZK_HOME/bin/zkServer.sh start
$HBASE_HOME/bin/hbase-daemon.sh start master
$HBASE_HOME/bin/hbase-daemon.sh start regionserver
#stop
$HBASE_HOME/bin/hbase-daemon.sh stop regionserver
$HBASE_HOME/bin/hbase-daemon.sh stop master
$ZK_HOME/bin/zkServer.sh stop
#$HBASE_HOME/bin/hbase-daemon.sh stop zookeeper
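# A small wrapper script capturing the start/stop ordering above (a sketch; the name hbase-ctl.sh is arbitrary):
#!/bin/bash
# hbase-ctl.sh -- start/stop the local pseudo-distributed HBase stack in the right order
case "$1" in
  start)
    $ZK_HOME/bin/zkServer.sh start
    $HBASE_HOME/bin/hbase-daemon.sh start master
    $HBASE_HOME/bin/hbase-daemon.sh start regionserver
    ;;
  stop)
    $HBASE_HOME/bin/hbase-daemon.sh stop regionserver
    $HBASE_HOME/bin/hbase-daemon.sh stop master
    $ZK_HOME/bin/zkServer.sh stop
    ;;
  *)
    echo "usage: $0 {start|stop}"
    ;;
esac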
hadoop@kylin:/opt/hbase/current/lib$ hbase version
/opt/hadoop/current/libexec/hadoop-functions.sh: line 2366: HADOOP_ORG.APACHE.HADOOP.HBASE.UTIL.GETJAVAPROPERTY_USER: bad substitution
/opt/hadoop/current/libexec/hadoop-functions.sh: line 2461: HADOOP_ORG.APACHE.HADOOP.HBASE.UTIL.GETJAVAPROPERTY_OPTS: bad substitution
# Because of the "bad substitution" messages above, the corresponding blocks in /opt/hadoop/current/libexec/hadoop-functions.sh (around lines 2366 and 2461) were commented out.
HBase 2.2.4
Source code repository git://hao-OptiPlex-7050/home/hao/open_source/hbase revision=67779d1a325a4f78a468af3339e73bf075888bac
Compiled by hao on 2020年 03月 11日 星期三 12:57:39 CST
From source with checksum 19ada8ab3844a5aa8ccaacdd5f2893ca
hadoop@kylin:~$ jps
30689 JobHistoryServer
30289 ResourceManager
12419 HRegionServer
6708 RunJar
30073 NameNode
30362 NodeManager
30139 SecondaryNameNode
12539 Jps
31356 DataNode
10956 QuorumPeerMain
11853 HMaster
# hbase shell
hadoop@kylin:/opt/hbase/current/bin$ hbase shell
HBase Shell
Use "help" to get list of supported commands.
Use "exit" to quit this interactive shell.
For Reference, please visit: http://hbase.apache.org/2.0/book.html#shell
Version 2.2.4, r67779d1a325a4f78a468af3339e73bf075888bac, 2020年 03月 11日 星期三 12:57:39 CST
Took 0.0112 seconds
hbase(main):003:0> table_help
Help for table-reference commands.
You can either create a table via 'create' and then manipulate the table via commands like 'put', 'get', etc.
See the standard help information for how to use each of these commands.
However, as of 0.96, you can also get a reference to a table, on which you can invoke commands.
For instance, you can get create a table and keep around a reference to it via:
hbase> t = create 't', 'cf'
Or, if you have already created the table, you can get a reference to it:
hbase> t = get_table 't'
You can do things like call 'put' on the table:
hbase> t.put 'r', 'cf:q', 'v'
which puts a row 'r' with column family 'cf', qualifier 'q' and value 'v' into table t.
To read the data out, you can scan the table:
hbase> t.scan
which will read all the rows in table 't'.
Essentially, any command that takes a table name can also be done via table reference.
Other commands include things like: get, delete, deleteall,
get_all_columns, get_counter, count, incr. These functions, along with
the standard JRuby object methods are also available via tab completion.
For more information on how to use each of these commands, you can also just type:
hbase> t.help 'scan'
which will output more information on how to use that command.
You can also do general admin actions directly on a table; things like enable, disable,
flush and drop just by typing:
hbase> t.enable
hbase> t.flush
hbase> t.disable
hbase> t.drop
Note that after dropping a table, your reference to it becomes useless and further usage
is undefined (and not recommended).
Took 0.0030 seconds
hbase(main):004:0>
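# A minimal illustrative session based on the help above (table name 't1' and column family 'cf' are arbitrary):
hbase> create 't1', 'cf'
hbase> put 't1', 'row1', 'cf:q', 'v1'
hbase> scan 't1'
hbase> get 't1', 'row1'
hbase> disable 't1'
hbase> drop 't1'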
# phoenix (5.0.0)
# Phoenix 5.0.0 supports Apache HBase 2.0
root@kylin:~/wget# wget https://mirror.bit.edu.cn/apache/phoenix/apache-phoenix-5.0.0-HBase-2.0/bin/apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
--2020-05-31 07:07:07-- https://mirror.bit.edu.cn/apache/phoenix/apache-phoenix-5.0.0-HBase-2.0/bin/apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
Resolving mirror.bit.edu.cn (mirror.bit.edu.cn)... 219.143.204.117, 202.204.80.77, 2001:da8:204:1205::22
Connecting to mirror.bit.edu.cn (mirror.bit.edu.cn)|219.143.204.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 436868323 (417M) [application/octet-stream]
Saving to: ‘apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz’
apache-phoenix-5.0.0-HBase-2.0-bin.t 25%[================> ] 107.58M 3.70MB/s eta 87s
root@kylin:~/wget# mkdir /opt/phoenix
root@kylin:~/wget# tar -zxf apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz -C /opt/phoenix/
root@kylin:~/wget# cd /opt/phoenix/
root@kylin:/opt/phoenix# ll
total 12
drwxr-xr-x 3 root root 4096 May 31 22:49 ./
drwxr-xr-x 8 root root 4096 May 31 22:49 ../
drwxr-xr-x 5 502 staff 4096 Jun 27 2018 apache-phoenix-5.0.0-HBase-2.0-bin/
root@kylin:/opt/phoenix# ln -s apache-phoenix-5.0.0-HBase-2.0-bin phoenix-5.0.0-HBase-2.0
root@kylin:/opt/phoenix# ll
total 12
drwxr-xr-x 3 root root 4096 May 31 22:51 ./
drwxr-xr-x 8 root root 4096 May 31 22:49 ../
drwxr-xr-x 5 502 staff 4096 Jun 27 2018 apache-phoenix-5.0.0-HBase-2.0-bin/
lrwxrwxrwx 1 root root 34 May 31 22:51 phoenix-5.0.0-HBase-2.0 -> apache-phoenix-5.0.0-HBase-2.0-bin/
root@kylin:/opt/phoenix# ln -s phoenix-5.0.0-HBase-2.0 current
root@kylin:/opt/phoenix# chown -R hadoop:hadoop /opt/phoenix/
root@kylin:/opt/phoenix# ll current
lrwxrwxrwx 1 hadoop hadoop 23 May 31 22:51 current -> phoenix-5.0.0-HBase-2.0/
root@kylin:/opt/phoenix# ll current/
total 474596
drwxr-xr-x 5 hadoop hadoop 4096 Jun 27 2018 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 31 22:51 ../
-rw-r--r-- 1 hadoop hadoop 144163 Jun 27 2018 LICENSE
-rw-r--r-- 1 hadoop hadoop 10509 Jun 27 2018 NOTICE
-rw-r--r-- 1 hadoop hadoop 1150 Jun 27 2018 README.md
drwxr-xr-x 4 hadoop hadoop 4096 May 31 22:50 bin/
drwxr-xr-x 3 hadoop hadoop 4096 May 31 22:50 examples/
-rw-r--r-- 1 hadoop hadoop 135109092 Jun 27 2018 phoenix-5.0.0-HBase-2.0-client.jar
-rw-r--r-- 1 hadoop hadoop 110858350 Jun 27 2018 phoenix-5.0.0-HBase-2.0-hive.jar
-rw-r--r-- 1 hadoop hadoop 137968668 Jun 27 2018 phoenix-5.0.0-HBase-2.0-pig.jar
-rw-r--r-- 1 hadoop hadoop 7931132 Jun 27 2018 phoenix-5.0.0-HBase-2.0-queryserver.jar
-rw-r--r-- 1 hadoop hadoop 41800313 Jun 27 2018 phoenix-5.0.0-HBase-2.0-server.jar
-rw-r--r-- 1 hadoop hadoop 34159398 Jun 27 2018 phoenix-5.0.0-HBase-2.0-thin-client.jar
-rw-r--r-- 1 hadoop hadoop 2582830 Jun 27 2018 phoenix-core-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 2464127 Jun 27 2018 phoenix-core-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 4317285 Jun 27 2018 phoenix-core-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 30171 Jun 27 2018 phoenix-flume-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 37284 Jun 27 2018 phoenix-flume-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 47912 Jun 27 2018 phoenix-flume-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 85153 Jun 27 2018 phoenix-hive-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 78698 Jun 27 2018 phoenix-hive-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 139358 Jun 27 2018 phoenix-hive-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 701717 Jun 27 2018 phoenix-kafka-5.0.0-HBase-2.0-minimal.jar
-rw-r--r-- 1 hadoop hadoop 17292 Jun 27 2018 phoenix-kafka-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 23673 Jun 27 2018 phoenix-kafka-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 27314 Jun 27 2018 phoenix-kafka-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 13043 Jun 27 2018 phoenix-load-balancer-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 22960 Jun 27 2018 phoenix-load-balancer-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 3649883 Jun 27 2018 phoenix-pherf-5.0.0-HBase-2.0-minimal.jar
-rw-r--r-- 1 hadoop hadoop 117898 Jun 27 2018 phoenix-pherf-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 71056 Jun 27 2018 phoenix-pherf-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 166993 Jun 27 2018 phoenix-pherf-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 29760 Jun 27 2018 phoenix-pig-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 46425 Jun 27 2018 phoenix-pig-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 45806 Jun 27 2018 phoenix-pig-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 22595 Jun 27 2018 phoenix-queryserver-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 59481 Jun 27 2018 phoenix-queryserver-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 30825 Jun 27 2018 phoenix-queryserver-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 13491 Jun 27 2018 phoenix-queryserver-client-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 10914 Jun 27 2018 phoenix-queryserver-client-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 16785 Jun 27 2018 phoenix-queryserver-client-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 3569 Jun 27 2018 phoenix-spark-5.0.0-HBase-2.0-javadoc.jar
-rw-r--r-- 1 hadoop hadoop 25584 Jun 27 2018 phoenix-spark-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 127007 Jun 27 2018 phoenix-spark-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 88764 Jun 27 2018 phoenix-spark-5.0.0-HBase-2.0.jar
-rw-r--r-- 1 hadoop hadoop 2730675 Jun 27 2018 phoenix-tracing-webapp-5.0.0-HBase-2.0-runnable.jar
-rw-r--r-- 1 hadoop hadoop 11826 Jun 27 2018 phoenix-tracing-webapp-5.0.0-HBase-2.0-sources.jar
-rw-r--r-- 1 hadoop hadoop 8043 Jun 27 2018 phoenix-tracing-webapp-5.0.0-HBase-2.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 16290 Jun 27 2018 phoenix-tracing-webapp-5.0.0-HBase-2.0.jar
drwxr-xr-x 6 hadoop hadoop 4096 May 31 22:50 python/
root@kylin:/opt/phoenix# vim /home/hadoop/.bashrc
---
# Phoenix
export PHOENIX_HOME=/opt/phoenix/current
export PATH=$PATH:$PHOENIX_HOME/bin
---
root@kylin:/opt/phoenix# source /home/hadoop/.bashrc
hadoop@kylin:/opt/phoenix/current/bin$ cp /etc/hbase/conf/hbase-site.xml /opt/phoenix/current/bin/
hadoop@kylin:/opt/phoenix/current/bin$ ./sqlline.py localhost
/usr/bin/env: ‘python’: No such file or directory
hadoop@kylin:/opt/phoenix/current/bin$ su root
root@kylin:~# apt-get install python
...
hadoop@kylin:/opt/phoenix/current/bin$ ./sqlline.py localhost
Setting property: [incremental, false]
Setting property: [isolation, TRANSACTION_READ_COMMITTED]
issuing: !connect jdbc:phoenix:localhost none none org.apache.phoenix.jdbc.PhoenixDriver
Connecting to jdbc:phoenix:localhost
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/phoenix/apache-phoenix-5.0.0-HBase-2.0-bin/phoenix-5.0.0-HBase-2.0-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
20/06/01 19:53:50 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
# Phoenix startup error
Error: org.apache.hadoop.hbase.DoNotRetryIOException: Unable to load configured region split policy 'org.apache.phoenix.schema.MetaDataSplitPolicy' for table 'SYSTEM.CATALOG' Set hbase.table.sanity.checks to false at conf or table descriptor if you want to bypass sanity checks
......
sqlline version 1.2.0
0: jdbc:phoenix:localhost> show databases;
No current connection
0: jdbc:phoenix:localhost>
hadoop@kylin:/opt/phoenix/current/bin$ cp ../*.jar /opt/hbase/current/lib/
hadoop@kylin:/opt/phoenix/current/bin$ ./sqlline.py localhost
Setting property: [incremental, false]
Setting property: [isolation, TRANSACTION_READ_COMMITTED]
issuing: !connect jdbc:phoenix:localhost none none org.apache.phoenix.jdbc.PhoenixDriver
Connecting to jdbc:phoenix:localhost
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/phoenix/apache-phoenix-5.0.0-HBase-2.0-bin/phoenix-5.0.0-HBase-2.0-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
20/06/01 20:51:32 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Connected to: Phoenix (version 5.0)
Driver: PhoenixEmbeddedDriver (version 5.0)
Autocommit status: true
Transaction isolation: TRANSACTION_READ_COMMITTED
Building list of tables and columns for tab-completion (set fastconnect to true to skip)...
133/133 (100%) Done
Done
sqlline version 1.2.0
0: jdbc:phoenix:localhost> !tables
+------------+--------------+-------------+---------------+----------+------------+----------------------------+------------+
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERA |
+------------+--------------+-------------+---------------+----------+------------+----------------------------+------------+
| | SYSTEM | CATALOG | SYSTEM TABLE | | | | |
| | SYSTEM | FUNCTION | SYSTEM TABLE | | | | |
| | SYSTEM | LOG | SYSTEM TABLE | | | | |
| | SYSTEM | SEQUENCE | SYSTEM TABLE | | | | |
| | SYSTEM | STATS | SYSTEM TABLE | | | | |
+------------+--------------+-------------+---------------+----------+------------+----------------------------+------------+
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .> create table test1 (mykey integer not null primary key, mycolumn varchar);
No rows affected (2.524 seconds)
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .> upsert into test1 values (1,'Hello');
1 row affected (0.395 seconds)
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .> select * from test1;
+--------+-----------+
| MYKEY | MYCOLUMN |
+--------+-----------+
| 1 | Hello |
+--------+-----------+
1 row selected (0.144 seconds)
0: jdbc:phoenix:localhost> !sql
. . . . . . . . . . . . .> delete from test1 where mykey = 1;
1 row affected (0.042 seconds)
0: jdbc:phoenix:localhost> select * from test1;
+--------+-----------+
| MYKEY | MYCOLUMN |
+--------+-----------+
+--------+-----------+
No rows selected (0.074 seconds)
0: jdbc:phoenix:localhost> !quit
Closing: org.apache.phoenix.jdbc.PhoenixConnection
hadoop@kylin:/opt/phoenix/current/bin$
# $PHOENIX_HOME/bin/sqlline.py localhost
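# Phoenix also ships a query server for thin clients; a sketch not run in this session (8765 is the default Avatica port):
# $PHOENIX_HOME/bin/queryserver.py start
# $PHOENIX_HOME/bin/sqlline-thin.py http://localhost:8765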
#zookeeper 3.6.1
#root@kylin:~/wget# wget https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.6.1/apache-zookeeper-3.6.1-bin.tar.gz
#
root@kylin:~/wget# mkdir /opt/zookeeper
root@kylin:~/wget# tar -zxf apache-zookeeper-3.6.1-bin.tar.gz -C /opt/zookeeper/
root@kylin:~/wget# cd /opt/zookeeper/
root@kylin:/opt/zookeeper# mv apache-zookeeper-3.6.1-bin zookeeper-3.6.1
root@kylin:/opt/zookeeper# ln -s zookeeper-3.6.1 current
root@kylin:/opt/zookeeper# chown -R hadoop:hadoop /opt/zookeeper/
root@kylin:/opt/zookeeper# ll
total 12
drwxr-xr-x 3 hadoop hadoop 4096 May 30 22:56 ./
drwxr-xr-x 6 root root 4096 May 30 22:53 ../
lrwxrwxrwx 1 hadoop hadoop 15 May 30 22:56 current -> zookeeper-3.6.1/
drwxr-xr-x 6 hadoop hadoop 4096 May 30 22:53 zookeeper-3.6.1/
root@kylin:/opt/zookeeper# ll current/
total 48
drwxr-xr-x 6 hadoop hadoop 4096 May 30 22:53 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 30 22:56 ../
-rw-r--r-- 1 hadoop hadoop 11358 Apr 21 22:59 LICENSE.txt
-rw-r--r-- 1 hadoop hadoop 432 Apr 21 22:59 NOTICE.txt
-rw-r--r-- 1 hadoop hadoop 1963 Apr 21 22:59 README.md
-rw-r--r-- 1 hadoop hadoop 3166 Apr 21 22:59 README_packaging.md
drwxr-xr-x 2 hadoop hadoop 4096 Apr 21 22:59 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 21 22:59 conf/
drwxr-xr-x 5 hadoop hadoop 4096 Apr 21 23:00 docs/
drwxr-xr-x 2 hadoop hadoop 4096 May 30 22:53 lib/
root@kylin:/opt/zookeeper# mkdir /etc/zookeeper
root@kylin:/opt/zookeeper# cp -r /opt/zookeeper/current/conf /etc/zookeeper/
root@kylin:/opt/zookeeper# chown -R hadoop:hadoop /etc/zookeeper/
root@kylin:/opt/zookeeper# vim /home/hadoop/.bashrc
---
# Zookeeper
export ZK_HOME=/opt/zookeeper/current
export ZK_CONF_DIR=/etc/zookeeper/conf
export PATH=$PATH:$ZK_HOME/bin
---
root@kylin:/opt/zookeeper# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$ cd /opt/zookeeper/current
hadoop@kylin:/opt/zookeeper/current$ mkdir data
hadoop@kylin:/opt/zookeeper/current$ mkdir logs
hadoop@kylin:~$ cd /etc/zookeeper/conf/
hadoop@kylin:/etc/zookeeper/conf$ ll
total 20
drwxr-xr-x 2 hadoop hadoop 4096 May 30 22:59 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 30 22:59 ../
-rw-r--r-- 1 hadoop hadoop 535 May 30 22:59 configuration.xsl
-rw-r--r-- 1 hadoop hadoop 3435 May 30 22:59 log4j.properties
-rw-r--r-- 1 hadoop hadoop 1148 May 30 22:59 zoo_sample.cfg
hadoop@kylin:/etc/zookeeper/conf$ cp zoo_sample.cfg zoo.cfg
hadoop@kylin:/etc/zookeeper/conf$ vim zoo.cfg
---
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
---
# ZooKeeper standalone mode
# Running ZooKeeper in standalone mode is convenient for evaluation and for some development and testing, but in production ZooKeeper should be run in replicated mode.
# A replicated group of servers for the same application is called a quorum; in replicated mode, all servers in the quorum share copies of the same configuration file.
hadoop@kylin:~$ cat /etc/zookeeper/conf/zoo.cfg |grep -v ^#|grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181
#Configuring ZooKeeper replicated mode
#Replicated mode requires at least three servers, and an odd number of servers is strongly recommended. With only two servers,
#if one of them fails there are not enough machines to form a majority quorum; with two single points of failure, two servers are inherently less stable than one.
#
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181
server.1=<zoo1>:2888:3888
server.2=<zoo2>:2888:3888
server.3=<zoo3>:2888:3888
# echo "1" > /opt/zookeeper/current/data/myid #zoo1
# echo "2" > /opt/zookeeper/current/data/myid #zoo2
# echo "3" > /opt/zookeeper/current/data/myid #zoo3
hadoop@kylin:~$ cd $ZK_HOME/conf
hadoop@kylin:/opt/zookeeper/current/conf$ ln -s /etc/zookeeper/conf/zoo.cfg zoo.cfg
hadoop@kylin:/opt/zookeeper/current/conf$ ll
total 20
drwxr-xr-x 2 hadoop hadoop 4096 May 30 23:19 ./
drwxr-xr-x 8 hadoop hadoop 4096 May 30 23:13 ../
-rw-r--r-- 1 hadoop hadoop 535 Apr 21 22:59 configuration.xsl
-rw-r--r-- 1 hadoop hadoop 3435 Apr 21 22:59 log4j.properties
lrwxrwxrwx 1 hadoop hadoop 27 May 30 23:19 zoo.cfg -> /etc/zookeeper/conf/zoo.cfg
-rw-r--r-- 1 hadoop hadoop 1148 Apr 21 22:59 zoo_sample.cfg
hadoop@kylin:~$ $ZK_HOME/bin/zkServer.sh start [-server localhost:2181]
ZooKeeper JMX enabled by default
Using config: /opt/zookeeper/current/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
hadoop@kylin:~$ $ZK_HOME/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /opt/zookeeper/current/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: standalone
hadoop@kylin:~$ ps -ef | grep zookeeper
hadoop@kylin:~$ cp /etc/zookeeper/conf/zoo.cfg /etc/hbase/conf/
hadoop@kylin:/opt/hbase/current/conf$ ln -s /etc/hbase/conf/zoo.cfg zoo.cfg
hadoop@kylin:/opt/hbase/current/conf$ ll
total 52
drwxr-xr-x 2 hadoop hadoop 4096 May 30 23:31 ./
drwxr-xr-x 8 hadoop hadoop 4096 May 30 22:18 ../
-rw-r--r-- 1 hadoop hadoop 1811 May 2 2018 hadoop-metrics2-hbase.properties
-rw-r--r-- 1 hadoop hadoop 4284 Nov 28 2019 hbase-env.cmd
-rw-r--r-- 1 hadoop hadoop 7536 Jan 20 13:23 hbase-env.sh
-rw-r--r-- 1 hadoop hadoop 2257 May 2 2018 hbase-policy.xml
-rw-r--r-- 1 hadoop hadoop 934 May 2 2018 hbase-site.xml
-rw-r--r-- 1 hadoop hadoop 1169 Jan 17 12:25 log4j-hbtop.properties
-rw-r--r-- 1 hadoop hadoop 4977 Nov 28 2019 log4j.properties
-rw-r--r-- 1 hadoop hadoop 10 May 2 2018 regionservers
lrwxrwxrwx 1 hadoop hadoop 23 May 30 23:31 zoo.cfg -> /etc/hbase/conf/zoo.cfg
hadoop@kylin:/opt/zookeeper/current/conf$ sed -i 's/INFO/ERROR/' log4j.properties
hadoop@kylin:/opt/zookeeper/current/conf$ $ZK_HOME/bin/zkCli.sh
Connecting to localhost:2181
Welcome to ZooKeeper!
JLine support is enabled
WATCHER::
WatchedEvent state:SyncConnected type:None path:null
[zk: localhost:2181(CONNECTED) 0] ls /
[hbase, zookeeper]
[zk: localhost:2181(CONNECTED) 2] deleteall /hbase
[zk: localhost:2181(CONNECTED) 3] ls /
[zookeeper]
# First, check whether ZooKeeper is listening on localhost port 2181
hadoop@kylin:~$ netstat -tunelp | grep 2181 | grep -i LISTEN
(Not all processes could be identified, non-owned process info
will not be shown, you would have to be root to see it all.)
tcp6 0 0 :::2181 :::* LISTEN 1001 74402 15967/java
hadoop@kylin:/etc/kylin/conf$ netstat -anp | grep 2181
#Problem: HMaster exits by itself about 10 seconds after starting.
#Many of the fixes found online did not solve it.
#The hbase-hadoop-master log shows: HMaster: Failed to become active master
<!--
hadoop@kylin:/opt/hbase/current/logs$ tail -1000 hbase-hadoop-master-kylin.log | grep 06:30
Sun May 31 06:30:11 CST 2020 Starting master on kylin
2020-05-31 06:30:24,247 INFO [main] util.log: Logging initialized @11657ms
2020-05-31 06:30:24,787 INFO [main] server.Server: jetty-9.3.27.v20190418, build timestamp: 2019-04-19T02:11:38+08:00, git hash: d3e249f86955d04bc646bb620905b7c1bc596a8d
2020-05-31 06:30:24,996 INFO [main] handler.ContextHandler: Started o.e.j.s.ServletContextHandler@4e6f2bb5{/logs,file:///opt/hbase/hbase-2.2.4/logs/,AVAILABLE}
2020-05-31 06:30:24,999 INFO [main] handler.ContextHandler: Started o.e.j.s.ServletContextHandler@3f628ce9{/static,file:///opt/hbase/hbase-2.2.4/hbase-webapps/static/,AVAILABLE}
2020-05-31 06:30:25,273 INFO [main] webapp.StandardDescriptorProcessor: NO JSP Support for /, did not find org.eclipse.jetty.jsp.JettyJspServlet
2020-05-31 06:30:25,409 INFO [main] handler.ContextHandler: Started o.e.j.w.WebAppContext@2af4129d{/,file:///opt/hbase/hbase-2.2.4/hbase-webapps/master/,AVAILABLE}{file:/opt/hbase/hbase-2.2.4/hbase-webapps/master}
2020-05-31 06:30:25,436 INFO [main] server.AbstractConnector: Started ServerConnector@295bf2a{HTTP/1.1,[http/1.1]}{0.0.0.0:16010}
2020-05-31 06:30:25,437 INFO [main] server.Server: Started @12854ms
2020-05-31 06:30:27,875 ERROR [master/kylin:16000:becomeActiveMaster] master.HMaster: Failed to become active master
2020-05-31 06:30:27,879 ERROR [master/kylin:16000:becomeActiveMaster] master.HMaster: ***** ABORTING master kylin.localdomain,16000,1590877816126: Unhandled exception. Starting shutdown. *****
2020-05-31 06:30:29,131 INFO [master/kylin:16000] handler.ContextHandler: Stopped o.e.j.w.WebAppContext@2af4129d{/,null,UNAVAILABLE}{file:/opt/hbase/hbase-2.2.4/hbase-webapps/master}
2020-05-31 06:30:29,149 INFO [master/kylin:16000] server.AbstractConnector: Stopped ServerConnector@295bf2a{HTTP/1.1,[http/1.1]}{0.0.0.0:16010}
2020-05-31 06:30:29,152 INFO [master/kylin:16000] handler.ContextHandler: Stopped o.e.j.s.ServletContextHandler@3f628ce9{/static,file:///opt/hbase/hbase-2.2.4/hbase-webapps/static/,UNAVAILABLE}
2020-05-31 06:30:29,153 INFO [master/kylin:16000] handler.ContextHandler: Stopped o.e.j.s.ServletContextHandler@4e6f2bb5{/logs,file:///opt/hbase/hbase-2.2.4/logs/,UNAVAILABLE}
2020-05-31 06:30:29,201 ERROR [main] master.HMasterCommandLine: Master exiting
-->
#Solution:
#Add the following to hbase-site.xml:
---
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
---
# After restarting the services, the problem was resolved.
#
# Apache Kylin (3.0.2)
# http://kylin.apache.org/cn/
#
# Built for Hadoop 3.1 + HBase 2.0
root@kylin:~/wget# wget https://mirrors.tuna.tsinghua.edu.cn/apache/kylin/apache-kylin-3.0.2/apache-kylin-3.0.2-bin-hadoop3.tar.gz
--2020-05-31 06:56:45-- https://mirrors.tuna.tsinghua.edu.cn/apache/kylin/apache-kylin-3.0.2/apache-kylin-3.0.2-bin-hadoop3.tar.gz
Resolving mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)... 101.6.8.193, 2402:f000:1:408:8100::1
Connecting to mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)|101.6.8.193|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 250061858 (238M) [application/octet-stream]
Saving to: ‘apache-kylin-3.0.2-bin-hadoop3.tar.gz’
apache-kylin-3.0.2-bin-hadoop3.tar.g 11%[======> ] 27.08M 2.10MB/s eta 1m 45s
root@kylin:~/wget# mkdir /opt/kylin
root@kylin:~/wget# tar -zxf apache-kylin-3.0.2-bin-hadoop3.tar.gz -C /opt/kylin/
root@kylin:~/wget# cd /opt/kylin/
root@kylin:/opt/kylin# ll
total 12
drwxr-xr-x 3 root root 4096 May 31 14:29 ./
drwxr-xr-x 7 root root 4096 May 31 14:28 ../
drwxr-xr-x 8 root root 4096 May 12 22:16 apache-kylin-3.0.2-bin-hadoop3/
root@kylin:/opt/kylin# ln -s apache-kylin-3.0.2-bin-hadoop3 kylin-3.0.2-hadoop3
root@kylin:/opt/kylin# ln -s kylin-3.0.2-hadoop3 current
root@kylin:/opt/kylin# ll
total 12
drwxr-xr-x 3 root root 4096 May 31 14:32 ./
drwxr-xr-x 7 root root 4096 May 31 14:28 ../
drwxr-xr-x 8 root root 4096 May 12 22:16 apache-kylin-3.0.2-bin-hadoop3/
lrwxrwxrwx 1 root root 19 May 31 14:32 current -> kylin-3.0.2-hadoop3/
lrwxrwxrwx 1 root root 30 May 31 14:31 kylin-3.0.2-hadoop3 -> apache-kylin-3.0.2-bin-hadoop3/
root@kylin:/opt/kylin# chown -R hadoop:hadoop /opt/kylin/
root@kylin:/opt/kylin# ll current
lrwxrwxrwx 1 hadoop hadoop 19 May 31 14:32 current -> kylin-3.0.2-hadoop3/
root@kylin:/opt/kylin# ll current/
total 64
drwxr-xr-x 8 hadoop hadoop 4096 May 12 22:16 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 31 14:32 ../
-rw-r--r-- 1 hadoop hadoop 14725 May 12 22:16 LICENSE
-rw-r--r-- 1 hadoop hadoop 167 May 12 22:16 NOTICE
-rw-r--r-- 1 hadoop hadoop 2245 May 12 22:16 README.md
-rw-r--r-- 1 hadoop hadoop 19 May 12 22:16 VERSION
drwxr-xr-x 2 hadoop hadoop 4096 May 12 22:16 bin/
-rw-r--r-- 1 hadoop hadoop 823 May 12 22:16 commit_SHA1
drwxr-xr-x 2 hadoop hadoop 4096 May 12 22:16 conf/
drwxr-xr-x 3 hadoop hadoop 4096 May 12 22:16 lib/
drwxr-xr-x 4 hadoop hadoop 4096 May 12 22:16 sample_cube/
drwxr-xr-x 9 hadoop hadoop 4096 May 12 22:16 tomcat/
drwxr-xr-x 2 hadoop hadoop 4096 May 12 22:16 tool/
root@kylin:/opt/kylin# mkdir /etc/kylin
root@kylin:/opt/kylin# cp -r /opt/kylin/current/conf /etc/kylin/
root@kylin:/opt/kylin# chown -R hadoop:hadoop /etc/kylin/
root@kylin:/opt/kylin# vim /home/hadoop/.bashrc
---
# Kylin
export KYLIN_HOME=/opt/kylin/current
export KYLIN_CONF_DIR=/etc/kylin/conf
export PATH=$PATH:$KYLIN_HOME/bin
---
root@kylin:/opt/kylin# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$
hadoop@kylin:~$ cd /etc/kylin/conf
hadoop@kylin:/etc/kylin/conf$ cat kylin.properties | grep kylin.job.scheduler.default
#kylin.job.scheduler.default=0
#hadoop@kylin:/etc/kylin/conf$ sed -i 's/#kylin.job.scheduler.default=0/kylin.job.scheduler.default=2/' /etc/kylin/conf/kylin.properties
#hadoop@kylin:/etc/kylin/conf$ cat kylin.properties | grep kylin.job.scheduler.default
#kylin.job.scheduler.default=2
#hadoop@kylin:/etc/kylin/conf$ echo "kylin.job.lock=org.apache.kylin.storage.hbase.util.ZookeeperJobLock" >> /etc/kylin
#/conf/kylin.properties
#hadoop@kylin:/etc/kylin/conf$ cat kylin.properties | grep kylin.job.lock
#kylin.job.lock=org.apache.kylin.storage.hbase.util.ZookeeperJobLock
#---
#hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/#kylin.job.scheduler.default=0/kylin.job.scheduler.default=2/' kylin.properties
#hadoop@kylin:/opt/kylin/current/conf$ echo "kylin.job.lock=org.apache.kylin.storage.hbase.util.ZookeeperJobLock" >> kylin.properties
#hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/#kylin.server.mode=all/kylin.server.mode=all/' kylin.properties
#---
hadoop@kylin:~$ vim /opt/kylin/current/conf/kylin_hive_conf.xml
---
<property>
<name>hive.exec.compress.output</name>
<value>false</value>
<description>Enable compress</description>
</property>
---
hadoop@kylin:~$ cat /opt/kylin/current/conf/kylin.properties | grep kylin.job.scheduler.default
kylin.job.scheduler.default=2
hadoop@kylin:/opt/kylin/current/conf$ cat kylin.properties | grep -v ^#|grep -v ^$
kylin.server.mode=all
hadoop@kylin:~$ vim $KYLIN_HOME/bin/download-spark.sh
hadoop@kylin:~$ $KYLIN_HOME/bin/check-env.sh
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Retrieving hive dependency...
ERROR: Check hive`s usability failed, please check the status of your cluster
hadoop@kylin:~$ vim /home/hadoop/.bashrc
---
export hive_dependency=/etc/hive/conf:/opt/hive/current/lib/*:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-pig-adapter-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-core-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-server-extensions-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-streaming-3.1.2.jar:/opt/hive/current/lib/hive-exec-3.1.2.jar
---
hadoop@kylin:~$ source /home/hadoop/.bashrc
#Reboot the system
hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Retrieving hive dependency...
Retrieving hbase dependency...
hbase-common lib not found
hadoop@kylin:~$ vim /home/hadoop/.bashrc
---
#export hbase_dependency=/etc/hbase/conf:/opt/hbase/current/lib/*
export HBASE_CLASSPATH=/opt/hbase/current/lib/hbase-common-2.2.4.jar:/opt/hbase/current/lib/*
---
hadoop@kylin:~$ source /home/hadoop/.bashrc
hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Retrieving hive dependency...
Retrieving hbase dependency...
Retrieving hadoop conf dir...
Retrieving kafka dependency...
Retrieving Spark dependency...
spark not found, set SPARK_HOME, or run bin/download-spark.sh
#Download Spark and set SPARK_HOME
hadoop@kylin:~$ $KYLIN_HOME/bin/kylin.sh start
Retrieving hadoop conf dir...
KYLIN_HOME is set to /opt/kylin/current
Using cached dependency...
Start to check whether we need to migrate acl tables
Using cached dependency...
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
......
A new Kylin instance is started by hadoop. To stop it, run 'kylin.sh stop'
Check the log at /opt/kylin/current/logs/kylin.log
Web UI is at http://kylin.localdomain:7070/kylin
hadoop@kylin:~$ lsof -i:7070
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
java 8348 hadoop 995u IPv6 1010270 0t0 TCP *:7070 (LISTEN)
# user: ADMIN
# passwd: KYLIN
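# To verify the instance from the command line, the Kylin REST API can be probed (a sketch; ADMIN/KYLIN are the default credentials noted above):
# curl -u ADMIN:KYLIN -X POST -H 'Content-Type: application/json' http://localhost:7070/kylin/api/user/authentication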
===============================================
# User specific aliases and functions
export JAVA_HOME=/usr/java/default
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HTTPFS_CATALINA_HOME=${HADOOP_HOME}/share/hadoop/httpfs/tomcat
export CATALINA_BASE=${HTTPFS_CATALINA_HOME}
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HTTPFS_CONFIG=/etc/hadoop/conf
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# Hive
export HIVE_HOME=/opt/hive/current
export HCAT_HOME=$HIVE_HOME/hcatalog
export HIVE_CONF_DIR=/etc/hive/conf
export PATH=$PATH:$HIVE_HOME/bin
export hive_dependency=/etc/hive/conf:/opt/hive/current/lib/*:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-pig-adapter-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-core-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-server-extensions-3.1.2.jar:/opt/hive/current/hcatalog/share/hcatalog/hive-hcatalog-streaming-3.1.2.jar:/opt/hive/current/lib/hive-exec-3.1.2.jar
# HBase
export HBASE_HOME=/opt/hbase/current
export HBASE_CONF_DIR=/etc/hbase/conf
export PATH=$PATH:$HBASE_HOME/bin
#export hbase_dependency=/etc/hbase/conf:/opt/hbase/current/lib/*
export HBASE_CLASSPATH=/opt/hbase/current/lib/hbase-common-2.2.4.jar:/opt/hbase/current/lib/*
# Zookeeper
export ZK_HOME=/opt/zookeeper/current
export ZK_CONF_DIR=/etc/zookeeper/conf
export PATH=$PATH:$ZK_HOME/bin
# Kylin
export KYLIN_HOME=/opt/kylin/current
export KYLIN_CONF_DIR=/etc/kylin/conf
export PATH=$PATH:$KYLIN_HOME/bin
#Spark
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf
export PATH=$PATH:$SPARK_HOME/bin
===============================================
#Using the Kylin sample cube
# Reference blog: https://blog.csdn.net/weixin_45883933/article/details/105409492?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522159095082319195264554930%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fblog.%2522%257D&request_id=159095082319195264554930&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~blog~first_rank_v1~rank_blog_v1-2-105409492.pc_v1_rank_blog_v1&utm_term=kylin
hadoop@kylin:/opt/kylin/current/bin$ ll | grep sample
-rwxr-xr-x 1 hadoop hadoop 1299 May 12 22:16 sample-streaming.sh*
-rwxr-xr-x 1 hadoop hadoop 6004 May 12 22:16 sample.sh*
hadoop@kylin:/opt/kylin/current/bin$ ./sample.sh
。。。。。
Sample cube is created successfully in project 'learn_kylin'.
Restart Kylin Server or click Web UI => System Tab => Reload Metadata to take effect
hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/INFO/WARN/' /etc/kylin/conf/kylin-server-log4j.properties
hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/INFO/WARN/' /etc/kylin/conf/kylin-tools-log4j.properties
hadoop@kylin:/opt/kylin/current/conf$ sed -i 's/INFO/WARN/' /etc/kylin/conf/kylin-spark-log4j.properties
---
# Incorrect action taken when a class conflict appeared while starting Kylin:
#hadoop@kylin:~$ mv /opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar /opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar.bak
# That change caused HMaster and HRegionServer to exit automatically after starting, and Hadoop also misbehaved.
# It was reverted afterwards.
---
# Later, Kylin reached a state where the services started but the Web UI could not be accessed:
---
HTTP Status 404 – Not Found
Type Status Report
Message: Not found
Description: The origin server did not find a current representation for the target resource or is not willing to disclose that one exists.
Apache Tomcat/8.5.51
---
# Looking for a solution:
# Kafka 2.5.0
# Scala 2.12 (2.12 is recommended).
root@kylin:~/wget# wget https://mirror.bit.edu.cn/apache/kafka/2.5.0/kafka_2.12-2.5.0.tgz
--2020-06-01 03:16:23-- https://mirror.bit.edu.cn/apache/kafka/2.5.0/kafka_2.12-2.5.0.tgz
Resolving mirror.bit.edu.cn (mirror.bit.edu.cn)... 219.143.204.117, 202.204.80.77, 2001:da8:204:1205::22
Connecting to mirror.bit.edu.cn (mirror.bit.edu.cn)|219.143.204.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 61604633 (59M) [application/octet-stream]
Saving to: ‘kafka_2.12-2.5.0.tgz’
kafka_2.12-2.5.0.tgz 14%[=========> ] 8.79M 1.22MB/s eta 47s
root@kylin:~/wget# mkdir /opt/kafka
root@kylin:~/wget# tar -zxf kafka_2.12-2.5.0.tgz -C /opt/kafka/
root@kylin:~/wget# cd /opt/kafka/
root@kylin:/opt/kafka# ll
total 12
drwxr-xr-x 3 root root 4096 Jun 1 22:18 ./
drwxr-xr-x 10 root root 4096 Jun 1 22:16 ../
drwxr-xr-x 6 root root 4096 Apr 8 09:16 kafka_2.12-2.5.0/
root@kylin:/opt/kafka# ln -s kafka_2.12-2.5.0 current
root@kylin:/opt/kafka# chown -R hadoop:hadoop /opt/kafka/
root@kylin:/opt/kafka# ll current/
total 60
drwxr-xr-x 6 hadoop hadoop 4096 Apr 8 09:16 ./
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:19 ../
-rw-r--r-- 1 hadoop hadoop 32216 Apr 8 09:13 LICENSE
-rw-r--r-- 1 hadoop hadoop 337 Apr 8 09:13 NOTICE
drwxr-xr-x 3 hadoop hadoop 4096 Apr 8 09:16 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 8 09:16 config/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 22:18 libs/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 8 09:16 site-docs/
root@kylin:/opt/kafka# mkdir /etc/kafka
root@kylin:/opt/kafka# cp -r /opt/kafka/current/config /etc/kafka/
root@kylin:/opt/kafka# chown -R hadoop:hadoop /etc/kafka/
root@kylin:/opt/kafka# vim /home/hadoop/.bashrc
---
# Kafka
export KAFKA_HOME=/opt/kafka/current
export KAFKA_CONF_DIR=/etc/kafka/config
export PATH=$PATH:$KAFKA_HOME/bin
---
root@kylin:/opt/kafka# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$ cd /etc/kafka/config/
hadoop@kylin:/etc/kafka/config$ ll
total 80
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 22:23 ./
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:23 ../
-rw-r--r-- 1 hadoop hadoop 906 Jun 1 22:23 connect-console-sink.properties
-rw-r--r-- 1 hadoop hadoop 909 Jun 1 22:23 connect-console-source.properties
-rw-r--r-- 1 hadoop hadoop 5321 Jun 1 22:23 connect-distributed.properties
-rw-r--r-- 1 hadoop hadoop 883 Jun 1 22:23 connect-file-sink.properties
-rw-r--r-- 1 hadoop hadoop 881 Jun 1 22:23 connect-file-source.properties
-rw-r--r-- 1 hadoop hadoop 2247 Jun 1 22:23 connect-log4j.properties
-rw-r--r-- 1 hadoop hadoop 2540 Jun 1 22:23 connect-mirror-maker.properties
-rw-r--r-- 1 hadoop hadoop 2262 Jun 1 22:23 connect-standalone.properties
-rw-r--r-- 1 hadoop hadoop 1221 Jun 1 22:23 consumer.properties
-rw-r--r-- 1 hadoop hadoop 4675 Jun 1 22:23 log4j.properties
-rw-r--r-- 1 hadoop hadoop 1925 Jun 1 22:23 producer.properties
-rw-r--r-- 1 hadoop hadoop 6849 Jun 1 22:23 server.properties
-rw-r--r-- 1 hadoop hadoop 1032 Jun 1 22:23 tools-log4j.properties
-rw-r--r-- 1 hadoop hadoop 1169 Jun 1 22:23 trogdor.conf
-rw-r--r-- 1 hadoop hadoop 1205 Jun 1 22:23 zookeeper.properties
hadoop@kylin:/etc/kafka/config$ cat server.properties |grep -v ^#|grep -v ^$
broker.id=0
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/tmp/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
hadoop@kylin:/etc/kafka/config$ echo "listeners=PLAINTEXT://localhost:9092" >> server.properties
hadoop@kylin:/etc/kafka/config$ echo "delete.topic.enble=true" >> server.properties
hadoop@kylin:/etc/kafka/config$ cat server.properties | grep tmp
log.dirs=/tmp/kafka-logs
hadoop@kylin:/etc/kafka/config$ ll $KAFKA_HOME
lrwxrwxrwx 1 hadoop hadoop 16 Jun 1 22:19 /opt/kafka/current -> kafka_2.12-2.5.0/
hadoop@kylin:/etc/kafka/config$ ll $KAFKA_HOME/
total 60
drwxr-xr-x 6 hadoop hadoop 4096 Apr 8 09:16 ./
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:19 ../
-rw-r--r-- 1 hadoop hadoop 32216 Apr 8 09:13 LICENSE
-rw-r--r-- 1 hadoop hadoop 337 Apr 8 09:13 NOTICE
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:47 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 8 09:16 config/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 22:18 libs/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 8 09:16 site-docs/
hadoop@kylin:/etc/kafka/config$ pwd
/etc/kafka/config
hadoop@kylin:/etc/kafka/config$ mkdir /opt/kafka/current/kafka-logs
hadoop@kylin:/etc/kafka/config$ vim server.properties
hadoop@kylin:/etc/kafka/config$ cat server.properties |grep -v ^#|grep -v ^$
broker.id=0
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/current/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
listeners=PLAINTEXT://localhost:9092
delete.topic.enable=true
# hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server.properties &
# (That form prints the server log to the terminal and stays on the startup screen.)
#
# Kafka Start
hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &
[1] 24277
hadoop@kylin:~$ jps | grep Kafka
24277 Kafka
# Kafka Stop
hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &
#Set this up in advance for later, unforeseen needs: keep the live config under /etc/kafka
hadoop@kylin:/opt/kafka/current/config$ mv server.properties server.properties.bak
hadoop@kylin:/opt/kafka/current/config$ ln -s /etc/kafka/config/server.properties server.properties
# http://kafka.apache.org/quickstart
#
hadoop@kylin:~/scripts$ chmod +x kafka_*
hadoop@kylin:~/scripts$ ll
total 24
drwxrwxr-x 2 hadoop hadoop 4096 Jun 2 00:36 ./
drwxr-xr-x 11 hadoop hadoop 4096 Jun 2 00:36 ../
-rwxrwxr-x 1 hadoop hadoop 172 Jun 2 00:36 kafka_Consumer.sh*
-rwxrwxr-x 1 hadoop hadoop 192 Jun 2 00:23 kafka_CreateTopic.sh*
-rwxrwxr-x 1 hadoop hadoop 86 Jun 2 00:27 kafka_ListTopic.sh*
-rwxrwxr-x 1 hadoop hadoop 155 Jun 2 00:33 kafka_Producer.sh*
hadoop@kylin:~/scripts$ mv kafka_CreateTopic.sh 1-kafka_CreateTopic.sh
hadoop@kylin:~/scripts$ mv kafka_ListTopic.sh 2-kafka_ListTopic.sh
hadoop@kylin:~/scripts$ mv kafka_Producer.sh 3-kafka_Producer.sh
hadoop@kylin:~/scripts$ mv kafka_Consumer.sh 4-kafka_Consumer.sh
hadoop@kylin:~/scripts$ ll
total 24
drwxrwxr-x 2 hadoop hadoop 4096 Jun 2 00:41 ./
drwxr-xr-x 11 hadoop hadoop 4096 Jun 2 00:36 ../
-rwxrwxr-x 1 hadoop hadoop 192 Jun 2 00:23 1-kafka_CreateTopic.sh*
-rwxrwxr-x 1 hadoop hadoop 86 Jun 2 00:27 2-kafka_ListTopic.sh*
-rwxrwxr-x 1 hadoop hadoop 155 Jun 2 00:33 3-kafka_Producer.sh*
-rwxrwxr-x 1 hadoop hadoop 172 Jun 2 00:36 4-kafka_Consumer.sh*
# Use Kafka
# Create a topic
hadoop@kylin:~/scripts$ cat 1-kafka_CreateTopic.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic $TopicName
# List topics
hadoop@kylin:~/scripts$ cat 2-kafka_ListTopic.sh
#!/bin/bash
$KAFKA_HOME/bin/kafka-topics.sh --list --bootstrap-server localhost:9092
# Send messages (producer)
hadoop@kylin:~/scripts$ cat 3-kafka_Producer.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-console-producer.sh --bootstrap-server localhost:9092 --topic $TopicName
# Receive messages (consumer)
hadoop@kylin:~/scripts$ cat 4-kafka_Consumer.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic $TopicName --from-beginning
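# A delete-topic helper in the same style could be added too (a sketch, not part of this session; the name 5-kafka_DeleteTopic.sh is arbitrary, and it relies on the delete.topic.enable setting above):
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-topics.sh --delete --bootstrap-server localhost:9092 --topic $TopicName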
hadoop@kylin:~/scripts$ ./1-kafka_CreateTopic.sh
Please input a topic name:test
Created topic test.
hadoop@kylin:~/scripts$ ./2-kafka_ListTopic.sh
test
# shell 1
hadoop@kylin:~/scripts$ ./3-kafka_Producer.sh
Please input a topic name:test
>a
>b
>c
>^C
# shell 2
hadoop@kylin:~/scripts$ ./4-kafka_Consumer.sh
Please input a topic name:test
a
b
c
^CProcessed a total of 3 messages
# Setting up a multi-broker cluster
# So far we have been running against a single broker, which is not very interesting. For Kafka, a single broker is just a cluster of size one,
# so nothing much changes besides starting a few more broker instances. But just to get a feel for it, let's expand the cluster to three nodes (still all on the local machine).
hadoop@kylin:~/scripts$ cd $KAFKA_CONF_DIR
hadoop@kylin:/etc/kafka/config$ cp server.properties server-1.properties
hadoop@kylin:/etc/kafka/config$ cp server.properties server-2.properties
hadoop@kylin:/etc/kafka/config$ vim server-1.properties
hadoop@kylin:/etc/kafka/config$ cat server-1.properties |grep -v ^#|grep -v ^$
broker.id=1
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/current/kafka-logs-1
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
listeners=PLAINTEXT://localhost:9093
delete.topic.enable=true
hadoop@kylin:/etc/kafka/config$ vim server-2.properties
hadoop@kylin:/etc/kafka/config$ cat server-2.properties |grep -v ^#|grep -v ^$
broker.id=2
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/kafka/current/kafka-logs-2
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
listeners=PLAINTEXT://localhost:9094
delete.topic.enable=true
hadoop@kylin:/etc/kafka/config$ mkdir /opt/kafka/current/kafka-logs-1
hadoop@kylin:/etc/kafka/config$ mkdir /opt/kafka/current/kafka-logs-2
hadoop@kylin:/etc/kafka/config$ cd -
/opt/kafka/current
hadoop@kylin:/opt/kafka/current$ ll
total 76
drwxr-xr-x 10 hadoop hadoop 4096 Jun 2 01:29 ./
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:19 ../
-rw-r--r-- 1 hadoop hadoop 32216 Apr 8 09:13 LICENSE
-rw-r--r-- 1 hadoop hadoop 337 Apr 8 09:13 NOTICE
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:47 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 23:35 config/
drwxrwxr-x 53 hadoop hadoop 4096 Jun 2 01:29 kafka-logs/
drwxrwxr-x 2 hadoop hadoop 4096 Jun 2 01:29 kafka-logs-1/
drwxrwxr-x 2 hadoop hadoop 4096 Jun 2 01:29 kafka-logs-2/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 22:18 libs/
drwxrwxr-x 2 hadoop hadoop 4096 Jun 2 01:07 logs/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 8 09:16 site-docs/
hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
[2] 28303
hadoop@kylin:~$ nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &
[3] 28724
hadoop@kylin:~$ jps | grep Kafka
28303 Kafka
28724 Kafka
24277 Kafka
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &
# Create a topic with a replication factor of three:
hadoop@kylin:~/scripts$ cat 1-kafka_CreateTopic3f.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 3 --partitions 1 --topic $TopicName
# Now that we have a cluster, how do we know which broker is doing what? Run the "describe topics" command to find out:
hadoop@kylin:~/scripts$ cat 2-kafka_DescribeTopic.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$KAFKA_HOME/bin/kafka-topics.sh --describe --bootstrap-server localhost:9092 --topic $TopicName
hadoop@kylin:~/scripts$ ./2-kafka_DescribeTopic.sh
Please input a topic name:test
Topic: test PartitionCount: 1 ReplicationFactor: 1 Configs: segment.bytes=1073741824
Topic: test Partition: 0 Leader: 0 Replicas: 0 Isr: 0
# Create the topic my-replicated-topic
hadoop@kylin:~/scripts$ ./1-kafka_CreateTopic3f.sh
Please input a topic name:my-replicated-topic
Created topic my-replicated-topic.
hadoop@kylin:~/scripts$ ./2-kafka_DescribeTopic.sh
Please input a topic name:my-replicated-topic
Topic: my-replicated-topic PartitionCount: 1 ReplicationFactor: 3 Configs: segment.bytes=1073741824
Topic: my-replicated-topic Partition: 0 Leader: 1 Replicas: 1,0,2 Isr: 1,0,2
hadoop@kylin:~/scripts$ ps aux | grep server-1.properties | awk 'NR==1{print}' | awk '{print $2}'
28303
hadoop@kylin:~/scripts$ jps | grep Kafka
28303 Kafka
28724 Kafka
24277 Kafka
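# To continue the quickstart's fault-tolerance test (a sketch, not run in this session): kill the broker that is
# currently the leader, then describe the topic again; another broker should take over as Leader and the killed
# broker should drop out of the Isr list.
kill -9 $(ps aux | grep '[s]erver-1.properties' | awk '{print $2}')
$KAFKA_HOME/bin/kafka-topics.sh --describe --bootstrap-server localhost:9092 --topic my-replicated-topic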
# Kafka Connect
# Kafka Connect is a tool shipped with Kafka for importing data into and exporting data out of Kafka.
# It is an extensible tool that runs connectors, which implement the custom logic for interacting with external systems.
# In this quickstart we run Kafka Connect with simple connectors that import data from a file into a Kafka topic and export data from a Kafka topic back to a file.
#nohup $KAFKA_HOME/bin/connect-standalone.sh $KAFKA_CONF_DIR/connect-standalone.properties $KAFKA_CONF_DIR/connect-file-source.properties $KAFKA_CONF_DIR/connect-file-sink.properties > $KAFKA_HOME/logs/nohup.out-Connect 2>&1 &
hadoop@kylin:~/scripts$ cd $KAFKA_HOME
hadoop@kylin:/opt/kafka/current$ echo -e "foo\nbar" > test.txt
hadoop@kylin:/opt/kafka/current$ ll
total 80
drwxr-xr-x 10 hadoop hadoop 4096 Jun 2 02:31 ./
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:19 ../
-rw-r--r-- 1 hadoop hadoop 32216 Apr 8 09:13 LICENSE
-rw-r--r-- 1 hadoop hadoop 337 Apr 8 09:13 NOTICE
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:47 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 23:35 config/
drwxrwxr-x 54 hadoop hadoop 4096 Jun 2 02:31 kafka-logs/
drwxrwxr-x 3 hadoop hadoop 4096 Jun 2 02:31 kafka-logs-1/
drwxrwxr-x 3 hadoop hadoop 4096 Jun 2 02:31 kafka-logs-2/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 22:18 libs/
drwxrwxr-x 2 hadoop hadoop 4096 Jun 2 02:07 logs/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 8 09:16 site-docs/
-rw-rw-r-- 1 hadoop hadoop 8 Jun 2 02:31 test.txt
hadoop@kylin:/opt/kafka/current$ nohup $KAFKA_HOME/bin/connect-standalone.sh $KAFKA_CONF_DIR/connect-standalone.properties $KAFKA_CONF_DIR/connect-file-source.properties $KAFKA_CONF_DIR/connect-file-sink.properties > $KAFKA_HOME/logs/nohup.out-Connect 2>&1 &
[4] 31488
hadoop@kylin:/opt/kafka/current$ jps
17568 RunJar
31488 ConnectStandalone
17792 HMaster
17154 ResourceManager
17444 JobHistoryServer
17608 QuorumPeerMain
17898 HRegionServer
28303 Kafka
28724 Kafka
24277 Kafka
17239 NodeManager
16953 NameNode
31867 Jps
17019 SecondaryNameNode
17084 DataNode
19903 RunJar
17567 RunJar
hadoop@kylin:/opt/kafka/current$ ll
total 84
drwxr-xr-x 10 hadoop hadoop 4096 Jun 2 02:36 ./
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:19 ../
-rw-r--r-- 1 hadoop hadoop 32216 Apr 8 09:13 LICENSE
-rw-r--r-- 1 hadoop hadoop 337 Apr 8 09:13 NOTICE
drwxr-xr-x 3 hadoop hadoop 4096 Jun 1 22:47 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 23:35 config/
drwxrwxr-x 54 hadoop hadoop 4096 Jun 2 02:36 kafka-logs/
drwxrwxr-x 3 hadoop hadoop 4096 Jun 2 02:36 kafka-logs-1/
drwxrwxr-x 4 hadoop hadoop 4096 Jun 2 02:36 kafka-logs-2/
drwxr-xr-x 2 hadoop hadoop 4096 Jun 1 22:18 libs/
drwxrwxr-x 2 hadoop hadoop 4096 Jun 2 02:36 logs/
drwxr-xr-x 2 hadoop hadoop 4096 Apr 8 09:16 site-docs/
-rw-rw-r-- 1 hadoop hadoop 8 Jun 2 02:36 test.sink.txt
-rw-rw-r-- 1 hadoop hadoop 8 Jun 2 02:31 test.txt
hadoop@kylin:/opt/kafka/current$ more test.sink.txt
foo
bar
hadoop@kylin:/opt/kafka/current$ /home/hadoop/scripts/4-kafka_Consumer.sh
Please input a topic name:connect-test
{"schema":{"type":"string","optional":false},"payload":"foo"}
{"schema":{"type":"string","optional":false},"payload":"bar"}
# In another shell, run: hadoop@kylin:/opt/kafka/current$ echo Another line >> test.txt
{"schema":{"type":"string","optional":false},"payload":"Another line"}
# Kafka documentation
# http://kafka.apache.org/documentation/
# Maxwell
# Maxwell reads insert, update, and other operations from the MySQL binlog and emits them as JSON, acting as a Kafka producer.
#
# http://maxwells-daemon.io/
# root@kylin:~/wget# wget https://github.com/zendesk/maxwell/releases/download/v1.26.3/maxwell-1.26.3.tar.gz
#
# maxwell quickstart
# http://maxwells-daemon.io/quickstart/
root@kylin:~/wget# mkdir /opt/maxwell
root@kylin:~/wget# tar -zxf maxwell-1.26.3.tar.gz -C /opt/maxwell/
root@kylin:~/wget# cd /opt/maxwell/
root@kylin:/opt/maxwell# ll
total 12
drwxr-xr-x 3 root root 4096 Jun 2 20:45 ./
drwxr-xr-x 11 root root 4096 Jun 2 20:44 ../
drwxr-xr-x 4 root root 4096 Jun 2 20:45 maxwell-1.26.3/
root@kylin:/opt/maxwell# ln -s maxwell-1.26.3 current
root@kylin:/opt/maxwell# ll
total 12
drwxr-xr-x 3 root root 4096 Jun 2 20:45 ./
drwxr-xr-x 11 root root 4096 Jun 2 20:44 ../
lrwxrwxrwx 1 root root 14 Jun 2 20:45 current -> maxwell-1.26.3/
drwxr-xr-x 4 root root 4096 Jun 2 20:45 maxwell-1.26.3/
root@kylin:/opt/maxwell# ll current/
total 76
drwxr-xr-x 4 root root 4096 Jun 2 20:45 ./
drwxr-xr-x 3 root root 4096 Jun 2 20:45 ../
-rw-r--r-- 1 501 staff 548 Apr 23 02:50 LICENSE
-rw-r--r-- 1 501 staff 1429 May 26 13:16 README.md
drwxr-xr-x 2 root root 4096 Jun 2 20:45 bin/
-rw-r--r-- 1 501 staff 20113 May 6 22:47 config.md
-rw-r--r-- 1 501 staff 11574 May 6 22:47 config.properties.example
-rw-r--r-- 1 501 staff 10259 Apr 23 02:50 kinesis-producer-library.properties.example
drwxr-xr-x 3 501 staff 4096 May 26 13:18 lib/
-rw-r--r-- 1 501 staff 470 Apr 23 02:50 log4j2.xml
-rw-r--r-- 1 501 staff 3466 May 26 13:16 quickstart.md
root@kylin:/opt/maxwell# vim current/quickstart.md
root@kylin:/opt/maxwell# chown -R hadoop:hadoop /opt/maxwell/
root@kylin:/opt/maxwell/current# vim /home/hadoop/.bashrc
# Maxwell
export MAXWELL_HOME=/opt/maxwell/current
export MAXWELL_CONF_DIR=/opt/maxwell/current
export PATH=$PATH:$MAXWELL_HOME/bin
root@kylin:/opt/maxwell/current# source /home/hadoop/.bashrc
# Configure MySQL
# vim /etc/mysql/my.cnf
root@kylin:/opt/maxwell# vim /etc/mysql/mysql.conf.d/mysqld.cnf
# maxwell
server_id=1
log-bin=master
binlog_format=row
# Or, on a running server:
root@kylin:/opt/maxwell# mysql -u root -p
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 604
Server version: 5.7.30-0ubuntu0.18.04.1-log (Ubuntu)
Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
# Enable row-based replication
# Note: binlog_format is a session-scoped property; all active connections must be closed before the switch to row-based replication fully takes effect.
mysql> set global binlog_format=ROW;
mysql> set global binlog_row_image=FULL;
# Permissions: Maxwell needs permission to store its state in the database named by the schema_database option (default: maxwell).
mysql> CREATE USER 'maxwell'@'%' IDENTIFIED BY '123456';
mysql> GRANT ALL ON maxwell.* TO 'maxwell'@'%';
mysql> GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE ON *.* TO 'maxwell'@'%';
# or for running maxwell locally:
mysql> CREATE USER 'maxwell'@'localhost' IDENTIFIED BY '123456';
mysql> GRANT ALL ON maxwell.* TO 'maxwell'@'localhost';
mysql> GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE ON *.* TO 'maxwell'@'localhost';
mysql> flush privileges;
# Run Maxwell
root@kylin:/opt/maxwell/current# ./bin/maxwell --user='maxwell' --password='123456' --host='127.0.0.1' --producer=stdout
Using kafka version: 1.0.0
21:28:46,334 WARN MaxwellMetrics - Metrics will not be exposed: metricsReportingType not configured.
21:28:48,529 INFO Maxwell - Maxwell v1.26.3 is booting (StdoutProducer), starting at Position[BinlogPosition[mysql_binary_log.000016:60802], lastHeartbeat=1591104258877]
21:28:49,059 INFO MysqlSavedSchema - Restoring schema id 1 (last modified at Position[BinlogPosition[mysql_binary_log.000016:6119], lastHeartbeat=0])
21:28:49,678 INFO BinlogConnectorReplicator - Setting initial binlog pos to: mysql_binary_log.000016:60802
21:28:49,770 INFO BinaryLogClient - Connected to 127.0.0.1:3306 at mysql_binary_log.000016/60802 (sid:6379, cid:14)
21:28:49,771 INFO BinlogConnectorReplicator - Binlog connected.
21:37:04,068 INFO AbstractSchemaStore - storing schema @Position[BinlogPosition[mysql_binary_log.000017:34269], lastHeartbeat=1591105013625] after applying "create table students( id int unsigned not null auto_increment primary key, name char(8) not null, sex char(4) not null, age int unsigned not null, tel char(13) null default '-' )" to maxwell, new schema id is 2
# Statement used for testing
mysql> create table students(
-> id int unsigned not null auto_increment primary key,
-> name char(8) not null,
-> sex char(4) not null,
-> age tinyint unsigned not null,
-> tel char(13) null default '-'
-> );
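# For reference, an insert into the table above, e.g.
#   mysql> insert into students(name,sex,age) values('Tom','M',20);
# would make Maxwell emit a JSON record roughly of this shape (the values here are illustrative):
#   {"database":"maxwell","table":"students","type":"insert","ts":1591105100,"xid":1234,"commit":true,
#    "data":{"id":1,"name":"Tom","sex":"M","age":20,"tel":"-"}}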
# Maxwell configuration
root@kylin:/opt/maxwell/current# cp config.properties.example config.properties
root@kylin:/opt/maxwell/current# cat config.properties | grep -v ^# | grep -v ^$
log_level=info
producer=kafka
kafka.bootstrap.servers=localhost:9092
host=localhost
user=maxwell
password=maxwell
kafka.compression.type=snappy
kafka.retries=0
kafka.acks=1
root@kylin:/opt/maxwell/current# vim config.properties
root@kylin:/opt/maxwell/current# cat config.properties | grep -v ^# | grep -v ^$
log_level=info
producer=kafka
kafka.bootstrap.servers=localhost:9092
host=localhost
user=maxwell
password=maxwell
kafka_topic=test
kafka.compression.type=snappy
kafka.retries=0
kafka.acks=all
kinesis_stream=test
# Start Maxwell with the default configuration
root@kylin:/home/hadoop/scripts# cat maxwell_start.sh
#!/bin/bash
read -p "Please input a topic name:" TopicName
$MAXWELL_HOME/bin/maxwell --user='maxwell' --password='123456' --host='127.0.0.1' --producer=kafka --kafka.bootstrap.servers=localhost:9092 --kafka_topic=$TopicName
hadoop@kylin:~/scripts$ ./maxwell_start.sh
Please input a topic name:test
Using kafka version: 1.0.0
22:44:41,907 WARN MaxwellMetrics - Metrics will not be exposed: metricsReportingType not configured.
22:44:43,764 INFO ProducerConfig - ProducerConfig values:
acks = 1
batch.size = 16384
bootstrap.servers = [localhost:9092]
buffer.memory = 33554432
client.id =
compression.type = none
connections.max.idle.ms = 540000
enable.idempotence = false
interceptor.classes = null
key.serializer = class org.apache.kafka.common.serialization.StringSerializer
linger.ms = 0
max.block.ms = 60000
max.in.flight.requests.per.connection = 5
max.request.size = 1048576
metadata.max.age.ms = 300000
metric.reporters = []
metrics.num.samples = 2
metrics.recording.level = INFO
metrics.sample.window.ms = 30000
partitioner.class = class org.apache.kafka.clients.producer.internals.DefaultPartitioner
receive.buffer.bytes = 32768
reconnect.backoff.max.ms = 1000
reconnect.backoff.ms = 50
request.timeout.ms = 30000
retries = 0
retry.backoff.ms = 100
sasl.jaas.config = null
sasl.kerberos.kinit.cmd = /usr/bin/kinit
sasl.kerberos.min.time.before.relogin = 60000
sasl.kerberos.service.name = null
sasl.kerberos.ticket.renew.jitter = 0.05
sasl.kerberos.ticket.renew.window.factor = 0.8
sasl.mechanism = GSSAPI
security.protocol = PLAINTEXT
send.buffer.bytes = 131072
ssl.cipher.suites = null
ssl.enabled.protocols = [TLSv1.2, TLSv1.1, TLSv1]
ssl.endpoint.identification.algorithm = null
ssl.key.password = null
ssl.keymanager.algorithm = SunX509
ssl.keystore.location = null
ssl.keystore.password = null
ssl.keystore.type = JKS
ssl.protocol = TLS
ssl.provider = null
ssl.secure.random.implementation = null
ssl.trustmanager.algorithm = PKIX
ssl.truststore.location = null
ssl.truststore.password = null
ssl.truststore.type = JKS
transaction.timeout.ms = 60000
transactional.id = null
value.serializer = class org.apache.kafka.common.serialization.StringSerializer
22:44:43,993 INFO AppInfoParser - Kafka version : 1.0.0
22:44:43,994 INFO AppInfoParser - Kafka commitId : aaa7af6d4a11b29d
22:44:44,079 INFO Maxwell - Maxwell v1.26.3 is booting (MaxwellKafkaProducer), starting at Position[BinlogPosition[mysql_binary_log.000017:61598], lastHeartbeat=1591109008809]
22:44:44,684 INFO MysqlSavedSchema - Restoring schema id 2 (last modified at Position[BinlogPosition[mysql_binary_log.000017:34269], lastHeartbeat=1591105013625])
22:44:45,223 INFO MysqlSavedSchema - Restoring schema id 1 (last modified at Position[BinlogPosition[mysql_binary_log.000016:6119], lastHeartbeat=0])
22:44:45,494 INFO MysqlSavedSchema - beginning to play deltas...
22:44:45,508 INFO MysqlSavedSchema - played 1 deltas in 13ms
22:44:45,788 INFO BinlogConnectorReplicator - Setting initial binlog pos to: mysql_binary_log.000017:61598
22:44:45,905 INFO BinaryLogClient - Connected to 127.0.0.1:3306 at mysql_binary_log.000017/61598 (sid:6379, cid:154)
22:44:45,906 INFO BinlogConnectorReplicator - Binlog connected.
^C22:45:07,513 INFO MaxwellContext - Sending final heartbeat: 1591109107513
22:45:07,536 INFO BinlogConnectorReplicator - received final heartbeat 1591109107513; stopping replicator
22:45:07,539 INFO BinlogConnectorReplicator - Binlog disconnected.
22:45:07,640 INFO TaskManager - Stopping 4 tasks
22:45:07,641 INFO TaskManager - Stopping: com.zendesk.maxwell.schema.PositionStoreThread@27eb9248
22:45:07,642 INFO TaskManager - Stopping: com.zendesk.maxwell.producer.MaxwellKafkaProducerWorker@292c68c5
22:45:07,642 INFO PositionStoreThread - Storing final position: Position[BinlogPosition[mysql_binary_log.000017:65003], lastHeartbeat=1591109107513]
22:45:07,653 INFO KafkaProducer - [Producer clientId=producer-1] Closing the Kafka producer with timeoutMillis = 9223372036854775807 ms.
22:45:07,691 INFO TaskManager - Stopping: com.zendesk.maxwell.bootstrap.BootstrapController@6b5b04b6
22:45:07,691 INFO TaskManager - Stopping: com.zendesk.maxwell.replication.BinlogConnectorReplicator@83e9db1
22:45:09,127 INFO TaskManager - Stopped all tasks
# binlog
#
# Enable the binlog
vi /etc/mysql/mysql.conf.d/mysqld.cnf
---
log_bin=/var/lib/mysql/mysql_binary_log
---
# Check whether the binlog is enabled
# In the mysql client:
mysql> show variables like '%log_bin%';
+---------------------------------+---------------------------------------+
| Variable_name | Value |
+---------------------------------+---------------------------------------+
| log_bin | ON |
| log_bin_basename | /var/lib/mysql/mysql_binary_log |
| log_bin_index | /var/lib/mysql/mysql_binary_log.index |
| log_bin_trust_function_creators | OFF |
| log_bin_use_v1_row_events | OFF |
| sql_log_bin | ON |
+---------------------------------+---------------------------------------+
6 rows in set (0.01 sec)
mysql> show variables like 'binlog_format';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| binlog_format | ROW |
+---------------+-------+
1 row in set (0.00 sec)
# The MySQL binlog supports three formats: ROW, STATEMENT, and MIXED;
# set global binlog_format='ROW/STATEMENT/MIXED'
# View the binlog files
sudo /usr/bin/mysqlbinlog /var/lib/mysql/mysql_binary_log.00000x --base64-output=decode-rows -v
# Scala 2.12
# https://www.scala-lang.org/download
# https://www.scala-lang.org/download/2.12.11.html
root@kylin:~/wget# wget https://downloads.lightbend.com/scala/2.12.11/scala-2.12.11.tgz
--2020-06-02 03:02:11-- https://downloads.lightbend.com/scala/2.12.11/scala-2.12.11.tgz
Resolving downloads.lightbend.com (downloads.lightbend.com)... 13.35.50.87, 13.35.50.28, 13.35.50.10, ...
Connecting to downloads.lightbend.com (downloads.lightbend.com)|13.35.50.87|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20795989 (20M) [application/octet-stream]
Saving to: ‘scala-2.12.11.tgz’
scala-2.12.11.tgz 100%[===================================================================>] 19.83M 1.91MB/s in 12s
2020-06-02 03:02:23 (1.72 MB/s) - ‘scala-2.12.11.tgz’ saved [20795989/20795989]
root@kylin:~/wget# mkdir /usr/scala
root@kylin:~/wget# tar -zxf scala-2.12.11.tgz -C /usr/scala/
root@kylin:~/wget# cd /usr/scala/
root@kylin:/usr/scala# ll scala-2.12.11/
total 40
drwxrwxr-x 6 2000 2000 4096 Mar 16 17:41 ./
drwxr-xr-x 3 root root 4096 Jun 2 12:53 ../
-rw-rw-r-- 1 2000 2000 11357 Mar 16 17:41 LICENSE
-rw-rw-r-- 1 2000 2000 646 Mar 16 17:41 NOTICE
drwxrwxr-x 2 2000 2000 4096 Mar 16 17:41 bin/
drwxrwxr-x 4 2000 2000 4096 Mar 16 17:41 doc/
drwxrwxr-x 2 2000 2000 4096 Mar 16 17:41 lib/
drwxrwxr-x 3 2000 2000 4096 Mar 16 17:41 man/
root@kylin:/usr/scala# ln -s scala-2.12.11 default
root@kylin:/usr/scala# vim /etc/profile
---
export SCALA_HOME=/usr/scala/default
export PATH=$SCALA_HOME/bin:$PATH
---
root@kylin:/usr/scala# source /etc/profile
root@kylin:/usr/scala# scala -version
Scala code runner version 2.12.11 -- Copyright 2002-2020, LAMP/EPFL and Lightbend, Inc.
root@kylin:/usr/scala#
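# A quick sanity check of the interpreter (the one-liner below prints the Scala library version string):
# root@kylin:/usr/scala# scala -e 'println(scala.util.Properties.versionMsg)'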
# Spark (3.0.0)
# https://spark.apache.org/downloads.html
root@kylin:~/wget# wget https://mirror.bit.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2-bin-hadoop3.2.tgz
--2020-05-31 22:16:56-- https://mirror.bit.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2-bin-hadoop3.2.tgz
Resolving mirror.bit.edu.cn (mirror.bit.edu.cn)... 202.204.80.77, 219.143.204.117, 2001:da8:204:1205::22
Connecting to mirror.bit.edu.cn (mirror.bit.edu.cn)|202.204.80.77|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 239947422 (229M) [application/octet-stream]
Saving to: ‘spark-3.0.0-preview2-bin-hadoop3.2.tgz’
spark-3.0.0-preview2-bin-hadoop3. 1%[ ] 2.48M 76.0KB/s eta 36m 14s
root@kylin:~/wget# mkdir /opt/spark
root@kylin:~/wget# tar -zxf spark-3.0.0-preview2-bin-hadoop3.2.tgz -C /opt/spark/
root@kylin:~/wget# cd /opt/spark/
root@kylin:/opt/spark# ll
total 12
drwxr-xr-x 3 root root 4096 May 31 23:29 ./
drwxr-xr-x 9 root root 4096 May 31 23:29 ../
drwxr-xr-x 13 110302528 ubuntu 4096 Dec 17 12:59 spark-3.0.0-preview2-bin-hadoop3.2/
root@kylin:/opt/spark# ln -s spark-3.0.0-preview2-bin-hadoop3.2 current
root@kylin:/opt/spark# chown -R hadoop:hadoop /opt/spark/
root@kylin:/opt/spark# ll current/
total 164
drwxr-xr-x 13 hadoop hadoop 4096 Dec 17 12:59 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 31 23:30 ../
-rw-r--r-- 1 hadoop hadoop 23311 Dec 17 12:59 LICENSE
-rw-r--r-- 1 hadoop hadoop 57677 Dec 17 12:59 NOTICE
drwxr-xr-x 3 hadoop hadoop 4096 Dec 17 12:59 R/
-rw-r--r-- 1 hadoop hadoop 4666 Dec 17 12:59 README.md
-rw-r--r-- 1 hadoop hadoop 192 Dec 17 12:59 RELEASE
drwxr-xr-x 2 hadoop hadoop 4096 Dec 17 12:59 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Dec 17 12:59 conf/
drwxr-xr-x 5 hadoop hadoop 4096 Dec 17 12:59 data/
drwxr-xr-x 4 hadoop hadoop 4096 Dec 17 12:59 examples/
drwxr-xr-x 2 hadoop hadoop 20480 Dec 17 12:59 jars/
drwxr-xr-x 4 hadoop hadoop 4096 Dec 17 12:59 kubernetes/
drwxr-xr-x 2 hadoop hadoop 4096 Dec 17 12:59 licenses/
drwxr-xr-x 7 hadoop hadoop 4096 Dec 17 12:59 python/
drwxr-xr-x 2 hadoop hadoop 4096 Dec 17 12:59 sbin/
drwxr-xr-x 2 hadoop hadoop 4096 Dec 17 12:59 yarn/
root@kylin:/opt/spark# mkdir /etc/spark
root@kylin:/opt/spark# cp -r /opt/spark/current/conf /etc/spark/
root@kylin:/opt/spark# chown -R hadoop:hadoop /etc/spark/
root@kylin:/opt/spark# vim /home/hadoop/.bashrc
---
#Spark
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf
export PATH=$PATH:$SPARK_HOME/bin
---
root@kylin:/opt/spark# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$ cd /etc/spark/conf/
hadoop@kylin:/etc/spark/conf$ ll
total 44
drwxr-xr-x 2 hadoop hadoop 4096 May 31 23:38 ./
drwxr-xr-x 3 hadoop hadoop 4096 May 31 23:38 ../
-rw-r--r-- 1 hadoop hadoop 1105 May 31 23:38 fairscheduler.xml.template
-rw-r--r-- 1 hadoop hadoop 2023 May 31 23:38 log4j.properties.template
-rw-r--r-- 1 hadoop hadoop 9141 May 31 23:38 metrics.properties.template
-rw-r--r-- 1 hadoop hadoop 865 May 31 23:38 slaves.template
-rw-r--r-- 1 hadoop hadoop 1292 May 31 23:38 spark-defaults.conf.template
-rwxr-xr-x 1 hadoop hadoop 4344 May 31 23:38 spark-env.sh.template*
hadoop@kylin:/etc/spark/conf$ mv spark-env.sh.template spark-env.sh
hadoop@kylin:/etc/spark/conf$ vim spark-env.sh
hadoop@kylin:/etc/spark/conf$ cat spark-env.sh | grep -v ^#|grep -v ^$
export JAVA_HOME=/usr/java/default
export SCALA_HOME=/usr/scala/default
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_CONF_DIR=/etc/hadoop/conf
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf
hadoop@kylin:/etc/spark/conf$ mv slaves.template slaves
hadoop@kylin:/etc/spark/conf$ cat slaves | grep -v ^#|grep -v ^$
localhost
hadoop@kylin:/etc/spark/conf$ cd /$SPARK_HOME
hadoop@kylin://opt/spark/current$ cd conf/
hadoop@kylin://opt/spark/current/conf$ ll
total 44
drwxr-xr-x 2 hadoop hadoop 4096 Dec 17 12:59 ./
drwxr-xr-x 13 hadoop hadoop 4096 Dec 17 12:59 ../
-rw-r--r-- 1 hadoop hadoop 1105 Dec 17 12:59 fairscheduler.xml.template
-rw-r--r-- 1 hadoop hadoop 2023 Dec 17 12:59 log4j.properties.template
-rw-r--r-- 1 hadoop hadoop 9141 Dec 17 12:59 metrics.properties.template
-rw-r--r-- 1 hadoop hadoop 865 Dec 17 12:59 slaves.template
-rw-r--r-- 1 hadoop hadoop 1292 Dec 17 12:59 spark-defaults.conf.template
-rwxr-xr-x 1 hadoop hadoop 4344 Dec 17 12:59 spark-env.sh.template*
hadoop@kylin://opt/spark/current/conf$ ln -s /etc/spark/conf/spark-env.sh spark-env.sh
hadoop@kylin://opt/spark/current/conf$ ln -s /etc/spark/conf/slaves slaves
hadoop@kylin://opt/spark/current/conf$ ll
total 44
drwxr-xr-x 2 hadoop hadoop 4096 Jun 2 13:56 ./
drwxr-xr-x 13 hadoop hadoop 4096 Dec 17 12:59 ../
-rw-r--r-- 1 hadoop hadoop 1105 Dec 17 12:59 fairscheduler.xml.template
-rw-r--r-- 1 hadoop hadoop 2023 Dec 17 12:59 log4j.properties.template
-rw-r--r-- 1 hadoop hadoop 9141 Dec 17 12:59 metrics.properties.template
lrwxrwxrwx 1 hadoop hadoop 22 Jun 2 13:56 slaves -> /etc/spark/conf/slaves
-rw-r--r-- 1 hadoop hadoop 865 Dec 17 12:59 slaves.template
-rw-r--r-- 1 hadoop hadoop 1292 Dec 17 12:59 spark-defaults.conf.template
lrwxrwxrwx 1 hadoop hadoop 28 Jun 2 13:56 spark-env.sh -> /etc/spark/conf/spark-env.sh*
-rwxr-xr-x 1 hadoop hadoop 4344 Dec 17 12:59 spark-env.sh.template*
# https://spark.apache.org/docs/latest/spark-standalone.html
# Start the master
hadoop@kylin:~$ $SPARK_HOME/sbin/start-master.sh
starting org.apache.spark.deploy.master.Master, logging to /opt/spark/current/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-kylin.out
# Stop the master
hadoop@kylin:~$ $SPARK_HOME/sbin/stop-master.sh
stopping org.apache.spark.deploy.master.Master
# Start a worker
hadoop@kylin:~$ $SPARK_HOME/sbin/start-slave.sh
Usage: ./sbin/start-slave.sh <master> [options]
2020-06-02 14:10:50,173 WARN util.Utils: Your hostname, kylin resolves to a loopback address: 127.0.1.1; using 172.26.10.48 instead (on interface eth0)
2020-06-02 14:10:50,175 WARN util.Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Master must be a URL of the form spark://hostname:port
Options:
-c CORES, --cores CORES Number of cores to use
-m MEM, --memory MEM Amount of memory to use (e.g. 1000M, 2G)
-d DIR, --work-dir DIR Directory to run apps in (default: SPARK_HOME/work)
-i HOST, --ip IP Hostname to listen on (deprecated, please use --host or -h)
-h HOST, --host HOST Hostname to listen on
-p PORT, --port PORT Port to listen on (default: random)
--webui-port PORT Port for web UI (default: 8081)
--properties-file FILE Path to a custom Spark properties file.
Default is conf/spark-defaults.conf.
# Start a worker (with the master URL)
hadoop@kylin:~$ $SPARK_HOME/sbin/start-slave.sh spark://kylin.localdomain:7077
starting org.apache.spark.deploy.worker.Worker, logging to /opt/spark/current/logs/spark-hadoop-org.apache.spark.deploy.worker.Worker-1-kylin.out
# Stop the worker
hadoop@kylin:~$ $SPARK_HOME/sbin/stop-slave.sh spark://kylin.localdomain:7077
stopping org.apache.spark.deploy.worker.Worker
# ./bin/spark-shell --master spark://IP:PORT
#
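# For example, to attach a shell to the standalone master started above (same URL as used for the worker):
# hadoop@kylin:~$ $SPARK_HOME/bin/spark-shell --master spark://kylin.localdomain:7077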
hadoop@kylin:~$ $SPARK_HOME/bin/spark-shell
2020-06-02 15:04:30,279 WARN util.Utils: Your hostname, kylin resolves to a loopback address: 127.0.1.1; using 172.26.10.48 instead (on interface eth0)
2020-06-02 15:04:30,283 WARN util.Utils: Set SPARK_LOCAL_IP if you need to bind to another address
2020-06-02 15:04:31,882 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://hadoop:4040
Spark context available as 'sc' (master = local[*], app id = local-1591081501036).
Spark session available as 'spark'.
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ ''_/
/___/ .__/\_,_/_/ /_/\_\ version 3.0.0-preview2
/_/
Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_181)
Type in expressions to have them evaluated.
Type :help for more information.
scala>^C
hadoop@kylin:~$ $SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi --master local /opt/spark/current/examples/jars/spark-examples_2.12-3.0.0-preview2.jar
'
......
2020-06-02 15:22:02,628 INFO scheduler.DAGScheduler: Job 0 finished: reduce at SparkPi.scala:38, took 3.606569 s
Pi is roughly 3.1397756988784944
......
'
hadoop@kylin:~$ $SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn /opt/spark/current/examples/jars/spark-examples_2.12-3.0.0-preview2.jar
'
......
2020-06-02 15:26:52,090 INFO scheduler.DAGScheduler: Job 0 finished: reduce at SparkPi.scala:38, took 21.626012 s
Pi is roughly 3.1376956884784426
......
'
# hadoop@kylin:~$ mv /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar.bak
# Configure Spark to read Hive tables:
hadoop@kylin:~$ vim /etc/hive/conf/hive-site.xml
<property>
<name>hive.metastore.uris</name>
<value>thrift://localhost:9083</value>
</property>
hadoop@kylin:~$ cp /etc/hive/conf/hive-site.xml /etc/spark/conf/
hadoop@kylin:~$ cd $SPARK_CONF_DIR
hadoop@kylin:/etc/spark/conf$ chmod 777 hive-site.xml
hadoop@kylin:~$ cp /opt/hive/current/lib/mysql-connector-java-5.1.45.jar /opt/spark/current/jars/
# Spark SQL against Hive
#
# Running $SPARK_HOME/bin/spark-sql produces a large amount of INFO logging; do the following to reduce it:
hadoop@kylin:/opt/spark/current/bin$ cd /etc/spark/conf/
hadoop@kylin:/etc/spark/conf$ cp log4j.properties.template log4j.properties
hadoop@kylin:/etc/spark/conf$ sed -i 's/INFO/ERROR/' log4j.properties
hadoop@kylin:/opt/spark/current/bin$ spark-sql
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/hadoop-3.2.1/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
Spark master: local[*], Application Id: local-1591270878565
spark-sql (default)> show databases;
namespace
default
test_bdw
Time taken: 9.809 seconds, Fetched 2 row(s)
spark-sql (default)> use default;
Response code
Time taken: 0.344 seconds
spark-sql (default)> show tables;
database tableName isTemporary
default kylin_account false
default kylin_cal_dt false
default kylin_category_groupings false
default kylin_country false
default kylin_sales false
Time taken: 0.459 seconds, Fetched 5 row(s)
spark-sql (default)> select * from kylin_country limit 3;
country latitude longitude name
AD 42.546245 1.601554 Andorra
AE 23.424076 53.847818 United Arab Emirates
AF 33.93911 67.709953 Afghanistan
Time taken: 9.221 seconds, Fetched 3 row(s)
spark-sql (default)> quit;
hadoop@kylin:/opt/spark/current/bin$ mv /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar /opt/spark/spark-3.0.0-preview2-bin-hadoop3.2/jars/slf4j-log4j12-1.7.16.jar.bak
# pyspark
hadoop@kylin:/opt/spark/current/bin$ pyspark
Python 2.7.17 (default, Apr 15 2020, 17:20:14)
[GCC 7.5.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
/opt/spark/current/python/pyspark/context.py:219: DeprecationWarning: Support for Python 2 and Python 3 prior to version 3.6 is deprecated as of Spark 3.0. See also the plan for dropping Python 2 support at https://spark.apache.org/news/plan-for-dropping-python-2-support.html.
DeprecationWarning)
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ ''_/
/__ / .__/\_,_/_/ /_/\_\ version 3.0.0-preview2
/_/
Using Python version 2.7.17 (default, Apr 15 2020 17:20:14)
SparkSession available as 'spark'.
>>>
# spark shell
hadoop@kylin:~$ $SPARK_HOME/bin/spark-shell
2020-06-02 16:36:23,933 WARN util.Utils: Your hostname, kylin resolves to a loopback address: 127.0.1.1; using 172.26.10.48 instead (on interface eth0)
2020-06-02 16:36:23,939 WARN util.Utils: Set SPARK_LOCAL_IP if you need to bind to another address
2020-06-02 16:36:26,612 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://hadoop:4040
Spark context available as 'sc' (master = local[*], app id = local-1591087011628).
Spark session available as 'spark'.
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ ''_/
/___/ .__/\_,_/_/ /_/\_\ version 3.0.0-preview2
/_/
Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_181)
Type in expressions to have them evaluated.
Type :help for more information.
scala> import org.apache.spark.sql.hive.HiveContext
<console>:23: error: object HiveContext is not a member of package org.apache.spark.sql.hive
import org.apache.spark.sql.hive.HiveContext
^
hadoop@kylin:/etc/spark/conf$ vim spark-env.sh
hadoop@kylin:/etc/spark/conf$ cat spark-env.sh | grep -v ^#|grep -v ^$
export JAVA_HOME=/usr/java/default
export SCALA_HOME=/usr/scala/default
export HADOOP_HOME=/opt/hadoop/current
export HADOOP_CONF_DIR=/etc/hadoop/conf
export SPARK_HOME=/opt/spark/current
export SPARK_CONF_DIR=/etc/spark/conf
export CLASSPATH=$CLASSPATH:/opt/hive/current/lib
export HIVE_CONF_DIR=/etc/hive/conf
export SPARK_CLASSPATH=$SPARK_CLASSPATH:/opt/hive/current/lib/mysql-connector-java-5.1.45.jar
export SPARK_DIST_CLASSPATH=$(/opt/hadoop/current/bin/hadoop classpath)
# This Spark build does not include Hive support (note that HiveContext itself was removed in Spark 3.0; SparkSession is now the entry point).
# One option is to build Spark from source.
#
root@kylin:~/wget# wget https://mirrors.bfsu.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2.tgz
--2020-06-02 22:12:03-- https://mirrors.bfsu.edu.cn/apache/spark/spark-3.0.0-preview2/spark-3.0.0-preview2.tgz
Resolving mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)... 39.155.141.16, 2001:da8:20f:4435:4adf:37ff:fe55:2840
Connecting to mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)|39.155.141.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21811982 (21M) [application/octet-stream]
Saving to: ‘spark-3.0.0-preview2.tgz’
spark-3.0.0-preview2.tgz 100%[===================================================================>] 20.80M 5.30MB/s in 4.3s
2020-06-02 22:12:08 (4.87 MB/s) - ‘spark-3.0.0-preview2.tgz’ saved [21811982/21811982]
root@kylin:~/wget# tar -zxf spark-3.0.0-preview2.tgz -C /opt/spark/
root@kylin:~/wget# cd /opt/spark/
root@kylin:/opt/spark# mv spark-3.0.0-preview2 spark-3.0.0-preview2-sourcecode
root@kylin:/opt/spark# chown -R hadoop:hadoop spark-3.0.0-preview2-sourcecode/
root@kylin:~# apt install maven
root@kylin:~# mvn -version
Apache Maven 3.6.0
Maven home: /usr/share/maven
Java version: 1.8.0_181, vendor: Oracle Corporation, runtime: /usr/java/jdk1.8.0_181/jre
Default locale: en, platform encoding: UTF-8
OS name: "linux", version: "4.19.104-microsoft-standard", arch: "amd64", family: "unix"
root@kylin:~# whereis maven
maven: /etc/maven /usr/share/maven
root@kylin:~# cd /usr/share/maven
root@kylin:/usr/share/maven# ll
total 24
drwxr-xr-x 6 root root 4096 Jun 2 17:41 ./
drwxr-xr-x 118 root root 4096 Jun 2 17:41 ../
drwxr-xr-x 2 root root 4096 Jun 2 17:41 bin/
drwxr-xr-x 2 root root 4096 Jun 2 17:41 boot/
lrwxrwxrwx 1 root root 10 Apr 10 2019 conf -> /etc/maven/
drwxr-xr-x 2 root root 4096 Jun 2 17:41 lib/
drwxr-xr-x 2 root root 4096 Jun 2 17:41 man/
root@kylin:/usr/share/maven# vim /root/.bashrc
---
export MAVEN_HOME=/usr/share/maven
export PATH=$PATH:$MAVEN_HOME/bin
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"
---
root@kylin:/usr/share/maven# source /root/.bashrc
root@kylin:/opt/spark# cd spark-3.0.0-preview2-sourcecode/
root@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode# ./dev/change-scala-version.sh 2.12
./dev/../graph/graph/pom.xml
./dev/../graph/cypher/pom.xml
./dev/../graph/api/pom.xml
./dev/../mllib-local/pom.xml
./dev/../repl/pom.xml
./dev/../launcher/pom.xml
./dev/../examples/pom.xml
./dev/../sql/catalyst/pom.xml
./dev/../sql/core/pom.xml
./dev/../sql/hive/pom.xml
./dev/../sql/hive-thriftserver/pom.xml
./dev/../graphx/pom.xml
./dev/../resource-managers/yarn/pom.xml
./dev/../resource-managers/kubernetes/integration-tests/pom.xml
./dev/../resource-managers/kubernetes/core/pom.xml
./dev/../resource-managers/mesos/pom.xml
./dev/../common/unsafe/pom.xml
./dev/../common/tags/pom.xml
./dev/../common/kvstore/pom.xml
./dev/../common/network-shuffle/pom.xml
./dev/../common/network-yarn/pom.xml
./dev/../common/sketch/pom.xml
./dev/../common/network-common/pom.xml
./dev/../mllib/pom.xml
./dev/../hadoop-cloud/pom.xml
./dev/../core/pom.xml
./dev/../assembly/pom.xml
./dev/../external/kafka-0-10/pom.xml
./dev/../external/avro/pom.xml
./dev/../external/kinesis-asl-assembly/pom.xml
./dev/../external/spark-ganglia-lgpl/pom.xml
./dev/../external/docker-integration-tests/pom.xml
./dev/../external/kafka-0-10-assembly/pom.xml
./dev/../external/kafka-0-10-token-provider/pom.xml
./dev/../external/kafka-0-10-sql/pom.xml
./dev/../external/kinesis-asl/pom.xml
./dev/../tools/pom.xml
./dev/../pom.xml
./dev/../streaming/pom.xml
./dev/../docs/_plugins/copy_api_dirs.rb
root@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode# mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests clean package
[INFO] Scanning for projects...
......
#mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests clean package
hadoop@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode$ mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests clean package
......
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary for Spark Project Parent POM 3.0.0-preview2:
[INFO]
[INFO] Spark Project Parent POM ........................... SUCCESS [ 5.716 s]
[INFO] Spark Project Tags ................................. SUCCESS [ 17.282 s]
[INFO] Spark Project Sketch ............................... SUCCESS [ 20.009 s]
[INFO] Spark Project Local DB ............................. SUCCESS [ 3.199 s]
[INFO] Spark Project Networking ........................... SUCCESS [ 6.765 s]
[INFO] Spark Project Shuffle Streaming Service ............ SUCCESS [ 7.790 s]
[INFO] Spark Project Unsafe ............................... SUCCESS [ 29.087 s]
[INFO] Spark Project Launcher ............................. SUCCESS [ 4.287 s]
[INFO] Spark Project Core ................................. SUCCESS [07:57 min]
[INFO] Spark Project ML Local Library ..................... SUCCESS [01:34 min]
[INFO] Spark Project GraphX ............................... SUCCESS [02:14 min]
[INFO] Spark Project Streaming ............................ SUCCESS [04:30 min]
[INFO] Spark Project Catalyst ............................. SUCCESS [22:20 min]
[INFO] Spark Project SQL .................................. SUCCESS [19:29 min]
[INFO] Spark Project ML Library ........................... SUCCESS [09:54 min]
[INFO] Spark Project Tools ................................ SUCCESS [ 42.535 s]
[INFO] Spark Project Hive ................................. SUCCESS [06:23 min]
[INFO] Spark Project Graph API ............................ SUCCESS [ 5.058 s]
[INFO] Spark Project Cypher ............................... SUCCESS [ 3.054 s]
[INFO] Spark Project Graph ................................ SUCCESS [ 5.758 s]
[INFO] Spark Project REPL ................................. SUCCESS [02:23 min]
[INFO] Spark Project YARN Shuffle Service ................. SUCCESS [ 38.680 s]
[INFO] Spark Project YARN ................................. SUCCESS [07:10 min]
[INFO] Spark Project Hive Thrift Server ................... SUCCESS [16:51 min]
[INFO] Spark Project Assembly ............................. SUCCESS [ 44.521 s]
[INFO] Kafka 0.10+ Token Provider for Streaming ........... SUCCESS [12:00 min]
[INFO] Spark Integration for Kafka 0.10 ................... SUCCESS [21:40 min]
[INFO] Kafka 0.10+ Source for Structured Streaming ........ SUCCESS [12:49 min]
[INFO] Spark Project Examples ............................. SUCCESS [06:29 min]
[INFO] Spark Integration for Kafka 0.10 Assembly .......... SUCCESS [ 55.229 s]
[INFO] Spark Avro ......................................... SUCCESS [03:43 min]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 02:42 h
[INFO] Finished at: 2020-06-03T23:54:05+08:00
[INFO] ------------------------------------------------------------------------
#./dev/./make-distribution.sh --name hadoop3.2.1-hive --tgz -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests
hadoop@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode$ ./dev/./make-distribution.sh --name hadoop3.2.1-hive --tgz -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.1 -Phive -Phive-thriftserver -DskipTests
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary for Spark Project Parent POM 3.0.0-preview2:
[INFO]
[INFO] Spark Project Parent POM ........................... SUCCESS [ 13.513 s]
[INFO] Spark Project Tags ................................. SUCCESS [ 23.672 s]
[INFO] Spark Project Sketch ............................... SUCCESS [ 25.063 s]
[INFO] Spark Project Local DB ............................. SUCCESS [ 4.005 s]
[INFO] Spark Project Networking ........................... SUCCESS [ 7.310 s]
[INFO] Spark Project Shuffle Streaming Service ............ SUCCESS [ 3.006 s]
[INFO] Spark Project Unsafe ............................... SUCCESS [ 24.651 s]
[INFO] Spark Project Launcher ............................. SUCCESS [ 4.874 s]
[INFO] Spark Project Core ................................. SUCCESS [06:04 min]
[INFO] Spark Project ML Local Library ..................... SUCCESS [01:19 min]
[INFO] Spark Project GraphX ............................... SUCCESS [03:00 min]
[INFO] Spark Project Streaming ............................ SUCCESS [04:25 min]
[INFO] Spark Project Catalyst ............................. SUCCESS [11:58 min]
[INFO] Spark Project SQL .................................. SUCCESS [17:14 min]
[INFO] Spark Project ML Library ........................... SUCCESS [09:33 min]
[INFO] Spark Project Tools ................................ SUCCESS [ 36.058 s]
[INFO] Spark Project Hive ................................. SUCCESS [08:19 min]
[INFO] Spark Project Graph API ............................ SUCCESS [ 4.513 s]
[INFO] Spark Project Cypher ............................... SUCCESS [ 2.620 s]
[INFO] Spark Project Graph ................................ SUCCESS [ 6.191 s]
[INFO] Spark Project REPL ................................. SUCCESS [02:11 min]
[INFO] Spark Project YARN Shuffle Service ................. SUCCESS [ 43.326 s]
[INFO] Spark Project YARN ................................. SUCCESS [04:02 min]
[INFO] Spark Project Hive Thrift Server ................... SUCCESS [03:56 min]
[INFO] Spark Project Assembly ............................. SUCCESS [ 23.387 s]
[INFO] Kafka 0.10+ Token Provider for Streaming ........... SUCCESS [01:27 min]
[INFO] Spark Integration for Kafka 0.10 ................... SUCCESS [02:27 min]
[INFO] Kafka 0.10+ Source for Structured Streaming ........ SUCCESS [04:18 min]
[INFO] Spark Project Examples ............................. SUCCESS [01:53 min]
[INFO] Spark Integration for Kafka 0.10 Assembly .......... SUCCESS [12:11 min]
[INFO] Spark Avro ......................................... SUCCESS [02:37 min]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 01:40 h
[INFO] Finished at: 2020-06-04T02:00:15+08:00
[INFO] ------------------------------------------------------------------------
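# make-distribution.sh writes its output into the source root: a dist/ directory plus, with --tgz and
# --name hadoop3.2.1-hive, a tarball named roughly spark-3.0.0-preview2-bin-hadoop3.2.1-hive.tgz, which
# could then be unpacked to /opt/spark/ in place of the prebuilt package, e.g.:
# hadoop@kylin:/opt/spark/spark-3.0.0-preview2-sourcecode$ ls spark-*-bin-hadoop3.2.1-hive.tgz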
# Flink 1.10.1
# https://flink.apache.org/zh/
root@kylin:~/wget# wget https://mirrors.bfsu.edu.cn/apache/flink/flink-1.10.1/flink-1.10.1-bin-scala_2.12.tgz
--2020-06-03 01:15:39-- https://mirrors.bfsu.edu.cn/apache/flink/flink-1.10.1/flink-1.10.1-bin-scala_2.12.tgz
Resolving mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)... 39.155.141.16, 2001:da8:20f:4435:4adf:37ff:fe55:2840
Connecting to mirrors.bfsu.edu.cn (mirrors.bfsu.edu.cn)|39.155.141.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 279396579 (266M) [application/octet-stream]
Saving to: ‘flink-1.10.1-bin-scala_2.12.tgz’
flink-1.10.1-bin-scala_2.12.tgz 100%[===================================================================>] 266.45M 4.03MB/s in 2m 39s
2020-06-03 01:18:21 (1.67 MB/s) - ‘flink-1.10.1-bin-scala_2.12.tgz’ saved [279396579/279396579]
root@kylin:~/wget# mkdir /opt/flink
root@kylin:~/wget# tar -zxf flink-1.10.1-bin-scala_2.12.tgz -C /opt/flink/
root@kylin:~/wget# cd /opt/flink/
root@kylin:/opt/flink# ll
total 12
drwxr-xr-x 3 root root 4096 Jun 5 00:29 ./
drwxr-xr-x 14 root root 4096 Jun 5 00:28 ../
drwxr-xr-x 10 root root 4096 May 7 18:09 flink-1.10.1/
root@kylin:/opt/flink# ln -s flink-1.10.1 current
root@kylin:/opt/flink# ll
total 12
drwxr-xr-x 3 root root 4096 Jun 5 00:30 ./
drwxr-xr-x 14 root root 4096 Jun 5 00:28 ../
lrwxrwxrwx 1 root root 12 Jun 5 00:30 current -> flink-1.10.1/
drwxr-xr-x 10 root root 4096 May 7 18:09 flink-1.10.1/
root@kylin:/opt/flink# cd current
root@kylin:/opt/flink/current# ll
total 596
drwxr-xr-x 10 root root 4096 May 7 18:09 ./
drwxr-xr-x 3 root root 4096 Jun 5 00:30 ../
drwxr-xr-x 2 root root 4096 May 7 18:09 bin/
drwxr-xr-x 2 root root 4096 May 7 18:09 conf/
drwxr-xr-x 7 root root 4096 May 7 18:09 examples/
drwxr-xr-x 2 root root 4096 May 7 18:09 lib/
-rw-r--r-- 1 root root 11357 May 7 13:45 LICENSE
drwxr-xr-x 2 root root 4096 May 7 18:09 licenses/
drwxr-xr-x 2 root root 4096 May 7 13:45 log/
-rw-r--r-- 1 root root 550465 May 7 18:09 NOTICE
drwxr-xr-x 2 root root 4096 May 7 18:09 opt/
drwxr-xr-x 2 root root 4096 May 7 13:45 plugins/
-rw-r--r-- 1 root root 1309 May 7 13:45 README.txt
root@kylin:/opt/flink/current# cd ..
root@kylin:/opt/flink# ll current/
total 596
drwxr-xr-x 10 root root 4096 May 7 18:09 ./
drwxr-xr-x 3 root root 4096 Jun 5 00:30 ../
drwxr-xr-x 2 root root 4096 May 7 18:09 bin/
drwxr-xr-x 2 root root 4096 May 7 18:09 conf/
drwxr-xr-x 7 root root 4096 May 7 18:09 examples/
drwxr-xr-x 2 root root 4096 May 7 18:09 lib/
-rw-r--r-- 1 root root 11357 May 7 13:45 LICENSE
drwxr-xr-x 2 root root 4096 May 7 18:09 licenses/
drwxr-xr-x 2 root root 4096 May 7 13:45 log/
-rw-r--r-- 1 root root 550465 May 7 18:09 NOTICE
drwxr-xr-x 2 root root 4096 May 7 18:09 opt/
drwxr-xr-x 2 root root 4096 May 7 13:45 plugins/
-rw-r--r-- 1 root root 1309 May 7 13:45 README.txt
root@kylin:/opt/flink# chown -R hadoop:hadoop /opt/flink/
root@kylin:/opt/flink# mkdir /etc/flink
root@kylin:/opt/flink# cp -r /opt/flink/current/conf /etc/flink/
root@kylin:/opt/flink# chown -R hadoop:hadoop /etc/flink/
root@kylin:/opt/flink# vim /home/hadoop/.bashrc
# Flink
export FLINK_HOME=/opt/flink/current
export FLINK_CONF_DIR=/etc/flink/conf
export PATH=$PATH:$FLINK_HOME/bin
root@kylin:/opt/flink# source /home/hadoop/.bashrc
root@kylin:/home/hadoop# su hadoop
hadoop@kylin:~$ cd /etc/flink/conf/
# Flink standalone-mode configuration
# https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/deployment/cluster_setup.html#configuring-flink
hadoop@kylin:/etc/flink/conf$ vim flink-conf.yaml
hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep -v ^# | grep -v ^$
jobmanager.rpc.address: localhost
jobmanager.rpc.port: 6123
jobmanager.heap.size: 1024m
taskmanager.memory.process.size: 1728m
taskmanager.numberOfTaskSlots: 1
parallelism.default: 1
jobmanager.execution.failover-strategy: region
rest.port: 6061
io.tmp.dirs: /tmp
hadoop@kylin:/etc/flink/conf$ cat masters
localhost:6061
hadoop@kylin:/etc/flink/conf$ vim masters
hadoop@kylin:/etc/flink/conf$ cat masters
localhost
hadoop@kylin:/etc/flink/conf$ cat slaves
localhost
hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/start-cluster.sh
Starting cluster.
Starting standalonesession daemon on host kylin.
Starting taskexecutor daemon on host kylin.
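# A quick check that the JobManager came up (rest.port was set to 6061 above); the REST overview
# endpoint should list the registered taskmanagers and available slots:
# hadoop@kylin:/etc/flink/conf$ curl http://localhost:6061/overview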
hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/stop-cluster.sh
Stopping taskexecutor daemon (pid: 28874) on host kylin.
No standalonesession daemon (pid: 29287) is running anymore on kylin.
hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/jobmanager.sh start
Starting standalonesession daemon on host kylin.
hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/taskmanager.sh start
Starting taskexecutor daemon on host kylin.
hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/jobmanager.sh stop
Stopping standalonesession daemon (pid: 31453) on host kylin.
hadoop@kylin:/etc/flink/conf$ $FLINK_HOME/bin/taskmanager.sh stop
Stopping taskexecutor daemon (pid: 31841) on host kylin.
hadoop@kylin:/opt/flink/current$ ./bin/flink run examples/batch/WordCount.jar
Executing WordCount example with default input data set.
Use --input to specify file input.
Printing result to stdout. Use --output to specify output path.
Job has been submitted with JobID 13dc9137f65ff788b594f8d0911ddbf7
Program execution finished
Job with JobID 13dc9137f65ff788b594f8d0911ddbf7 has finished.
Job Runtime: 3323 ms
Accumulator Results:
- ee1e51903121e33fcc0251615b5a2841 (java.util.ArrayList) [170 elements]
(a,5)
(action,1)
(after,1)
(against,1)
(all,2)
(and,12)
......
# On-YARN mode and HA configuration
# https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/jobmanager_high_availability.html
# HADOOP_CONF_DIR has already been configured!
hadoop@kylin:/opt/flink$ wget https://mirrors.tuna.tsinghua.edu.cn/apache/flink/flink-shaded-11.0/flink-shaded-11.0-src.tgz
--2020-06-05 02:13:37-- https://mirrors.tuna.tsinghua.edu.cn/apache/flink/flink-shaded-11.0/flink-shaded-11.0-src.tgz
Resolving mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)... 101.6.8.193, 2402:f000:1:408:8100::1
Connecting to mirrors.tuna.tsinghua.edu.cn (mirrors.tuna.tsinghua.edu.cn)|101.6.8.193|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 14529 (14K) [application/octet-stream]
Saving to: ‘flink-shaded-11.0-src.tgz’
flink-shaded-11.0-src.tgz 100%[============================================================================>] 14.19K --.-KB/s in 0s
2020-06-05 02:13:37 (97.8 MB/s) - ‘flink-shaded-11.0-src.tgz’ saved [14529/14529]
hadoop@kylin:/opt/flink$ ll
total 28
drwxr-xr-x 3 hadoop hadoop 4096 Jun 5 02:16 ./
drwxr-xr-x 14 root root 4096 Jun 5 00:28 ../
lrwxrwxrwx 1 hadoop hadoop 12 Jun 5 00:30 current -> flink-1.10.1/
drwxr-xr-x 10 hadoop hadoop 4096 May 7 18:09 flink-1.10.1/
-rw-rw-r-- 1 hadoop hadoop 14529 May 26 01:03 flink-shaded-11.0-src.tgz
hadoop@kylin:/opt/flink$ tar -zxf flink-shaded-11.0-src.tgz
hadoop@kylin:/opt/flink$ ll
total 32
drwxr-xr-x 4 hadoop hadoop 4096 Jun 5 02:18 ./
drwxr-xr-x 14 root root 4096 Jun 5 00:28 ../
lrwxrwxrwx 1 hadoop hadoop 12 Jun 5 00:30 current -> flink-1.10.1/
drwxr-xr-x 10 hadoop hadoop 4096 May 7 18:09 flink-1.10.1/
drwxrwxr-x 11 hadoop hadoop 4096 May 26 01:00 flink-shaded-11.0/
-rw-rw-r-- 1 hadoop hadoop 14529 May 26 01:03 flink-shaded-11.0-src.tgz
hadoop@kylin:/opt/flink$ cd flink-shaded-11.0/
hadoop@kylin:/opt/flink/flink-shaded-11.0$ ll
total 80
drwxrwxr-x 11 hadoop hadoop 4096 May 26 01:00 ./
drwxr-xr-x 4 hadoop hadoop 4096 Jun 5 02:18 ../
drwxrwxr-x 3 hadoop hadoop 4096 May 26 01:00 flink-shaded-asm-7/
drwxrwxr-x 2 hadoop hadoop 4096 May 26 01:00 flink-shaded-force-shading/
drwxrwxr-x 3 hadoop hadoop 4096 May 26 01:00 flink-shaded-guava-18/
drwxrwxr-x 4 hadoop hadoop 4096 May 26 01:00 flink-shaded-jackson-parent/
drwxrwxr-x 3 hadoop hadoop 4096 May 26 01:00 flink-shaded-netty-4/
drwxrwxr-x 3 hadoop hadoop 4096 May 26 01:00 flink-shaded-netty-tcnative-dynamic/
drwxrwxr-x 3 hadoop hadoop 4096 May 26 01:00 flink-shaded-netty-tcnative-static/
drwxrwxr-x 4 hadoop hadoop 4096 May 26 01:00 flink-shaded-zookeeper-parent/
-rwxrwxr-x 1 hadoop hadoop 11357 May 26 01:00 LICENSE*
-rwxrwxr-x 1 hadoop hadoop 166 May 26 01:00 NOTICE*
-rwxrwxr-x 1 hadoop hadoop 13791 May 26 01:00 pom.xml*
-rwxrwxr-x 1 hadoop hadoop 1720 May 26 01:00 README.md*
drwxrwxr-x 3 hadoop hadoop 4096 May 26 01:00 tools/
# Hadoop 3.2.1 defaults to guava 27
#
# https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/deployment/hadoop.html
hadoop@kylin:/opt/flink/flink-shaded-11.0$ mvn clean install -Dhadoop.version=3.2.1
# The build did not package successfully
# Downloading the 2.x shaded package also works, as long as no Hadoop 3-specific APIs are needed
hadoop@kylin:/opt/flink/current/lib$ wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-9.0/flink-shaded-hadoop-2-uber-2.8.3-9.0.jar
hadoop@kylin:~$ cd /etc/flink/conf/
hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep high-availability
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
# high-availability: zookeeper
# high-availability.storageDir: hdfs:///flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
# high-availability.zookeeper.quorum: localhost:2181
# high-availability.zookeeper.client.acl: open
#
hadoop@kylin:/etc/flink/conf$ sed -i 's/# high-availability/high-availability/' flink-conf.yaml
hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep high-availability
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
high-availability: zookeeper
high-availability.storageDir: hdfs://localhost:9000/flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
high-availability.zookeeper.quorum: localhost:2181
high-availability.zookeeper.client.acl: open
hadoop@kylin:/etc/flink/conf$ hadoop fs -mkdir -p /flink/ha/
hadoop@kylin:/etc/flink/conf$ hadoop fs -ls /
Found 6 items
drwxr-xr-x - hadoop supergroup 0 2020-06-05 02:35 /flink
drwxrwxrwx - hadoop supergroup 0 2020-06-04 19:27 /hbase
drwxrwxrwx - hadoop supergroup 0 2020-06-04 17:56 /kylin
drwxrwxrwx - hadoop supergroup 0 2020-06-03 19:25 /tez
drwxrwxrwx - hadoop supergroup 0 2020-06-01 02:52 /tmp
drwxrwxrwx - hadoop supergroup 0 2020-06-04 00:37 /user
hadoop@kylin:/etc/flink/conf$ hadoop fs -chmod -R go+w /flink
hadoop@kylin:/etc/flink/conf$ hadoop fs -ls /flink
Found 1 items
drwxrwxrwx - hadoop supergroup 0 2020-06-05 02:35 /flink/ha
hadoop@kylin:/etc/flink/conf$ cat masters
localhost
hadoop@kylin:/etc/flink/conf$ vim masters
hadoop@kylin:/etc/flink/conf$ cat masters
localhost:6061
hadoop@kylin:/etc/flink/conf$ cat zoo.cfg | grep -v ^# | grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
clientPort=2181
server.1=localhost:2888:3888
hadoop@kylin:/etc/flink/conf$ cat /etc/zookeeper/conf/zoo.cfg | grep -v ^# | grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181
hadoop@kylin:/etc/flink/conf$ echo "server.1=localhost:2888:3888" >> /etc/zookeeper/conf/zoo.cfg
hadoop@kylin:/etc/flink/conf$ cat /etc/zookeeper/conf/zoo.cfg | grep -v ^# | grep -v ^$
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/current/data
dataLogDir=/opt/zookeeper/current/logs
clientPort=2181
server.1=localhost:2888:3888
hadoop@kylin:/etc/flink/conf$ cd /opt/flink/current/lib
hadoop@kylin:/opt/flink/current/lib$ wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
hadoop@kylin:/etc/flink/conf$ cat flink-conf.yaml | grep -v ^# | grep -v ^$
jobmanager.rpc.address: localhost
jobmanager.rpc.port: 6123
jobmanager.heap.size: 1024m
taskmanager.memory.process.size: 1728m
taskmanager.numberOfTaskSlots: 1
parallelism.default: 1
high-availability: zookeeper
high-availability.storageDir: hdfs:///flink/ha/
high-availability.zookeeper.quorum: localhost:2181
high-availability.zookeeper.path.root: /flink
yarn.application-attempts: 10
high-availability.zookeeper.client.acl: open
jobmanager.execution.failover-strategy: region
rest.port: 6061
io.tmp.dirs: /tmp
hadoop@kylin:~$ $FLINK_HOME/bin/start-cluster.sh
Starting HA cluster with 1 masters.
Starting standalonesession daemon on host kylin.
Starting taskexecutor daemon on host kylin.
hadoop@kylin:/opt/flink/current$ ./bin/flink run -yjm 1024m -ytm 4096m -ys 2 ./examples/batch/WordCount.jar
hadoop@kylin:/opt/flink/current$ $FLINK_HOME/bin/yarn-session.sh -help
Usage:
Optional
-at,--applicationType <arg> Set a custom application type for the application on YARN
-D <property=value> use value for given property
-d,--detached If present, runs the job in detached mode
-h,--help Help for the Yarn session CLI.
-id,--applicationId <arg> Attach to running YARN session
-j,--jar <arg> Path to Flink jar file
-jm,--jobManagerMemory <arg> Memory for JobManager Container with optional unit (default: MB)
-m,--jobmanager <arg> Address of the JobManager (master) to which to connect. Use this flag to connect to a different JobManager than the one specified in the configuration.
-nl,--nodeLabel <arg> Specify YARN node label for the YARN application
-nm,--name <arg> Set a custom name for the application on YARN
-q,--query Display available YARN resources (memory, cores)
-qu,--queue <arg> Specify YARN queue.
-s,--slots <arg> Number of slots per TaskManager
-t,--ship <arg> Ship files in the specified directory (t for transfer)
-tm,--taskManagerMemory <arg> Memory per TaskManager Container with optional unit (default: MB)
-yd,--yarndetached If present, runs the job in detached mode (deprecated; use non-YARN specific option instead)
-z,--zookeeperNamespace <arg> Namespace to create the Zookeeper sub-paths for high availability mode
---
yarn-session options:
-n : number of TaskManagers;
-d : run in detached mode;
-id : attach to an existing YARN application ID;
-j : path to the Flink jar file;
-jm : memory for the JobManager container (default unit: MB);
-nl : YARN node label for the YARN application;
-nm : custom name for the application on YARN;
-q : display available YARN resources (memory, cores);
-qu : YARN queue to use;
-s : number of slots per TaskManager;
-st : start Flink in streaming mode;
-tm : memory per TaskManager container (default unit: MB);
-z : namespace for the ZooKeeper sub-paths used in high-availability mode;
---
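# A minimal detached YARN session using the options above (memory sizes and name are illustrative):
# hadoop@kylin:/opt/flink/current$ $FLINK_HOME/bin/yarn-session.sh -d -nm flink-session -jm 1024m -tm 4096m -s 2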
# Tez (0.10.1) supports Hadoop 3.2.1
# http://tez.apache.org/
root@kylin:~/wget# mkdir Tez-0.10.1
root@kylin:~/wget# cd Tez-0.10.1/
root@kylin:~/wget/Tez-0.10.1# git --version
git version 2.17.1
root@kylin:~/wget/Tez-0.10.1# git clone https://github.com/apache/tez.git
Cloning into 'tez'...
remote: Enumerating objects: 64, done.
remote: Counting objects: 100% (64/64), done.
remote: Compressing objects: 100% (42/42), done.
remote: Total 97137 (delta 11), reused 42 (delta 5), pack-reused 97073
Receiving objects: 100% (97137/97137), 27.03 MiB | 795.00 KiB/s, done.
Resolving deltas: 100% (42789/42789), done.
Checking out files: 100% (1862/1862), done.
# Build the Tez source with Maven
root@kylin:~/wget/Tez/Tez-0.10.1# mvn install -Dhadoop.version=3.2.1 -DskipTests -Dmaven.javadoc.skip=true
......
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary for tez 0.10.1-SNAPSHOT:
[INFO]
[INFO] tez ................................................ SUCCESS [ 4.663 s]
[INFO] hadoop-shim ........................................ SUCCESS [ 2.121 s]
[INFO] tez-api ............................................ SUCCESS [ 2.736 s]
[INFO] tez-build-tools .................................... SUCCESS [ 0.188 s]
[INFO] tez-common ......................................... SUCCESS [ 0.385 s]
[INFO] tez-runtime-internals .............................. SUCCESS [ 0.404 s]
[INFO] tez-runtime-library ................................ SUCCESS [ 0.878 s]
[INFO] tez-mapreduce ...................................... SUCCESS [ 0.676 s]
[INFO] tez-examples ....................................... SUCCESS [ 0.328 s]
[INFO] tez-dag ............................................ SUCCESS [ 2.846 s]
[INFO] tez-tests .......................................... SUCCESS [ 0.727 s]
[INFO] tez-ext-service-tests .............................. SUCCESS [ 0.447 s]
[INFO] tez-ui ............................................. SUCCESS [38:56 min]
[INFO] tez-plugins ........................................ SUCCESS [ 0.112 s]
[INFO] tez-protobuf-history-plugin ........................ SUCCESS [ 8.387 s]
[INFO] tez-yarn-timeline-history .......................... SUCCESS [ 2.745 s]
[INFO] tez-yarn-timeline-history-with-acls ................ SUCCESS [ 0.931 s]
[INFO] tez-yarn-timeline-cache-plugin ..................... SUCCESS [02:56 min]
[INFO] tez-yarn-timeline-history-with-fs .................. SUCCESS [ 0.976 s]
[INFO] tez-history-parser ................................. SUCCESS [ 37.990 s]
[INFO] tez-aux-services ................................... SUCCESS [06:13 min]
[INFO] tez-tools .......................................... SUCCESS [ 0.048 s]
[INFO] tez-perf-analyzer .................................. SUCCESS [ 0.043 s]
[INFO] tez-job-analyzer ................................... SUCCESS [ 2.030 s]
[INFO] tez-javadoc-tools .................................. SUCCESS [ 0.693 s]
[INFO] hadoop-shim-impls .................................. SUCCESS [ 0.092 s]
[INFO] hadoop-shim-2.8 .................................... SUCCESS [ 0.600 s]
[INFO] tez-dist ........................................... SUCCESS [01:45 min]
[INFO] Tez ................................................ SUCCESS [ 0.069 s]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 51:05 min
[INFO] Finished at: 2020-06-03T17:02:22+08:00
[INFO] ------------------------------------------------------------------------
root@kylin:~/wget/Tez/Tez-0.10.1# ll
total 216
drwxr-xr-x 22 root root 4096 Jun 3 16:06 ./
drwxr-xr-x 3 root root 4096 Jun 3 04:22 ../
drwxr-xr-x 8 root root 4096 Jun 3 03:26 .git/
-rw-r--r-- 1 root root 100 Jun 3 03:25 .gitignore
-rw-r--r-- 1 root root 1021 Jun 3 03:25 .travis.yml
-rw-r--r-- 1 root root 7160 Jun 3 03:25 BUILDING.txt
lrwxrwxrwx 1 root root 33 Jun 3 03:25 INSTALL.md -> docs/src/site/markdown/install.md
-rw-r--r-- 1 root root 35592 Jun 3 03:25 KEYS
-rw-r--r-- 1 root root 11358 Jun 3 03:25 LICENSE.txt
-rw-r--r-- 1 root root 164 Jun 3 03:25 NOTICE.txt
-rw-r--r-- 1 root root 1432 Jun 3 03:25 README.md
-rw-r--r-- 1 root root 5819 Jun 3 03:25 Tez_DOAP.rdf
drwxr-xr-x 3 root root 4096 Jun 3 03:25 build-tools/
drwxr-xr-x 4 root root 4096 Jun 3 17:02 docs/
drwxr-xr-x 4 root root 4096 Jun 3 12:01 hadoop-shim/
drwxr-xr-x 5 root root 4096 Jun 3 17:00 hadoop-shim-impls/
-rw-r--r-- 1 root root 47609 Jun 3 12:29 pom.xml
drwxr-xr-x 3 root root 4096 Jun 3 04:35 target/
drwxr-xr-x 4 root root 4096 Jun 3 12:32 tez-api/
drwxr-xr-x 4 root root 4096 Jun 3 12:32 tez-build-tools/
drwxr-xr-x 4 root root 4096 Jun 3 12:32 tez-common/
drwxr-xr-x 4 root root 4096 Jun 3 13:36 tez-dag/
drwxr-xr-x 5 root root 4096 Jun 3 17:00 tez-dist/
drwxr-xr-x 4 root root 4096 Jun 3 12:35 tez-examples/
drwxr-xr-x 4 root root 4096 Jun 3 14:13 tez-ext-service-tests/
drwxr-xr-x 4 root root 4096 Jun 3 12:34 tez-mapreduce/
drwxr-xr-x 10 root root 4096 Jun 3 16:50 tez-plugins/
drwxr-xr-x 4 root root 4096 Jun 3 12:32 tez-runtime-internals/
drwxr-xr-x 4 root root 4096 Jun 3 12:33 tez-runtime-library/
drwxr-xr-x 4 root root 4096 Jun 3 14:13 tez-tests/
drwxr-xr-x 9 root root 4096 Jun 3 17:00 tez-tools/
drwxr-xr-x 4 root root 4096 Jun 3 16:26 tez-ui/
# After the build completes, the compiled Tez can be found under tez-dist/target/ in the source tree. There are two packages,
# tez-0.10.1-SNAPSHOT-minimal.tar.gz and tez-0.10.1-SNAPSHOT.tar.gz; these are the Tez artifacts we need.
root@kylin:~/wget/Tez/Tez-0.10.1# cd tez-dist/target/
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# ll
total 74124
drwxr-xr-x 6 root root 4096 Jun 3 17:02 ./
drwxr-xr-x 5 root root 4096 Jun 3 17:00 ../
drwxr-xr-x 2 root root 4096 Jun 3 17:00 archive-tmp/
drwxr-xr-x 2 root root 4096 Jun 3 17:02 maven-archiver/
drwxr-xr-x 3 root root 4096 Jun 3 17:01 tez-0.10.1-SNAPSHOT/
drwxr-xr-x 3 root root 4096 Jun 3 17:02 tez-0.10.1-SNAPSHOT-minimal/
-rw-r--r-- 1 root root 19097221 Jun 3 17:02 tez-0.10.1-SNAPSHOT-minimal.tar.gz
-rw-r--r-- 1 root root 56772909 Jun 3 17:02 tez-0.10.1-SNAPSHOT.tar.gz
-rw-r--r-- 1 root root 2924 Jun 3 17:02 tez-dist-0.10.1-SNAPSHOT-tests.jar
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# pwd
/root/wget/Tez/Tez-0.10.1/tez-dist/target
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# mkdir -p /opt/tez/tez-0.10.1
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# tar -zxf tez-0.10.1-SNAPSHOT.tar.gz -C /opt/tez/tez-0.10.1/
#root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# tar -zxf tez-0.10.1-SNAPSHOT-minimal.tar.gz -C /opt/tez/
root@kylin:~/wget/Tez/Tez-0.10.1/tez-dist/target# cd /opt/tez/
root@kylin:/opt/tez# ll
total 4508
drwxr-xr-x 4 root root 4096 Jun 3 19:12 ./
drwxr-xr-x 13 root root 4096 Jun 3 19:10 ../
-rw-r--r-- 1 root root 15021 Jun 3 03:26 LICENSE
-rw-r--r-- 1 root root 1465 Jun 3 03:26 LICENSE-BSD-3clause
-rw-r--r-- 1 root root 36261 Jun 3 03:26 LICENSE-CDDLv1.1-GPLv2_withCPE
-rw-r--r-- 1 root root 1045 Jun 3 03:26 LICENSE-MIT
-rw-r--r-- 1 root root 4128 Jun 3 03:26 LICENSE-SIL_OpenFontLicense-v1.1
-rw-r--r-- 1 root root 804 Jun 3 03:26 NOTICE
-rw-r--r-- 1 root root 8863 Jun 3 12:02 hadoop-shim-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 6415 Jun 3 17:00 hadoop-shim-2.8-0.10.1-SNAPSHOT.jar
drwxr-xr-x 2 root root 4096 Jun 3 19:12 lib/
drwxr-xr-x 3 root root 4096 Jun 3 19:11 tez-0.10.1/
-rw-r--r-- 1 root root 1090694 Jun 3 16:11 tez-api-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 5468 Jun 3 12:32 tez-build-tools-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 87490 Jun 3 12:32 tez-common-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 1452967 Jun 3 16:11 tez-dag-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 56861 Jun 3 12:35 tez-examples-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 79220 Jun 3 16:53 tez-history-parser-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 15328 Jun 3 17:00 tez-javadoc-tools-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 78996 Jun 3 17:00 tez-job-analyzer-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 299473 Jun 3 12:35 tez-mapreduce-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 80514 Jun 3 16:50 tez-protobuf-history-plugin-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 201472 Jun 3 12:32 tez-runtime-internals-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 797815 Jun 3 12:33 tez-runtime-library-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 159136 Jun 3 14:13 tez-tests-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 11533 Jun 3 16:51 tez-yarn-timeline-cache-plugin-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 28229 Jun 3 16:50 tez-yarn-timeline-history-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 7816 Jun 3 16:50 tez-yarn-timeline-history-with-acls-0.10.1-SNAPSHOT.jar
-rw-r--r-- 1 root root 23670 Jun 3 16:53 tez-yarn-timeline-history-with-fs-0.10.1-SNAPSHOT.jar
root@kylin:/opt/tez# chown -R hadoop:hadoop /opt/tez/
root@kylin:/opt/tez# su hadoop
hadoop@kylin:~$ cd /opt/tez/
hadoop@kylin:/opt/tez$ hadoop fs -mkdir /tez
hadoop@kylin:/opt/tez$ hadoop fs -put tez-0.10.1 /tez
hadoop@kylin:/opt/tez$ hadoop fs -chmod -R 777 /tez
hadoop@kylin:/opt/tez$ cd /etc/hive/conf/
hadoop@kylin:/etc/hive/conf$ vim tez-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>tez.lib.uris</name>
<value>${fs.defaultFS}/tez/tez-0.10.1,${fs.defaultFS}/tez/tez-0.10.1/lib</value>
</property>
<property>
<name>tez.lib.uris.classpath</name>
<value>${fs.defaultFS}/tez/tez-0.10.1,${fs.defaultFS}/tez/tez-0.10.1/lib</value>
</property>
<property>
<name>tez.use.cluster.hadoop-libs</name>
<value>true</value>
</property>
<property>
<name>tez.am.resource.memory.mb</name>
<value>2048</value>
</property>
<property>
<name>tez.am.resource.cpu.vcores</name>
<value>2</value>
</property>
</configuration>
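# Note (a sketch, not run in the original session): the HDFS paths in tez.lib.uris must match what was
# uploaded with "hadoop fs -put" above, otherwise the Tez AM cannot localize its jars. A quick check:
hadoop fs -ls /tez/tez-0.10.1 | head
hadoop fs -ls /tez/tez-0.10.1/lib | head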
# Set Hive's default execution engine to tez
hadoop@kylin:/etc/hive/conf$ vim hive-site.xml
---
<property>
<name>hive.execution.engine</name>
<value>tez</value>
</property>
---
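# The engine can also be switched per session instead of globally; a sketch using beeline's -e option
# (connection URL and user as used later in this document; valid values include tez and mr):
beeline -u "jdbc:hive2://hadoop:10000/;auth=noSasl" -n user2 -e "set hive.execution.engine=tez;"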
root@kylin:~/wget# tar -zxf tez-0.10.1-SNAPSHOT-minimal.tar.gz -C /opt/tez/tez-0.10.1/
root@kylin:~/wget# chown -R hadoop:hadoop /opt/tez/
hadoop@kylin:/opt/tez$ ln -s tez-0.10.1 current
hadoop@kylin:/opt/tez$ vim /home/hadoop/.bashrc
---
# Tez
export TEZ_HOME=/opt/tez/current
---
hadoop@kylin:/opt/tez$ source /home/hadoop/.bashrc
hadoop@kylin:/opt/tez$ cd /etc/hive/conf/
hadoop@kylin:/etc/hive/conf$ vim hive-env.sh
---
# Tez
export TEZ_HOME=/opt/tez/current
export TEZ_JARS=""
for jar in `ls $TEZ_HOME |grep jar`; do
export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/$jar
done
for jar in `ls $TEZ_HOME/lib`; do
export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/lib/$jar
done
# TEZ_JARS already begins with ":" and enumerates every jar, so append it as-is
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}${TEZ_JARS}
---
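# Alternative (a sketch): rather than enumerating every jar, HADOOP_CLASSPATH can use JVM wildcard
# entries, assuming the same /opt/tez/current layout created above:
export TEZ_HOME=/opt/tez/current
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${TEZ_HOME}/*:${TEZ_HOME}/lib/*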
hadoop@kylin:/etc/hive/conf$ cd /opt/hive/current/conf/
hadoop@kylin:/opt/hive/current/conf$ ln -s /etc/hive/conf/tez-site.xml tez-site.xml
---
hadoop@kylin:~$ beeline
Beeline version 3.1.2 by Apache Hive
beeline> !connect jdbc:hive2://hadoop:10000/;auth=noSasl
Connecting to jdbc:hive2://hadoop:10000/;auth=noSasl
Enter username for jdbc:hive2://hadoop:10000/: user2
Enter password for jdbc:hive2://hadoop:10000/: *******
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://hadoop:10000/> show databases;
+----------------+
| database_name |
+----------------+
| default |
| test_bdw |
+----------------+
2 rows selected (0.865 seconds)
0: jdbc:hive2://hadoop:10000/> use default;
No rows affected (0.155 seconds)
0: jdbc:hive2://hadoop:10000/> show tables;
+---------------------------+
| tab_name |
+---------------------------+
| kylin_account |
| kylin_cal_dt |
| kylin_category_groupings |
| kylin_country |
| kylin_sales |
+---------------------------+
5 rows selected (0.178 seconds)
0: jdbc:hive2://hadoop:10000/> select count(*) from kylin_sales;
Error: Error while processing statement: FAILED: Execution Error, return code -101 from org.apache.hadoop.hive.ql.exec.tez.TezTask. tried to access field com.google.protobuf.AbstractMessage.memoizedSize from class org.apache.tez.dag.api.records.DAGProtos$ConfigurationProto (state=08S01,code=-101)
0: jdbc:hive2://hadoop:10000/> set hive.execution.engine;
+----------------------------+
| set |
+----------------------------+
| hive.execution.engine=tez |
+----------------------------+
1 row selected (0.023 seconds)
0: jdbc:hive2://hadoop:10000/> set hive.execution.engine=mr;
No rows affected (0.024 seconds)
0: jdbc:hive2://hadoop:10000/> select count(*) from kylin_sales;
+--------+
| _c0 |
+--------+
| 10000 |
+--------+
1 row selected (114.402 seconds)
0: jdbc:hive2://hadoop:10000/> !quit
Closing: 0: jdbc:hive2://hadoop:10000/;auth=noSasl
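# The TezTask failure above ("tried to access field com.google.protobuf.AbstractMessage.memoizedSize")
# typically points to a protobuf version conflict between the jars shipped with this Tez build and the
# Hadoop jars on the cluster classpath; the session works around it by falling back to the mr engine.
# A diagnostic sketch (paths assume the layout created above):
ls /opt/tez/current/lib | grep -i protobuf
ls $HADOOP_HOME/share/hadoop/common/lib | grep -i protobuf
hadoop fs -ls /tez/tez-0.10.1/lib | grep -i protobuf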
hadoop@kylin:~$ cat hadoopstart.sh
#!/bin/bash
##############################
#Hadoopstart.sh
#Author kylin
##############################
# Start Hadoop
echo "Start Hadoop..."
echo "[ namenode;secondarynamenode;datanode;resourcemanager;nodemanager;proxyserver;historyserver ]"
$HADOOP_HOME/bin/hdfs --daemon start namenode
$HADOOP_HOME/bin/hdfs --daemon start secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon start datanode
$HADOOP_HOME/bin/yarn --daemon start resourcemanager
$HADOOP_HOME/bin/yarn --daemon start nodemanager
$HADOOP_HOME/bin/yarn --daemon start proxyserver
$HADOOP_HOME/bin/mapred --daemon start historyserver
# Start Hive
echo "Start Hive..."
echo "[ metastore;hiveserver2 ]"
# cd $HIVE_HOME
# $HIVE_HOME/bin/hiveserver2 >/dev/null 2>&1 &
# $HIVE_HOME/hcatalog/sbin/hcat_server.sh start
# $HIVE_HOME/hcatalog/bin/hcat
# $HIVE_HOME/hcatalog/sbin/webhcat_server.sh start
nohup hive --service metastore > $HIVE_HOME/logs/metastore.log 2>&1 &
nohup hive --service hiveserver2 > $HIVE_HOME/logs/hiveserver2.log 2>&1 &
# Start Zookeeper
echo "Start Zookeeper..."
$ZK_HOME/bin/zkServer.sh start
# Start HBase
echo "Start HBase..."
echo "[ master;regionserver ]"
$HBASE_HOME/bin/hbase-daemon.sh start master
$HBASE_HOME/bin/hbase-daemon.sh start regionserver
# Start Kafka
echo "Start Kafka..."
echo "[ kafka0;kafka1;kafka2 ]"
nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &
# Start Spark
echo "Start Spark..."
echo "[ master;worker ]"
$SPARK_HOME/sbin/start-master.sh
$SPARK_HOME/sbin/start-slave.sh spark://kylin.localdomain:7077
# Start Flink
echo "Start Flink..."
echo "[ jobmanager;taskmanager ]"
$FLINK_HOME/bin/start-cluster.sh
# $FLINK_HOME/bin/jobmanager.sh start
# $FLINK_HOME/bin/taskmanager.sh start
# Start Kylin
echo "Start Kylin..."
#$KYLIN_HOME/bin/check-env.sh
nohup $KYLIN_HOME/bin/kylin.sh start > $KYLIN_HOME/logs/nohup.out-startkylin.log 2>&1 &
sleep 3m
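# Check the first word of the last line of the startup log: "Web" (as in "Web UI is at ...") means Kylin
# came up; "ERROR:" triggers a single retry below.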
Info=`cat /opt/kylin/current/logs/nohup.out-startkylin.log | awk 'END{print}' | awk '{print $1}'`
if [[ $Info = Web ]];then
echo "A new Kylin instance is started by hadoop. To stop it, run '$KYLIN_HOME/bin/kylin.sh stop'"
echo "Check the log at /opt/kylin/current/logs/nohup.out-startkylin.log"
echo "Web UI is at http://kylin.localdomain:7070/kylin"
else
if [[ $Info = ERROR: ]];then
nohup $KYLIN_HOME/bin/kylin.sh start > $KYLIN_HOME/logs/nohup.out-startkylin.log 2>&1 &
sleep 3m
END=`cat /opt/kylin/current/logs/nohup.out-startkylin.log | awk 'END{print}'`
echo "$END"
else
echo "$Info"
fi
fi
hadoop@kylin:~$ /bin/bash /home/hadoop/hadoopstart.sh
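# Quick verification after startup (a sketch, not part of the original session): each component should
# appear as its own JVM in jps (e.g. NameNode, DataNode, ResourceManager, NodeManager, HMaster,
# HRegionServer, QuorumPeerMain, Kafka; exact names vary by component version).
jps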
hadoop@kylin:~$ cat hadoopstop.sh
#!/bin/bash
##############################
#Hadoopstop.sh
#Author kylin
##############################
echo "Stop Kylin..."
# Stop Kylin
$KYLIN_HOME/bin/kylin.sh stop
# Stop Flink
echo "Stop Flink..."
$FLINK_HOME/bin/stop-cluster.sh
# $FLINK_HOME/bin/jobmanager.sh stop
# $FLINK_HOME/bin/taskmanager.sh stop
echo "Stop Spark..."
# Stop Spark
$SPARK_HOME/sbin/stop-slave.sh spark://kylin.localdomain:7077
$SPARK_HOME/sbin/stop-master.sh
echo "Stop Kafka..."
# Stop Kafka
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server.properties > $KAFKA_HOME/logs/nohup.out 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-1.properties > $KAFKA_HOME/logs/nohup.out-1 2>&1 &
nohup $KAFKA_HOME/bin/kafka-server-stop.sh $KAFKA_CONF_DIR/server-2.properties > $KAFKA_HOME/logs/nohup.out-2 2>&1 &
sleep 3s
#jps | grep Kafka | awk '{print $1}' | xargs kill -9
echo "Stop HBase..."
# Stop HBase
$HBASE_HOME/bin/hbase-daemon.sh stop master
$HBASE_HOME/bin/hbase-daemon.sh stop regionserver
echo "Stop Zookeeper..."
# Stop Zookeeper
$ZK_HOME/bin/zkServer.sh stop
echo "Stop Hive..."
# Kill Hiveserver
cd $HIVE_HOME
# $HIVE_HOME/hcatalog/sbin/webhcat_server.sh stop
# $HIVE_HOME/hcatalog/sbin/hcat_server.sh stop
jps | grep RunJar | awk '{print $1}' | xargs kill -9
echo "Stop Hadoop..."
# Stop Hadoop
$HADOOP_HOME/bin/mapred --daemon stop historyserver
$HADOOP_HOME/bin/yarn --daemon stop proxyserver
$HADOOP_HOME/bin/yarn --daemon stop nodemanager
$HADOOP_HOME/bin/yarn --daemon stop resourcemanager
$HADOOP_HOME/bin/hdfs --daemon stop datanode
$HADOOP_HOME/bin/hdfs --daemon stop secondarynamenode
$HADOOP_HOME/bin/hdfs --daemon stop namenode
hadoop@kylin:~$ /bin/bash /home/hadoop/hadoopstop.sh
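# Verification after shutdown (a sketch): jps should now list only the Jps process itself; any leftover
# JVMs can be inspected and stopped manually.
jps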