1. JDK installation (omitted)
2. Passwordless SSH login
Enter the SSH directory (if this fails, first log in to the local machine once with ssh localhost, which creates it): cd ~/.ssh/
Generate a key pair, pressing Enter at every prompt: ssh-keygen -t rsa
Add the public key to the authorized list: cat ./id_rsa.pub >> ./authorized_keys
Log in to the local machine again; it now works without a password: ssh localhost
3.1 Modify the configuration file core-site.xml
Enter the directory: cd /usr/local/hadoop/hadoop-3.1.3/etc/hadoop
Edit the file: sudo vim ./core-site.xml
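The file contents are not reproduced in these notes; below is a minimal pseudo-distributed sketch (the fs.defaultFS port and the hadoop.tmp.dir path are assumptions, adjust to your layout):
<configuration>
    <!-- default filesystem URI used by HDFS clients (assumed single-node) -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <!-- base directory for Hadoop temporary files (assumed path) -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/hadoop-3.1.3/tmp</value>
    </property>
</configuration>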
3.2 Modify the configuration file hdfs-site.xml
sudo vim ./hdfs-site.xml
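The file contents are likewise missing; a minimal single-node sketch, reusing the name/data directories that the rm commands below point at (dfs.replication=1 is an assumption for a pseudo-distributed setup):
<configuration>
    <!-- single replica, since there is only one DataNode -->
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <!-- NameNode metadata directory (inferred from the rm commands below) -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/usr/local/software/hadoop/hdfs/name</value>
    </property>
    <!-- DataNode block storage directory (inferred from the rm commands below) -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/usr/local/software/hadoop/hdfs/data</value>
    </property>
</configuration>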
Before reformatting, remove any NameNode/DataNode data left over from a previous run:
[root@hadoop hadoop]# rm -rf /usr/local/software/hadoop/hdfs/name/
[root@hadoop hadoop]# rm -rf /usr/local/software/hadoop/hdfs/data/
3.3 Format the NameNode
cd /usr/local/hadoop/hadoop-3.1.3
./bin/hdfs namenode -format
If this fails with an error such as:
hadoop: ERROR: Unable to kill 95115 Stopping datanodes
delete the data left over in /tmp from starting the daemons as root:
#rm -rf /tmp/hadoop* /tmp/hsperfdata*
II. Install Hive
[root@hadoop 4_hive]# ls
apache-hive-3.1.2-bin apache-hive-3.1.2-bin.tar.gz
[root@hadoop 4_hive]# mv apache-hive-3.1.2-bin ../hive
[root@hadoop conf]# cat hive-site.xml
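The cat output was not captured here; a sketch of the essential metastore settings, reconstructed from the schematool output below (the password value is a placeholder, not taken from the transcript):
<configuration>
    <!-- MySQL metastore database, matching the connection URL printed by schematool -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://192.168.10.235:3306/hive312?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <!-- placeholder: the real password is not shown in these notes -->
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>YOUR_PASSWORD</value>
    </property>
</configuration>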
Copy the driver
Copy the MySQL JDBC driver JAR into the hive/lib directory.
Initialize the metastore schema
[root@hadoop app]# schematool -dbType mysql -initSchema
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/app/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/software/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Metastore connection URL: jdbc:mysql://192.168.10.235:3306/hive312?createDatabaseIfNotExist=true
Metastore Connection Driver : com.mysql.jdbc.Driver
Metastore connection User: root
Starting metastore schema initialization to 3.1.0
Initialization script hive-schema-3.1.0.mysql.sql
[root@hadoop bin]# sh hiveserver2 &
User: root is not allowed to impersonate anonymous
This means root lacks permission to impersonate other users.
Solution:
Modify the Hadoop configuration file core-site.xml
and add the following content:
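The snippet itself is missing from the notes; the standard fix is to declare root as a Hadoop proxy user (restart HDFS afterwards for it to take effect):
<!-- allow root to impersonate users from any host and any group -->
<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>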
Verify:
[root@hadoop bin]# ps -ef |grep hive
root 33372 28260 8 14:27 pts/2 00:00:34 /usr/java/jdk1.8.0_131/bin/java -Dproc_jar -Dproc_hiveserver2 -Dlog4j.configurationFile=hive-log4j2.properties -Djava.util.logging.config.file=/opt/app/hive/conf/parquet-logging.properties -Djline.terminal=jline.UnsupportedTerminal -Dyarn.log.dir=/usr/local/software/hadoop/logs -Dyarn.log.file=hadoop.log -Dyarn.home.dir=/usr/local/software/hadoop -Dyarn.root.logger=INFO,console -Djava.library.path=/usr/local/software/hadoop/lib/native -Xmx256m -Dhadoop.log.dir=/usr/local/software/hadoop/logs -Dhadoop.log.file=hadoop.log -Dhadoop.home.dir=/usr/local/software/hadoop -Dhadoop.id.str=root -Dhadoop.root.logger=INFO,console -Dhadoop.policy.file=hadoop-policy.xml -Dhadoop.security.logger=INFO,NullAppender org.apache.hadoop.util.RunJar /opt/app/hive/lib/hive-service-3.1.2.jar org.apache.hive.service.server.HiveServer2
root 96507 28260 0 14:34 pts/2 00:00:00 grep --color=auto hive
[root@hadoop bin]# beeline -u jdbc:hive2://192.168.10.234:10000
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/app/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/software/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Connecting to jdbc:hive2://192.168.10.234:10000
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 3.1.2 by Apache Hive
0: jdbc:hive2://192.168.10.234:10000> show databases;
INFO : Compiling command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be): show databases
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Semantic Analysis Completed (retrial = false)
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:database_name, type:string, comment:from deserializer)], properties:null)
INFO : Completed compiling command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be); Time taken: 1.034 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be): show databases
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=root_20230327143450_7937d0bb-d45a-4c9d-bcbe-5b3e59d3c0be); Time taken: 0.041 seconds
INFO : OK
INFO : Concurrency mode is disabled, not creating a lock manager
+----------------+
| database_name |
+----------------+
| default |
+----------------+
1 row selected (1.469 seconds)
0: jdbc:hive2://192.168.10.234:10000>
III. Install ZooKeeper
The detailed steps are omitted. On startup, this error may appear:
Problem starting AdminServer on address 0.0.0.0, port 8080
We can change the AdminServer port in zoo.cfg:
admin.serverPort=8888
IV. Install HBase
When starting HBase, HMaster may abort because it tries to start its own embedded ZooKeeper while the standalone ZooKeeper already occupies port 2181:
java.io.IOException: Could not start ZK at requested port of 2181. ZK was started at port: 2182. Aborting as clients (e.g. shell) will not be able to find this ZK quorum.
at org.apache.hadoop.hbase.master.HMasterCommandLine.startMaster(HMasterCommandLine.java:217)
at org.apache.hadoop.hbase.master.HMasterCommandLine.run(HMasterCommandLine.java:140)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.hadoop.hbase.util.ServerCommandLine.doMain(ServerCommandLine.java:149)
at org.apache.hadoop.hbase.master.HMaster.main(HMaster.java:3080)
Fix: add one configuration entry to hbase-site.xml:
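The entry is not reproduced in the notes; the usual fix, assuming that is what was intended here, is to tell the start script to use the external ZooKeeper instead of launching an embedded one:
<property>
    <!-- use the already-running external ZooKeeper rather than HBase's embedded one -->
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>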
Verify:
http://192.168.10.234:16030/processRS.jsp
V. Install Solr
Edit the solr.in.sh configuration and add the ZooKeeper address.
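The exact line is not shown; in solr.in.sh the ensemble is normally given via ZK_HOST, for example (host and port assumed from this setup):
# point Solr at the standalone ZooKeeper
ZK_HOST="192.168.10.234:2181"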
[root@hadoop bin]# ./solr start &
[3] 92208
[root@hadoop bin]# *** [WARN] *** Your open file limit is currently 1024.
It should be set to 65000 to avoid operational disruption.
If you no longer wish to see this warning, set SOLR_ULIMIT_CHECKS to false in your profile or solr.in.sh
WARNING: Starting Solr as the root user is a security risk and not considered best practice. Exiting.
Please consult the Reference Guide. To override this check, start with argument '-force'
Edit the limits.conf file:
sudo vim /etc/security/limits.conf
1. For "Your open file limit is currently 1024", add hard nofile and soft nofile entries.
2. For "Your Max Processes Limit is currently 47448", add hard nproc and soft nproc entries.
Together, 1 and 2 mean appending four lines to limits.conf:
* hard nofile 65535
* soft nofile 65535
* hard nproc 65535
* soft nproc 65535
[root@hadoop bin]# ls
init.d install_solr_service.sh oom_solr.sh post solr solr-8983.pid solr.cmd solr.in.cmd solr.in.sh
[root@hadoop bin]# ./solr start &
[1] 81578
[root@hadoop bin]# WARNING: Starting Solr as the root user is a security risk and not considered best practice. Exiting.
Please consult the Reference Guide. To override this check, start with argument '-force'
^C
[1]+  Exit 1                 ./solr start
Switch to a non-root user to start Solr
[root@hadoop ~]# solr stop -all
bash: solr: command not found...
[root@hadoop ~]# cd /opt/app/solr/bin/
[root@hadoop bin]# ./solr stop -all
[root@hadoop bin]# ./solr -e dih
ERROR: Unsupported example dih ! Please choose one of: cloud, dih, schemaless, or techproducts
[root@hadoop bin]#
[guolin@hadoop bin]$ ./solr start &
[1] 44230
[guolin@hadoop bin]$ Waiting up to 180 seconds to see Solr running on port 8983 [\]
Started Solr server on port 8983 (pid=44429). Happy searching!
http://192.168.10.234:8983/solr/#/
VI. Install Kafka
Unpack the archive, then edit the server.properties file under the config directory:
the ZooKeeper address, the PLAINTEXT listener address, and so on.
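A sketch of the relevant server.properties entries (all values are assumptions for this single-node setup):
# unique broker id within the cluster
broker.id=0
# PLAINTEXT listener address that clients connect to
listeners=PLAINTEXT://192.168.10.234:9092
# ZooKeeper connection string
zookeeper.connect=192.168.10.234:2181
# where Kafka stores its log segments (assumed path)
log.dirs=/opt/app/kafka/logs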
Start it:
./kafka-server-start.sh ../config/server.properties &
VII. Install Atlas
1. Unpack
tar -zxvf apache-atlas-2.1.0-server.tar.gz
2. Integrate Atlas with HBase
Modify the following parameter in the /atlas/conf/atlas-application.properties configuration file (the hadoop102-104 hosts below are sample values; point it at your own ZooKeeper quorum):
atlas.graph.storage.hostname=hadoop102:2181,hadoop103:2181,hadoop104:2181
Modify the /opt/module/atlas/conf/atlas-env.sh configuration file and add the following:
export HBASE_CONF_DIR=/opt/module/hbase/conf
3. Integrate Atlas with Solr
Modify the atlas/conf/atlas-application.properties configuration file: vim /home/atlas/atlas/conf/atlas-application.properties
# Comment out the Solr cloud-mode properties here:
#Solr cloud mode properties
#atlas.graph.index.search.solr.mode=cloud
#atlas.graph.index.search.solr.zookeeper-url=
#atlas.graph.index.search.solr.zookeeper-connect-timeout=60000
#atlas.graph.index.search.solr.zookeeper-session-timeout=60000
#atlas.graph.index.search.solr.wait-searcher=true
#Solr http mode properties
atlas.graph.index.search.solr.mode=http
atlas.graph.index.search.solr.http-urls=http://localhost:8983/solr
Copy the Solr config directory shipped with Atlas: cp -rf /home/atlas/atlas/conf/solr /home/atlas/solr/atlas_conf
Then run the following command:
sudo -i -u solr /home/atlas/solr/bin/solr create -c vertex_index -d /home/atlas/solr/atlas_conf
[root@hadoop conf]# cp -rf solr/ /opt/app/altas/solr/atlas_conf
cp: cannot create directory '/opt/app/altas/solr/atlas_conf': No such file or directory
[root@hadoop conf]# mkdir -p /opt/app/altas/solr
[root@hadoop conf]# cp -rf solr/ /opt/app/altas/solr/atlas_conf
[root@hadoop conf]# sudo -i -u guolin /opt/app/solr/bin/solr create -c vertex_index -d /opt/app/altas/solr/atlas_conf
Created collection 'vertex_index' with 1 shard(s), 1 replica(s) with config-set 'vertex_index'
[root@hadoop conf]#
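Only vertex_index is created above. Atlas also expects edge_index and fulltext_index collections; if they were not created elsewhere, the same command pattern applies:
sudo -i -u guolin /opt/app/solr/bin/solr create -c edge_index -d /opt/app/altas/solr/atlas_conf
sudo -i -u guolin /opt/app/solr/bin/solr create -c fulltext_index -d /opt/app/altas/solr/atlas_conf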
4. Integrate Atlas with Kafka
Modify the atlas/conf/atlas-application.properties configuration file: vim /home/atlas/atlas/conf/atlas-application.properties
atlas.notification.embedded=false
atlas.kafka.data=/home/atlas/kafka/data
atlas.kafka.zookeeper.connect=localhost:2181/kafka
atlas.kafka.bootstrap.servers=localhost:9092
5. Atlas server configuration
Modify the atlas/conf/atlas-application.properties configuration file: vim /home/atlas/atlas/conf/atlas-application.properties
atlas.server.run.setup.on.start=false
Modify the atlas-log4j.xml file: vim /home/atlas/atlas/conf/atlas-log4j.xml
# Uncomment the following block:
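The block is not quoted in the notes; in the stock atlas-log4j.xml it is the perf appender and its logger. A sketch of that block as it ships in Atlas 2.1.0 (verify against your file):
<appender name="perf_appender" class="org.apache.log4j.DailyRollingFileAppender">
    <param name="file" value="${atlas.log.dir}/atlas_perf.log" />
    <param name="datePattern" value="'.'yyyy-MM-dd" />
    <param name="append" value="true" />
    <layout class="org.apache.log4j.PatternLayout">
        <param name="ConversionPattern" value="%d|%t|%m%n" />
    </layout>
</appender>
<logger name="org.apache.atlas.perf" additivity="false">
    <level value="debug" />
    <appender-ref ref="perf_appender" />
</logger>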
6. Integrate Atlas with Hive
Modify the atlas/conf/atlas-application.properties configuration file: vim /home/atlas/atlas/conf/atlas-application.properties
# Append at the end of the file:
######### Hive Hook Configs #######
atlas.hook.hive.synchronous=false
atlas.hook.hive.numRetries=3
atlas.hook.hive.queueSize=10000
atlas.cluster.name=primary
Modify the hive-site.xml file in the Hive installation: vim hive/conf/hive-site.xml
# Append inside the <configuration> tag:
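The property to append (this is the standard Atlas Hive hook registration):
<property>
    <!-- run the Atlas hook after each Hive statement to capture lineage -->
    <name>hive.exec.post.hooks</name>
    <value>org.apache.atlas.hive.hook.HiveHook</value>
</property>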
7. Install the Hive Hook
Unpack the Hive Hook archive: tar -zxvf apache-atlas-2.1.0-hive-hook.tar.gz
Copy the Hive Hook dependencies into the Atlas installation directory: cp -r apache-atlas-hive-hook-2.1.0/* /home/atlas/atlas/
Modify the hive/conf/hive-env.sh configuration file: vim /home/atlas/hive/conf/hive-env.sh
export HIVE_AUX_JARS_PATH=/home/atlas/atlas/hook/hive
Copy the Atlas configuration file /home/atlas/atlas/conf/atlas-application.properties into the /home/atlas/hive/conf directory:
cp /home/atlas/atlas/conf/atlas-application.properties /home/atlas/hive/conf/
[root@hadoop 9_atlas]# cd apache-atlas-hive-hook-2.1.0/
[root@hadoop apache-atlas-hive-hook-2.1.0]# ls
hook hook-bin
[root@hadoop apache-atlas-hive-hook-2.1.0]# cd /opt/app/altas/
[root@hadoop altas]# ls
bin conf DISCLAIMER.txt LICENSE models NOTICE server solr tools
[root@hadoop altas]# cp -r /opt/app/9_atlas/apache-atlas-hive-hook-2.1.0/* .
[root@hadoop altas]# ls
bin conf DISCLAIMER.txt hook hook-bin LICENSE models NOTICE server solr tools
[root@hadoop altas]# cd /opt/app/hive/conf/
[root@hadoop conf]# ls
beeline-log4j2.properties.template hive-log4j2.properties.template llap-daemon-log4j2.properties.template
hive-default.xml.template hive-site.xml nohup.out
hive-env.sh.template ivysettings.xml parquet-logging.properties
hive-exec-log4j2.properties.template llap-cli-log4j2.properties.template
[root@hadoop conf]# cp hive-env.sh.template hive-env.sh
[root@hadoop conf]# vi hive-env.sh
[root@hadoop conf]# cat hive-env.sh
export HIVE_AUX_JARS_PATH=/opt/app/altas/hook/hive
[root@hadoop conf]# cat /opt/app/
1_jdk8/ 4_hive/ 7_hbase/ altas/ hive/ nohup.out zookeper/
2_hadoop/ 5_zookeeper/ 8_solr/ hadoop/ init.sh solr/
3_mysql/ 6_kafka/ 9_atlas/ hbase/ kafka/ zookeeper/
[root@hadoop conf]# cat /opt/app/altas/hook
hook/ hook-bin/
[root@hadoop conf]# cat /opt/app/altas/hook/hive/
cat: /opt/app/altas/hook/hive/: Is a directory
[root@hadoop conf]# ll
total 344
-rwxr-xr-x 1 root root   1596 Aug 23  2019 beeline-log4j2.properties.template
-rwxr-xr-x 1 root root 300482 Jul 16  2020 hive-default.xml.template
-rwxr-xr-x 1 root root   2385 Mar 31 14:17 hive-env.sh
-rwxr-xr-x 1 root root   2365 Aug 23  2019 hive-env.sh.template
-rwxr-xr-x 1 root root   2274 Aug 23  2019 hive-exec-log4j2.properties.template
-rwxr-xr-x 1 root root   3086 Aug 23  2019 hive-log4j2.properties.template
-rw-r--r-- 1 root root    965 Mar 31 11:42 hive-site.xml
-rwxr-xr-x 1 root root   2060 Aug 23  2019 ivysettings.xml
-rwxr-xr-x 1 root root   3558 Aug 23  2019 llap-cli-log4j2.properties.template
-rwxr-xr-x 1 root root   7163 Aug 23  2019 llap-daemon-log4j2.properties.template
-rw------- 1 root root   1231 Mar 27 14:34 nohup.out
-rwxr-xr-x 1 root root   2662 Aug 23  2019 parquet-logging.properties
[root@hadoop conf]# vi hive-env.sh
[root@hadoop conf]# pwd
/opt/app/hive/conf
[root@hadoop conf]# ls
beeline-log4j2.properties.template hive-exec-log4j2.properties.template llap-cli-log4j2.properties.template
hive-default.xml.template hive-log4j2.properties.template llap-daemon-log4j2.properties.template
hive-env.sh hive-site.xml nohup.out
hive-env.sh.template ivysettings.xml parquet-logging.properties
[root@hadoop conf]# cp /opt/app/altas/conf/atlas-application.properties .
[root@hadoop conf]# ls
atlas-application.properties hive-exec-log4j2.properties.template llap-daemon-log4j2.properties.template
beeline-log4j2.properties.template hive-log4j2.properties.template nohup.out
hive-default.xml.template hive-site.xml parquet-logging.properties
hive-env.sh ivysettings.xml
hive-env.sh.template llap-cli-log4j2.properties.template
[root@hadoop conf]# cd /opt/altas/
[root@hadoop altas]# ls
[root@hadoop altas]# ls
[root@hadoop altas]# cd /opt/app/altas/
[root@hadoop altas]# ls
bin conf DISCLAIMER.txt hook hook-bin LICENSE models NOTICE server solr tools
[root@hadoop altas]# cd bin/
[root@hadoop bin]# ls
atlas_admin.py atlas_kafka_setup_hook.py atlas_stop.py quick_start.py
atlas_client_cmdline.py atlas_kafka_setup.py atlas_update_simple_auth_json.py quick_start_v1.py
atlas_config.py atlas_start.py cputil.py
Run the startup script and wait about 2 minutes:
[root@hadoop bin]# ./atlas_start.py
starting atlas on host localhost
starting atlas on port 21000
................................
Apache Atlas Server started!!!
1. Start Hadoop: from the Hadoop directory under /opt/app/, run sbin/start-all.sh
2. Start ZooKeeper: /opt/app/zookeeper/bin/zkServer.sh start
3. Start Kafka: /opt/app/kafka/bin/kafka-server-start.sh -daemon /opt/app/kafka/config/server.properties
4. Start HBase: /opt/app/hbase/bin/start-hbase.sh
5. Start Solr: sudo -i -u guolin /opt/app/solr/bin/solr start
6. Start the Atlas service: enter the Atlas bin directory (cd /opt/app/altas/bin) and run ./atlas_start.py
Hadoop: http://192.168.10.234:9870/explorer.html#/
HBase: http://192.168.10.234:16030/rs-status
Solr: http://192.168.10.234:8983/solr/#/
[root@hadoop bin]# jps
# Hadoop processes
47731 ResourceManager
43726 SecondaryNameNode
37990 NameNode
50247 NodeManager
40456 DataNode
# HBase processes
20563 HMaster
21576 HRegionServer
# Atlas, Kafka, and ZooKeeper (QuorumPeerMain) processes
69444 Atlas
42216 Kafka
90014 QuorumPeerMain
99726 Jps
# Solr process (listed as "jar"; pid 44429 matches the Solr startup message above)
44429 jar