本案例使用了编译好的 Atlas(版本号 1.0.0),本文重在讲解如何集成;关于如何编译 Atlas,可以参考其他博客
1.华为FusionInsight 大数据平台 自带的solr版本和atlas版本(1.0.0)存在不兼容的情况;我为了赶工期,没有特意深究,自己搭建了solr cloud 模式,博客可以参考:https://blog.csdn.net/u010235716/article/details/104946962
2. 准备好jdk环境变量;
#解压
unzip atlas-1.0.0.zip
[root@SHB-L0120794 atlas-1.0.0]# cd /data/atlas-1.0.0/distro/target/
[root@SHB-L0120794 target]# ll
total 374316
drwxr-xr-x 11 root root 4096 Mar 17 12:11 apache-atlas-1.0.0
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-bin.bak
-rw-r--r-- 1 root root 269882584 Nov 12 2018 apache-atlas-1.0.0-bin.tar.gz
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-falcon-hook
-rw-r--r-- 1 root root 8984504 Nov 12 2018 apache-atlas-1.0.0-falcon-hook.tar.gz
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-hbase-hook
-rw-r--r-- 1 root root 16618230 Nov 12 2018 apache-atlas-1.0.0-hbase-hook.tar.gz
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-hive-hook
-rw-r--r-- 1 root root 20269877 Nov 12 2018 apache-atlas-1.0.0-hive-hook.tar.gz
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-kafka-hook
-rw-r--r-- 1 root root 9021206 Nov 12 2018 apache-atlas-1.0.0-kafka-hook.tar.gz
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-migration-exporter
-rw-r--r-- 1 root root 5696 Nov 12 2018 apache-atlas-1.0.0-migration-exporter.zip
-rw-r--r-- 1 root root 10349836 Nov 12 2018 apache-atlas-1.0.0-sources.tar.gz
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-sqoop-hook
-rw-r--r-- 1 root root 8969601 Nov 12 2018 apache-atlas-1.0.0-sqoop-hook.tar.gz
drwxr-xr-x 3 root root 4096 Nov 12 2018 apache-atlas-1.0.0-storm-hook
-rw-r--r-- 1 root root 39003696 Nov 12 2018 apache-atlas-1.0.0-storm-hook.tar.gz
drwxr-xr-x 2 root root 4096 Nov 12 2018 archive-tmp
-rw-r--r-- 1 root root 94839 Nov 12 2018 atlas-distro-1.0.0.jar
drwxr-xr-x 2 root root 4096 Nov 12 2018 bin
drwxr-xr-x 5 root root 4096 Nov 12 2018 conf
drwxr-xr-x 2 root root 4096 Nov 12 2018 maven-archiver
drwxr-xr-x 3 root root 4096 Nov 12 2018 maven-shared-archive-resources
drwxr-xr-x 2 root root 4096 Nov 12 2018 META-INF
-rw-r--r-- 1 root root 3493 Nov 12 2018 rat.txt
drwxr-xr-x 3 root root 4096 Nov 12 2018 test-classes
#编辑
vim /data/atlas-1.0.0/distro/target/apache-atlas-1.0.0/conf/atlas-application.properties
#配置存储
atlas.graph.storage.backend=hbase
#配置表名
atlas.graph.storage.hbase.table=atlas
#Hbase
#For standalone mode , specify localhost
#for distributed mode, specify zookeeper quorum here
#配置主机名称
atlas.graph.storage.hostname=shb-l0120794,shb-l0120795,shb-l0120796
######### Entity Audit Configs #########
#该表会在hbase中被自动创建
atlas.audit.hbase.tablename=apache_atlas_entity_audit
atlas.audit.zookeeper.session.timeout.ms=1000
#配置zk主机
atlas.audit.hbase.zookeeper.quorum=shb-l0120794,shb-l0120795,shb-l0120796
或者将如下 HBase 安装目录下的全部配置文件,复制到 Atlas 的 conf/hbase 目录(即下方 HBASE_CONF_DIR 指向的目录)
/opt/huawei/Bigdata/FusionInsight_HD_V100R002C70SPC200/1_27_RegionServer/etc
export HBASE_CONF_DIR=/data/atlas-1.0.0/distro/target/apache-atlas-1.0.0/conf/hbase
[root@SHB-L0120794 conf]# pwd
/data/atlas-1.0.0/distro/target/apache-atlas-1.0.0/conf
[root@SHB-L0120794 conf]# ll
total 88
-rw-r--r-- 1 root root 12319 Mar 19 21:20 atlas-application.properties
-rw-r--r-- 1 root root 3353 Mar 17 13:45 atlas-env.sh
-rw-r--r-- 1 root root 5164 Mar 19 20:30 atlas-log4j.xml
-rw-r--r-- 1 root root 5156 Mar 19 20:28 atlas-log4j.xml.info
-rw-r--r-- 1 root root 1459 Nov 9 2018 atlas-simple-authz-policy.json
-rw-r--r-- 1 root root 31403 Nov 9 2018 cassandra.yml.template
drwxr-xr-x 2 root root 4096 Mar 19 20:16 hbase
drwxr-xr-x 3 root root 4096 Mar 17 12:01 solr
-rw-r--r-- 1 root root 207 Nov 9 2018 users-credentials.properties
drwxr-xr-x 2 root root 4096 Mar 17 12:01 zookeeper
[root@SHB-L0120794 conf]#
将 Atlas conf 目录下的 solr 文件夹复制到 Solr 安装目录下,并重命名为 apache-atlas-conf(后续 solr create 命令的 -d 参数会用到)
[root@SHB-L0120794 solr5.5_1]# pwd
/data/solrcloud/solr5.5_1
[root@SHB-L0120794 solr5.5_1]# ll
total 1240
drwxr-xr-x 3 root root 4096 Mar 18 10:53 apache-atlas-conf
drwxr-xr-x 3 root root 4096 Mar 18 12:21 bin
-rw-r--r-- 1 root root 555321 May 1 2016 CHANGES.txt
drwxr-xr-x 13 root root 4096 May 1 2016 contrib
drwxr-xr-x 4 root root 4096 Mar 17 14:53 dist
drwxr-xr-x 19 root root 4096 Mar 17 14:53 docs
drwxr-xr-x 7 root root 4096 Mar 17 14:53 example
drwxr-xr-x 2 root root 36864 Mar 17 14:53 licenses
-rw-r--r-- 1 root root 12646 Feb 1 2016 LICENSE.txt
-rw-r--r-- 1 root root 590277 May 1 2016 LUCENE_CHANGES.txt
-rw-r--r-- 1 root root 26529 Feb 1 2016 NOTICE.txt
-rw-r--r-- 1 root root 7162 May 1 2016 README.txt
drwxr-xr-x 11 root root 4096 Mar 17 14:53 server
[root@SHB-L0120794 solr5.5_1]#
#编辑
vim /data/atlas-1.0.0/distro/target/apache-atlas-1.0.0/conf/atlas-application.properties
#配置项目
# Graph Search Index
#配置查询索引
atlas.graph.index.search.backend=solr
#Solr
#Solr cloud mode properties
atlas.graph.index.search.solr.mode=cloud
#配置solr zk
atlas.graph.index.search.solr.zookeeper-url=shb-l0120794:24002,shb-l0120795:24002,shb-l0120796:24002
bash /data/solrcloud/solr5.5_1/bin/solr create -c vertex_index -d /data/solrcloud/solr5.5_1/apache-atlas-conf -shards 2 -replicationFactor 2
bash /data/solrcloud/solr5.5_1/bin/solr create -c edge_index -d /data/solrcloud/solr5.5_1/apache-atlas-conf -shards 2 -replicationFactor 2
bash /data/solrcloud/solr5.5_1/bin/solr create -c fulltext_index -d /data/solrcloud/solr5.5_1/apache-atlas-conf -shards 2 -replicationFactor 2
#编辑
vim /data/atlas-1.0.0/distro/target/apache-atlas-1.0.0/conf/atlas-application.properties
#配置
######### Notification Configs #########
#默认true
atlas.notification.embedded=false
atlas.kafka.data=${sys:atlas.home}/data/kafka
#配置kafka在zk目录位置
atlas.kafka.zookeeper.connect=shb-l0120794:24002,shb-l0120795:24002,shb-l0120796:24002/kafka
#配置kafka所在主机和端口
atlas.kafka.bootstrap.servers=shb-l0120794:21005,shb-l0120795:21005,shb-l0120796:21005
atlas.kafka.zookeeper.session.timeout.ms=400
atlas.kafka.zookeeper.connection.timeout.ms=200
atlas.kafka.zookeeper.sync.time.ms=20
atlas.kafka.auto.commit.interval.ms=1000
atlas.kafka.hook.group.id=atlas
atlas.kafka.enable.auto.commit=false
atlas.kafka.auto.offset.reset=earliest
atlas.kafka.session.timeout.ms=30000
atlas.kafka.offsets.topic.replication.factor=1
atlas.kafka.poll.timeout.ms=1000
atlas.notification.create.topics=true
atlas.notification.replicas=1
#如下topic会自动在kafka中被创建
atlas.notification.topics=ATLAS_HOOK,ATLAS_ENTITIES
atlas.notification.log.failed.messages=true
atlas.notification.consumer.retry.interval=500
atlas.notification.hook.retry.interval=1000
#编辑
vim /data/atlas-1.0.0/distro/target/apache-atlas-1.0.0/conf/atlas-application.properties
#配置 修改集群名称,与华为大数据平台的集群名称保持一致:非常重要
atlas.cluster.name=hacluster
在 Hive 的配置中,将 hive.exec.post.hooks 设置为:
org.apache.atlas.hive.hook.HiveHook
[root@SHB-L0120795 metastore]# cd /opt/huawei/Bigdata/FusionInsight_HD_V100R002C70SPC200/install/FusionInsight-Hive-1.3.0/hive-1.3.0/conf
[root@SHB-L0120795 conf]# ll
total 48
-rw-r--r-- 1 root root 12313 Mar 19 20:58 atlas-application.properties
-rw------- 1 omm wheel 1139 Nov 8 2017 beeline-log4j.properties.template
-rw------- 1 omm wheel 3454 Nov 8 2017 gc-opts.sh
-rw------- 1 omm wheel 2662 Nov 8 2017 hive-exec-log4j.properties.template
-rw------- 1 omm wheel 3050 Nov 8 2017 hive-log4j.properties.template
[root@SHB-L0120795 conf]#
执行命令bin/atlas_start.py启动atlas服务
cd /data/atlas-1.0.0/distro/target/apache-atlas-1.0.0
#启动
bin/atlas_start.py
http://IP:21000/login.jsp 登录验证, 用户名:admin 密码:admin
2020-03-19 20:04:18,742 WARN - [main:] ~ Failed to identify the fs of dir hdfs://hacluster/hbase/lib, ignored (DynamicClassLoader:106)
java.io.IOException: Couldn't create proxy provider null
at org.apache.hadoop.hdfs.NameNodeProxies.createFailoverProxyProvider(NameNodeProxies.java:515)
解决方案:hdfs://hacluster 中的 hacluster 是华为大数据集群的环境名,需要将该集群名配置到 Atlas 中
#编辑
vim /data/atlas-1.0.0/distro/target/apache-atlas-1.0.0/conf/atlas-application.properties
#配置 修改集群名称,与华为大数据平台的集群名称保持一致:非常重要
atlas.cluster.name=hacluster
Class org.apache.hadoop.hdfs.server.namenode.ha.BlackListingFailoverProxyProvider not found
解决方案:webUI界面 服务管理 - HDFS - 服务配置 - 全部配置 :搜索:BlackListingFailoverProxyProvider 切换到
org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
2020-03-19 20:04:27,835 ERROR - [main:] ~ Exception in getKafkaConsumer (KafkaNotification:236)
org.apache.kafka.common.KafkaException: Failed to construct kafka consumer
at org.apache.kafka.clients.consumer.KafkaConsumer.&lt;init&gt;(KafkaConsumer.java:781)
解决方案:确认 Kafka 在 ZooKeeper 中的层级结构(chroot 目录),连接地址末尾需要带上该路径(如下面的 /kafka)
#配置kafka在zk目录位置
atlas.kafka.zookeeper.connect=shb-l0120794:24002,shb-l0120795:24002,shb-l0120796:24002/kafka