1. Eagle is currently tested on **JDK 1.7.x**; as of v0.4.0 it does not support JDK 1.8.
2. **NPM** must be installed as a prerequisite (on Mac OS try `brew install node`); otherwise the build fails with an error like:
[INFO] eagle-webservice ................................... FAILURE [03:03 min]
Failed to execute goal org.codehaus.mojo:exec-maven-plugin:1.5.0:exec (exec-ui-install) on project eagle-webservice: Command execution failed. Process exited with an error: 1 (Exit value: 1)
3. Eagle is built using [Apache Maven](https://maven.apache.org/).
$ tar -zxvf apache-eagle-0.4.0-incubating-src.tar.gz
$ cd apache-eagle-0.4.0-incubating-src
$ curl -O https://patch-diff.githubusercontent.com/raw/apache/incubator-eagle/pull/268.patch
$ git apply 268.patch
$ mvn clean package -DskipTests
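If the build succeeds, the binary tarball should land under the assembly module's target directory (the path below is an assumption for the 0.4.0 source layout; adjust to what your build actually produces):
$ ls eagle-assembly/target/*-bin.tar.gz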
2. Installation
2.0 Environment dependencies
For streaming platform dependencies (Apache Storm):
# Install Storm and configure the STORM_HOME environment variable
JAVA_HOME=/data/jdk1.7.0_79
STORM_HOME=/data/storm
PATH=$PATH:$JAVA_HOME/bin:$STORM_HOME/bin
CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export JAVA_HOME
export STORM_HOME
export CLASSPATH
export PATH
# Configure storm.yaml; Eagle uses three of these settings later (nimbus.host, nimbus.thrift.port, ui.port)
########### These MUST be filled in for a storm configuration
storm.zookeeper.servers:
    - "172.17.32.99"
#     - "server2"
nimbus.host: "172.17.32.99"
nimbus.thrift.port: 6627
storm.local.dir: "/var/storm"
supervisor.slots.ports:
    - 6700
    - 6701
    - 6702
    - 6703
ui.port: 8099

# Start Storm
nohup bin/storm nimbus >> /dev/null &
nohup bin/storm supervisor >> /dev/null &
nohup bin/storm ui >> /dev/null &
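As a quick sanity check (process names as shown by jps for Storm 0.9.x daemons; adjust the host and port to your storm.yaml), confirm the daemons are up and the UI answers:
$ jps | grep -E 'nimbus|supervisor|core'
$ curl -s -o /dev/null -w "%{http_code}\n" http://172.17.32.99:8099/
The curl should print 200 once the UI is ready.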
For database dependencies (choose one of them):
MySQL is used here. First create the eagle database:
create database eagle;
grant all privileges on eagle.* to eagle@'%' identified by 'eagle';
flush privileges;
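To confirm the grant works, log in as the new user from another machine (hostname as used later in this guide; the tables themselves are created by eagle-service on first start):
$ mysql -h hadoop.slave1 -u eagle -peagle -e 'show databases;'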
2.1 Unpack
$ tar -zxvf apache-eagle-0.4.0-incubating-bin.tar.gz
$ mv apache-eagle-0.4.0-incubating eagle
$ mv eagle /usr/
$ cd /usr/eagle
2.2 Configure conf/eagle-service.conf
eagle {
  service {
    storage-type="jdbc"
    storage-adapter="mysql"
    storage-username="eagle"
    storage-password="eagle"
    storage-database="eagle"
    storage-connection-url="jdbc:mysql://hadoop.slave1:3306/eagle"
    storage-connection-props="encoding=UTF-8"
    storage-driver-class="com.mysql.jdbc.Driver"
    storage-connection-max=8
  }
}
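Note that the MySQL JDBC driver is not bundled with Eagle. If eagle-service later fails with ClassNotFoundException: com.mysql.jdbc.Driver, copy the connector jar onto the service classpath (the target directory below is an assumption; any directory already on EAGLE_CLASSPATH works):
$ cp mysql-connector-java-5.1.38.jar /usr/eagle/lib/share/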
2.3 Configure bin/eagle-env.sh
# set EAGLE_HOME
export EAGLE_HOME=$(dirname $0)/..
# The java implementation to use. please use jdk 1.7 or later
# export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/usr/share/jdk1.7.0_79
# nimbus.host, default is localhost
export EAGLE_NIMBUS_HOST=localhost
# EAGLE_SERVICE_HOST, default is `hostname -f`
export EAGLE_SERVICE_HOST=localhost
# EAGLE_SERVICE_PORT, default is 9099
export EAGLE_SERVICE_PORT=9099
# EAGLE_SERVICE_USER
export EAGLE_SERVICE_USER=admin
# EAGLE_SERVICE_PASSWORD
export EAGLE_SERVICE_PASSWD=secret
export EAGLE_CLASSPATH=$EAGLE_HOME/conf
# Add eagle shared library jars
for file in $EAGLE_HOME/lib/share/*;do
EAGLE_CLASSPATH=$EAGLE_CLASSPATH:$file
done
# Add eagle storm library jars
# Separate out of share directory because of asm version conflict
export EAGLE_STORM_CLASSPATH=$EAGLE_CLASSPATH
for file in $EAGLE_HOME/lib/storm/*;do
EAGLE_STORM_CLASSPATH=$EAGLE_STORM_CLASSPATH:$file
done
2.4 Configure conf/eagle-scheduler.conf
### scheduler properties
appCommandLoaderEnabled = false
appCommandLoaderIntervalSecs = 1
appHealthCheckIntervalSecs = 5
### execution platform properties
envContextConfig.env = "storm"
envContextConfig.url = "http://hadoop.slave1:8744" # Storm UI address
envContextConfig.nimbusHost = "hadoop.slave1" # Storm nimbus host; do not use localhost
envContextConfig.nimbusThriftPort = 6627 # nimbus Thrift port
envContextConfig.jarFile = "/usr/eagle/lib/topology/eagle-topology-0.4.0-incubating-assembly.jar" # actual path of the topology assembly jar
### default topology properties
eagleProps.mailHost = "mailHost.com"
eagleProps.mailSmtpPort = "25"
eagleProps.mailDebug = "true"
eagleProps.eagleService.host = "localhost"
eagleProps.eagleService.port = 9099
eagleProps.eagleService.username = "admin"
eagleProps.eagleService.password = "secret"
eagleProps.dataJoinPollIntervalSec = 30
dynamicConfigSource.enabled = true
dynamicConfigSource.initDelayMillis = 0
dynamicConfigSource.delayMillis = 30000
2.5 Start eagle-service
[root@hadoop eagle]# bin/eagle-service.sh start
Starting eagle service ...
Eagle service started.
Open http://192.168.222.136:9099/eagle-service in a browser (use the IP of the host running Eagle).
Username/password: admin/secret.
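A headless check that the service is answering (substitute your own host; it should print 200 once the service is fully up):
$ curl -s -o /dev/null -w "%{http_code}\n" -u admin:secret http://192.168.222.136:9099/eagle-service/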
3. How to stream HDFS audit log data into Kafka
See the official docs: http://eagle.apache.org/docs/import-hdfs-auditLog.html. Two approaches are offered there; the logstash-kafka approach is used here.
Download and unpack Logstash in advance.
3.1 Create a Kafka topic as the streaming input.
[root@hadoop kafka-broker]# bin/kafka-topics.sh --create --zookeeper hadoop.master:2181,hadoop.slave1:2181,hadoop.slave2:2181 --replication-factor 1 --partitions 1 --topic sandbox_hdfs_audit_log
WARNING: Due to limitations in metric names, topics with a period ('.') or underscore ('_') could collide. To avoid issues it is best to use either, but not both.
Created topic "sandbox_hdfs_audit_log".
[root@hadoop kafka-broker]# bin/kafka-topics.sh --list --zookeeper hadoop.master:2181,hadoop.slave1:2181,hadoop.slave2:2181
sandbox_hdfs_audit_log
3.2 Install Logstash-kafka plugin
For Logstash 1.5.x, logstash-kafka has been integrated into logstash-input-kafka and logstash-output-kafka and ships with Logstash 1.5, so you can use it directly.
For Logstash 1.4.x, you must install logstash-kafka first. Note that this version does not support partition_key_format.
Logstash 2.4 is used here, which already includes the plugin.
[root@hadoop conf]# pwd
/root/logstash-2.4.0/conf
[root@hadoop conf]# ls
hdfs-audit.conf
[root@hadoop conf]# cat hdfs-audit.conf
input {
file {
type => "hdp-nn-audit"
path => "/var/log/audit/audit.log" # path to the HDFS audit log
start_position => end
sincedb_path => "/var/log/logstash"
}
}
filter{
if [type] == "hdp-nn-audit" {
grok {
match => ["message", "ugi=(?<user>([\w\d\-]+))@|ugi=(?<user>([\w\d\-]+))/[\w\d\-.]+@|ugi=(?<user>([\w\d.\-_]+))[\s(]+"]
}
}
}
output {
if [type] == "hdp-nn-audit" {
kafka {
codec => plain {
format => "%{message}"
}
bootstrap_servers => "192.168.222.136:9092" # Kafka broker address
topic_id => "sandbox_hdfs_audit_log" # topic created in 3.1
timeout_ms => 10000
retries => 3
client_id => "hdp-nn-audit"
}
# stdout { codec => rubydebug }
}
}
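The grok filter above pulls the user out of the ugi= field of each audit record. For illustration only (a made-up line in the standard HDFS audit format), a record such as
2016-08-10 11:00:00,000 INFO FSNamesystem.audit: allowed=true ugi=hdfs (auth:SIMPLE) ip=/192.168.222.1 cmd=getfileinfo src=/tmp dst=null perm=null
matches the third alternative of the pattern and captures user = hdfs; the first two alternatives handle Kerberos principals of the form user@REALM and user/host@REALM.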
3.3 Start Logstash
[root@hadoop logstash-2.4.0]# bin/logstash -f conf/hdfs-audit.conf
Settings: Default pipeline workers: 1
Pipeline main started
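To verify that audit events actually reach Kafka, tail the topic with the console consumer (old-style --zookeeper flag, which matches the broker generation used here):
[root@hadoop kafka-broker]# bin/kafka-console-consumer.sh --zookeeper hadoop.master:2181,hadoop.slave1:2181,hadoop.slave2:2181 --topic sandbox_hdfs_audit_log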
3.4 Troubleshooting
In the Kafka configuration file config/server.properties, the following settings needed to be changed:
#host.name=localhost
#advertised.host.name=
Uncomment them and set them to the host's own IP. In newer Kafka versions the equivalent settings look like this instead:
[root@Flyme-PostPay-32-99 config]# cat server.properties
############################# Server Basics #############################
# The id of the broker. This must be set to a unique integer for each broker.
broker.id=0
############################# Socket Server Settings #############################
# The address the socket server listens on. It will get the value returned from
# java.net.InetAddress.getCanonicalHostName() if not configured.
# FORMAT:
# listeners = security_protocol://host_name:port
# EXAMPLE:
# listeners = PLAINTEXT://your.host.name:9092
listeners=PLAINTEXT://:9092
# Hostname and port the broker will advertise to producers and consumers. If not set,
# it uses the value for "listeners" if configured. Otherwise, it will use the value
# returned from java.net.InetAddress.getCanonicalHostName().
# set to the host's IP
advertised.listeners=PLAINTEXT://192.168.222.136:9092
# The number of threads handling network requests
num.network.threads=3
# The number of threads doing disk I/O
num.io.threads=8
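After editing server.properties, restart the broker so the new listener takes effect (standard Kafka scripts, run from the Kafka install directory):
[root@hadoop kafka-broker]# bin/kafka-server-stop.sh
[root@hadoop kafka-broker]# bin/kafka-server-start.sh -daemon config/server.properties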
4. Configure and start the topology
4.1 Initialize the topology
Before initializing, edit the initialization parameters:
vi bin/eagle-topology-init.sh
# HDFS address
classification.fs.defaultFS=hdfs://hadoop.master:8020
classification.hbase.zookeeper.property.clientPort=2181\nclassification.hbase.zookeeper.quorum=localhost
# Hive metastore database
classification.accessType=metastoredb_jdbc\nclassification.password=hive\nclassification.user=hive\nclassification.jdbcDriverClassName=com.mysql.jdbc.Driver\nclassification.jdbcUrl=jdbc:mysql://hadoop.slave1/hive?createDatabaseIfNotExist=true
classification.accessType=oozie_api\nclassification.oozieUrl=http://hadoop.master:11000/oozie\nclassification.filter=status=RUNNING\nclassification.authType=SIMPLE
Change all of the above to the addresses of the clusters you actually want to monitor (keep the literal \n separators; they are part of the script's payload).
Then run: bin/eagle-topology-init.sh
Make sure eagle-service is running before executing it. The run must complete without any errors; it is safe to execute the script repeatedly.
4.2 Configure conf/sandbox-hdfsAuditLog-application.conf (this file is used by Storm when running the topology)
{
"envContextConfig" : {
"env" : "storm",
"mode" : "cluster",
"topologyName" : "sandbox-hdfsAuditLog-topology",
"stormConfigFile" : "security-auditlog-storm.yaml",
"parallelismConfig" : {
"kafkaMsgConsumer" : 1,
"hdfsAuditLogAlertExecutor*" : 1
}
},
"dataSourceConfig": {
"topic" : "sandbox_hdfs_audit_log", # 和logstash 创建的topic一致
"zkConnection" : "hadoop.master:2181,hadoop.slave1:2181,hadoop.slave2:2181", # zk 地址
"brokerZkPath" : "/brokers",
"zkConnectionTimeoutMS" : 15000,
"fetchSize" : 1048586,
"deserializerClass" : "org.apache.eagle.security.auditlog.HdfsAuditLogKafkaDeserializer",
"transactionZKServers" : "hadoop.master,hadoop.slave1,hadoop.slave2",
"transactionZKPort" : 2181,
"transactionZKRoot" : "/consumers",
"consumerGroupId" : "eagle.hdfsaudit.consumer",
"transactionStateUpdateMS" : 2000
},
"alertExecutorConfigs" : {
"hdfsAuditLogAlertExecutor" : {
"parallelism" : 1,
"partitioner" : "org.apache.eagle.policy.DefaultPolicyPartitioner",
"needValidation" : "true"
}
},
"eagleProps" : {
"site" : "sandbox",
"application": "hdfsAuditLog",
"dataJoinPollIntervalSec" : 30,
"mailHost" : "mailHost.com",
"mailSmtpPort":"25",
"mailDebug" : "true",
"eagleService": {
"host": "hadoop.slave1", # eagle服务地址,不要写localhost,这个配置文件是给storm的worker线程用的
"port": 9099
"username": "admin",
"password": "secret"
}
},
"dynamicConfigSource" : {
"enabled" : true,
"initDelayMillis" : 0,
"delayMillis" : 30000
}
}
4.3 Start the topology
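The default HDFS audit log monitoring topology can be started without extra arguments (assuming the script's built-in defaults point at the HDFS audit log main class and the conf file edited above):
bin/eagle-topology.sh start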
Starting any other topology requires specifying the main class and config explicitly, as below (Hive query logs must already be streaming into the Eagle platform):
bin/eagle-topology.sh --main org.apache.eagle.security.hive.jobrunning.HiveJobRunningMonitoringMain --config conf/sandbox-hiveQueryLog-application.conf start
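Either way, you can confirm the topology was submitted via Storm's own CLI, or in the Storm UI configured earlier (http://hadoop.slave1:8744):
$ storm list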