下载Oozie工作流
http://oozie.apache.org/
oozie-4.2.0
下载依赖
http://oozie.apache.org/docs/4.2.0/DG_QuickStart.html
ext-2.2.zip
解压后
tar -zxvf oozie-4.2.0.tar.gz
cd oozie-4.2.0
修改POM文件里的以下参数
1.7
因为不支持最新版本的Hive和Spark,其他组件的版本保持默认
修改hadoop-2的版本
hadoop-2
<repository>
<id>codehaus-mule-repo</id>
<url>https://repository-master.mulesoft.org/nexus/content/groups/public/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
bin/mkdistro.sh -P hadoop-2 -DskipTests
或
mvn clean package assembly:single -P hadoop-2 -DskipTests
将./oozie-4.2.0/distro/target/oozie-4.2.0-distro.tar.gz 拷贝出来解压
tar -zxvf oozie-4.2.0-distro.tar.gz -C /usr/local/
mv /usr/local/oozie-4.2.0 /usr/local/oozie
vim /etc/profile
export OOZIE_HOME=/usr/local/oozie
export OOZIE_CONFIG=/usr/local/oozie/conf
PATH=$OOZIE_HOME/bin:$PATH
复制依赖包
mkdir -p /usr/local/oozie/libext
cp /usr/local/src/mysql-connector-java-5.1.35.jar /usr/local/oozie/libext/
cp ext-2.2.zip /usr/local/oozie/libext/
将hadoop客户端的jar包拷进去
cd $HADOOP_HOME
find . -name "*.jar" | xargs -t -I {} cp {} /usr/local/oozie/libext
注意:需要将libext中的hsqldb-2.0.0.jar删掉,否则会引起jar包冲突。
rm /usr/local/oozie/libext/hsqldb-2.0.0.jar
cd $OOZIE_HOME
addtowar.sh -inputwar oozie.war -outputwar oozie-server/webapps/oozie.war -hadoop 2.6.0 /usr/local/hadoop/ -extjs libext/ext-2.2.zip -jars /usr/local/oozie/libext/mysql-connector-java-5.1.35.jar:/usr/local/oozie/libext/htrace-core-3.0.4.jar:/usr/local/oozie/libext/avro-1.7.4.jar
在Oozie安装目录(/usr/local/oozie,即解压得到的oozie-4.2.0目录)下有一个 oozie-sharelib-4.2.0.tar.gz。使用tar -zxvf oozie-sharelib-4.2.0.tar.gz将它解压,得到share目录
将这个share上传到hdfs上
hdfs dfs -put /usr/local/oozie/share /user/hadoop
配置/usr/local/oozie/conf/oozie-site.xml
<property>
<name>oozie.service.JPAService.jdbc.driver</name>
<value>com.mysql.jdbc.Driver</value>
<description>
JDBC driver class.
</description>
</property>
<property>
<name>oozie.service.JPAService.jdbc.url</name>
<value>jdbc:mysql://master:3306/oozie</value>
<description>
JDBC URL.
</description>
</property>
<property>
<name>oozie.service.JPAService.jdbc.username</name>
<value>oozie</value>
<description>
DB user name.
</description>
</property>
<property>
<name>oozie.service.JPAService.jdbc.password</name>
<value>oozie</value>
<description>
DB user password.
IMPORTANT: if password is empty leave a 1 space string, the service trims the value,
if empty Configuration assumes it is NULL.
</description>
</property>
<property>
<name>oozie.service.HadoopAccessorService.hadoop.configurations</name>
<value>*=/usr/local/hadoop/etc/hadoop</value>
<description>
Comma separated AUTHORITY=HADOOP_CONF_DIR, where AUTHORITY is the HOST:PORT of
the Hadoop service (JobTracker, YARN, HDFS). The wildcard '*' configuration is
used when there is no exact match for an authority. The HADOOP_CONF_DIR contains
the relevant Hadoop *-site.xml files. If the path is relative, it is looked up within
the Oozie configuration directory; though the path can be absolute (i.e. to point
to Hadoop client conf/ directories in the local filesystem).
</description>
</property>
<property>
<name>oozie.service.WorkflowAppService.system.libpath</name>
<value>hdfs://master:8020/user/hadoop/share/lib</value>
<description>
System library path to use for workflow applications.
This path is added to workflow application if their job properties sets
the property 'oozie.use.system.libpath' to true.
</description>
</property>
<property>
<name>oozie.service.PurgeService.older.than</name>
<value>1</value>
<description>
Completed workflow jobs older than this value, in days, will be purged by the PurgeService.
</description>
</property>
<property>
<name>oozie.service.PurgeService.coord.older.than</name>
<value>1</value>
<description>
Completed coordinator jobs older than this value, in days, will be purged by the PurgeService.
</description>
</property>
<property>
<name>oozie.service.PurgeService.bundle.older.than</name>
<value>1</value>
<description>
Completed bundle jobs older than this value, in days, will be purged by the PurgeService.
</description>
</property>
<property>
<name>oozie.processing.timezone</name>
<value>GMT+0800</value>
<description>
Oozie server timezone. Valid values are UTC and GMT(+/-)####, for example 'GMT+0530' would be India
timezone. All dates parsed and generated dates by Oozie Coordinator/Bundle will be done in the specified
timezone. The default value of 'UTC' should not be changed under normal circumstances. If for any reason
is changed, note that GMT(+/-)#### timezones do not observe DST changes.
</description>
</property>
CREATE USER 'oozie' IDENTIFIED BY 'oozie';
GRANT ALL PRIVILEGES ON *.* TO 'oozie'@'%' WITH GRANT OPTION;
flush privileges;
创建数据库
create database oozie;
生成Oozie所需要的数据表
bin/ooziedb.sh create -sqlfile oozie.sql -run
启动oozie
bin/oozie-start.sh
启动mr-jobhistoryserver
mr-jobhistory-daemon.sh start historyserver
查看启动状态
oozie admin -oozie http://localhost:11000/oozie -status
如果输出为 System mode: NORMAL,表明启动成功,否则失败。