运行环境:
python 3.6.6
apache airflow 1.10.1
mysql 5.7.24
centos7:
--准备工作:
rpm -ivh https://centos7.iuscommunity.org/ius-release.rpm
yum install epel-release
或者
rpm -ivh https://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-7-11.noarch.rpm
--安装python36版本:
# yum -y install python36u-pip python36 python36-libs
-- 升级pip版本:
# pip3.6 install --upgrade pip
# pip --version
pip 18.1 from /usr/local/lib/python3.6/site-packages/pip (python 3.6)
# pip list
Package Version
---------- -------
pip 18.1
setuptools 39.0.1
--安装依赖的软件包:
# yum -y install gcc gcc-c++ cyrus-sasl cyrus-sasl-devel cyrus-sasl-lib
--安装mysql:
# wget http://repo.mysql.com/mysql57-community-release-el7-11.noarch.rpm
# rpm -ivh http://repo.mysql.com/mysql57-community-release-el7-11.noarch.rpm
# yum -y install mysql-community-server mysql-community-devel mysql-community-common
--配置文件:
#cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/tmp/mysql.sock
default-storage-engine =InnoDB
character_set_server = utf8mb4
character-set-client-handshake = FALSE
character-set-server = utf8mb4
collation-server = utf8mb4_unicode_ci
init_connect ='SET NAMES utf8mb4'
lower_case_table_names = 1
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
#skip-grant-tables
plugin-load=validate_password.so
validate-password=OFF
log-error=/var/log/mysqld.log
pid-file=/tmp/mysqld.pid
explicit_defaults_for_timestamp=true
-- 登录数据库:
systemctl start mysqld
#mysql -p -S /tmp/mysql.sock
--安装完成之后创建库和用户密码:
mysql> alter user root@'localhost' identified by 'xyz';
Query OK, 0 rows affected (0.00 sec)
mysql> grant all privileges on *.* to root@'%' identified by 'xyz' with grant option;
Query OK, 0 rows affected, 1 warning (0.00 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
create database airflow;
create user 'airflow'@'%' identified by 'airflow';
create user 'airflow'@'localhost' identified by 'airflow';
grant all on airflow.* to 'airflow'@'%';
grant all privileges on *.* to 'airflow'@'%';
flush privileges;
--查询airflow的文件:
# pip show --files apache-airflow
--airflow安装的路径:
# find / -name airflow
/usr/local/bin/airflow
/usr/local/lib/python3.6/site-packages/airflow
/usr/local/lib/python3.6/site-packages/airflow/bin/airflow
/usr/local/lib/python3.6/site-packages/airflow/www/templates/airflow
/usr/local/lib/python3.6/site-packages/airflow/www_rbac/templates/airflow
一般的安装路径位于:
airflow会被安装到Python的第三方包目录中,路径一般为${PYTHON_HOME}/lib/python3.6/site-packages/airflow。
---创建airflow的配置文件:
# mkdir -p /etc/airflow
# cat /etc/airflow/airflow.cfg
sql_alchemy_conn = mysql://airflow:airflow@localhost:3306/airflow
设置airflow的文件路径:
# export AIRFLOW_HOME=/etc/airflow
--安装mysql服务端及开发包(后续安装airflow的mysql依赖时需要,须在安装airflow之前完成):
# yum -y install mysql-community-server mysql-community-devel
---安装airflow软件:
#export SLUGIFY_USES_TEXT_UNIDECODE=yes
# pip3 install apache-airflow[all]
--版本查询:
# pip list | grep -i airflow
apache-airflow 1.10.1
--初始化:
export AIRFLOW_HOME=/etc/airflow
# airflow initdb
--启动airflow:
nohup airflow scheduler &
nohup airflow webserver -p 8080 &
默认使用8080端口。
---附加:
使用用户和密码登录:
0.安装插件:
#pip install apache-airflow[password]
1.配置文件修改 airflow.cfg
[webserver]
authenticate = True
auth_backend = airflow.contrib.auth.backends.password_auth
2.在python交互环境中执行以下代码,创建登录用户:
import airflow
from airflow import models, settings
from airflow.contrib.auth.backends.password_auth import PasswordUser
user = PasswordUser(models.User())
user.username = 'admin'
user.email = 'admin@example.com'
user.password = 'admin'
session = settings.Session()
session.add(user)
session.commit()
session.close()
exit()
-- 修改时间:
Airflow默认使用GMT(UTC)时间,比北京时间慢8小时(北京时间 = GMT + 8小时)。
1.修改airflow.cfg中[core]部分的默认时区:default_timezone = Asia/Shanghai(注:原文此步骤内容缺失,此处为补充,请按实际情况确认)
2.修改webserver界面右上角当前时间:
vim /usr/local/lib/python3.6/site-packages/airflow/www/templates/admin/master.html
将原来的下面这一行注释掉:
//var UTCseconds = (x.getTime() + x.getTimezoneOffset()*60*1000);
并替换为如下内容(不再加上时区偏移量):
var UTCseconds = x.getTime() //+ x.getTimezoneOffset()*60*1000);
3.修改webserver lastRun时间
修改文件 /usr/local/lib/python3.6/site-packages/airflow/models.py,在方法get_last_dagrun之前加上方法utc2local(注意保持与类中其他方法相同的缩进),如下:
def utc2local(self,utc):
import time
epoch = time.mktime(utc.timetuple())
offset = datetime.fromtimestamp(epoch) - datetime.utcfromtimestamp(epoch)
return utc + offset
@provide_session
def get_last_dagrun(self, session=None, include_externally_triggered=False):
DR = DagRun
qry = session.query(DR).filter(
DR.dag_id == self.dag_id,
)
if not include_externally_triggered:
qry = qry.filter(DR.external_trigger.__eq__(False))
qry = qry.order_by(DR.execution_date.desc())
last = qry.first()
return last
4.修改文件:/usr/local/lib/python3.6/site-packages/airflow/www/templates/airflow/dags.html,将其中的
last_run.execution_date.strftime("%Y-%m-%d %H:%M")和last_run.start_date.strftime("%Y-%m-%d %H:%M")分别修改为:
dag.utc2local(last_run.execution_date).strftime("%Y-%m-%d %H:%M")
dag.utc2local(last_run.start_date).strftime("%Y-%m-%d %H:%M")
示例如下:
之前的代码:
{% if dag %}
{% set last_run = dag.get_last_dagrun(include_externally_triggered=True) %}
{% if last_run and last_run.execution_date %}
{{ last_run.execution_date.strftime("%Y-%m-%d %H:%M") }}
{% endif %}
{% endif %}
-- 修改后:
{% if dag %}
{% set last_run = dag.get_last_dagrun(include_externally_triggered=True) %}
{% if last_run and last_run.execution_date %}
{{ dag.utc2local(last_run.execution_date).strftime("%Y-%m-%d %H:%M") }}
{% endif %}
{% endif %}
修改完成之后重启webserver
--若独立安装则至少需要的插件:
export SLUGIFY_USES_TEXT_UNIDECODE=yes
pip install apache-airflow
pip install apache-airflow[devel]
pip install apache-airflow[celery]
pip install apache-airflow[jdbc]
pip install apache-airflow[mysql]
pip install apache-airflow[password]
pip install apache-airflow[rabbitmq]
pip install apache-airflow[redis]
--文章参考:
http://airflow.apache.org/installation.html