Installing from Docker Images
https://airflow.apache.org/docs/docker-stack/index.html
# Install extra OS packages with apt (vim is the example package here).
FROM apache/airflow:2.5.1
# apt-get needs root privileges.
USER root
# update + install + cleanup stay in a single RUN so the apt lists and caches
# are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        vim \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Drop back to the airflow user once the root-only step is done.
USER airflow
# Install extra Python packages from PyPI.
FROM apache/airflow:2.5.1
# Pin apache-airflow to the version of the base image in the same pip command,
# so that resolving the extra package cannot upgrade or downgrade Airflow itself.
# Keep this version in sync with the FROM tag above.
RUN pip install --no-cache-dir "apache-airflow==2.5.1" lxml
# Install Python packages listed in requirements.txt.
FROM apache/airflow:2.5.1
COPY requirements.txt /
# Pin apache-airflow to the version of the base image in the same pip command,
# so that the requirements cannot upgrade or downgrade Airflow itself.
# Keep this version in sync with the FROM tag above.
RUN pip install --no-cache-dir "apache-airflow==2.5.1" -r /requirements.txt
# Embedding DAGs: bake DAG files into the image.
FROM apache/airflow:2.5.1
# Copy test_dag.py into /opt/airflow/dags/ owned by user "airflow" and group "root".
# The trailing slash makes Docker treat the destination as a directory even if
# it does not exist yet, instead of writing a regular file named "dags".
COPY --chown=airflow:root test_dag.py /opt/airflow/dags/
test_dag.py如下
import datetime
import pendulum
from airflow.models.dag import DAG
from airflow.operators.empty import EmptyOperator
# Current UTC time, and the instant three hours earlier truncated to the hour.
now = pendulum.now(tz="UTC")
now_to_the_hour = (now - datetime.timedelta(hours=3)).replace(
    minute=0, second=0, microsecond=0
)
START_DATE = now_to_the_hour
DAG_NAME = "test_dag_v1"

# NOTE: the DAG below is given a fixed start_date; START_DATE is defined above
# but is not passed to it.
with DAG(
    dag_id=DAG_NAME,
    schedule="*/10 * * * *",
    default_args={"depends_on_past": True},
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
) as dag:
    # Three placeholder tasks, attached to the DAG by the enclosing context.
    run_this_1 = EmptyOperator(task_id="run_this_1")
    run_this_2 = EmptyOperator(task_id="run_this_2")
    run_this_3 = EmptyOperator(task_id="run_this_3")

    # Linear chain: run_this_1 -> run_this_2 -> run_this_3.
    run_this_1 >> run_this_2 >> run_this_3
启用 BuildKit 来构建镜像
# Export the variable so that the docker CLI started from this shell sees it;
# a bare assignment would stay local to the shell.
export DOCKER_BUILDKIT=1
build镜像时需要添加参数--build-arg DOCKER_CONTEXT_FILES=docker-context-files
# Build an image that installs the packages listed in
# docker-context-files/requirements.txt.
mkdir -p docker-context-files
# Write requirements.txt with a here-document: everything up to the EOF line
# is redirected into the file.
cat <<EOF >./docker-context-files/requirements.txt
beautifulsoup4==4.10.0
EOF
export DOCKER_BUILDKIT=1
docker build . \
    --build-arg DOCKER_CONTEXT_FILES=./docker-context-files \
    --tag "my-beautifulsoup4-airflow:0.0.1"
# Smoke test: importing bs4 inside the freshly built image must succeed.
docker run -it my-beautifulsoup4-airflow:0.0.1 python -c 'import bs4; import sys; sys.exit(0)' && \
    echo "Success! Beautifulsoup4 installed" && echo
比如配置pip.conf
# Build an image with a custom pip.conf supplied through docker-context-files.
mkdir -p docker-context-files
# Write pip.conf with a here-document: everything up to the EOF line is
# redirected into the file.
cat <<EOF >./docker-context-files/pip.conf
[global]
verbose = 2
EOF
export DOCKER_BUILDKIT=1
docker build . \
    --build-arg DOCKER_CONTEXT_FILES=./docker-context-files \
    --tag "my-custom-pip-verbose-airflow:0.0.1"
# Run the image that was just built (not the beautifulsoup4 one) and print the
# pip configuration visible inside it, so the verbose setting can be checked.
docker run -it my-custom-pip-verbose-airflow:0.0.1 bash -c 'pip config list' && \
    echo "Success! Image with custom pip.conf built" && echo
下载 .whl 等安装包到 docker-context-files（用于从本地包安装，替换其中已有的 .whl）
# Download Airflow and the listed extras as package files into
# docker-context-files, resolved against the matching constraints file.
export AIRFLOW_VERSION="2.2.4"
mkdir -p docker-context-files
# Remove artifacts from earlier runs; "|| true" keeps going when nothing matches.
# Note that the *.txt pattern also removes any other .txt file in that directory.
rm docker-context-files/*.whl docker-context-files/*.tar.gz docker-context-files/*.txt || true
# Fetch the Python 3.7 constraints file for the selected Airflow version.
curl -Lo "docker-context-files/constraints-3.7.txt" \
    "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.7.txt"
# Reminder surrounded by blank lines, followed by the local Python version.
printf '\n%s\n\n' "Make sure you use the right python version here (should be same as in constraints)!"
python --version
pip download --dest docker-context-files \
    --constraint docker-context-files/constraints-3.7.txt \
    "apache-airflow[async,celery,elasticsearch,kubernetes,postgres,redis,ssh,statsd,virtualenv]==${AIRFLOW_VERSION}"
添加extra
# Build an image with additional Airflow extras (mssql, hdfs) and an additional
# Python dependency (oauth2client).
export DOCKER_BUILDKIT=1
export AIRFLOW_VERSION=2.3.4
docker build . \
    --pull \
    --tag "my-pypi-extras-and-deps:0.0.1" \
    --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bullseye" \
    --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \
    --build-arg ADDITIONAL_AIRFLOW_EXTRAS="mssql,hdfs" \
    --build-arg ADDITIONAL_PYTHON_DEPS="oauth2client"
# Build an image with an additional Python dependency (mpi4py) together with
# the extra apt packages passed for the dev and runtime stages.
export DOCKER_BUILDKIT=1
export AIRFLOW_VERSION=2.2.4
docker build . \
    --pull \
    --tag "my-build-essential-image:0.0.1" \
    --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" \
    --build-arg AIRFLOW_VERSION="${AIRFLOW_VERSION}" \
    --build-arg ADDITIONAL_PYTHON_DEPS="mpi4py" \
    --build-arg ADDITIONAL_DEV_APT_DEPS="libopenmpi-dev" \
    --build-arg ADDITIONAL_RUNTIME_APT_DEPS="openmpi-common"
通过github
# Build an image that installs Airflow from the GitHub main-branch archive,
# using the constraints-main constraints reference.
export DOCKER_BUILDKIT=1
docker build . \
    --pull \
    --tag "my-github-main:0.0.1" \
    --build-arg PYTHON_BASE_IMAGE="python:3.7-slim-bullseye" \
    --build-arg AIRFLOW_INSTALLATION_METHOD="https://github.com/apache/airflow/archive/main.tar.gz#egg=apache-airflow" \
    --build-arg AIRFLOW_CONSTRAINTS_REFERENCE="constraints-main"
Build args参考
https://airflow.apache.org/docs/docker-stack/build-arg-ref.html