BashOperator主要的功能是执行shell命令或者shell脚本。负责具体的执行过程的是BashOperator.execute()函数。
airflow的bash_operator.py文件:
from builtins import bytes
import os
import signal
from subprocess import Popen, STDOUT, PIPE
from tempfile import gettempdir, NamedTemporaryFile
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.utils.file import TemporaryDirectory
class BashOperator(BaseOperator):
"""
:param xcom_push: If xcom_push is True, the last line written to stdout
will also be pushed to an XCom when the bash command completes.
:type xcom_push: bool
:param env: If env is not None, it must be a mapping that defines the
environment variables for the new process; these are used instead
of inheriting the current process environment, which is the default
behavior. (templated)
:type env: dict
:type output_encoding: output encoding of bash command
"""
template_fields = ('bash_command', 'env')
template_ext = ('.sh', '.bash',)
ui_color = '#f0ede4'
@apply_defaults # 处理默认的参数
def __init__(
self,
bash_command, # string 可以是单独的命令,或者是命令集,或者是.sh文件
xcom_push=False,
env=None,
output_encoding='utf-8',
*args, **kwargs):
super(BashOperator, self).__init__(*args, **kwargs)
self.bash_command = bash_command
self.env = env
self.xcom_push_flag = xcom_push
self.output_encoding = output_encoding
def execute(self, context):
"""
Execute the bash command in a temporary directory
which will be cleaned afterwards
"""
bash_command = self.bash_command
self.log.info("Tmp dir root location: \n %s", gettempdir())
with TemporaryDirectory(prefix='airflowtmp') as tmp_dir:
with NamedTemporaryFile(dir=tmp_dir, prefix=self.task_id) as f:
f.write(bytes(bash_command, 'utf_8'))
f.flush()
fname = f.name
script_location = tmp_dir + "/" + fname
self.log.info(
"Temporary script location: %s",
script_location
)
def pre_exec():
# Restore default signal disposition and invoke setsid
for sig in ('SIGPIPE', 'SIGXFZ', 'SIGXFSZ'):
if hasattr(signal, sig):
signal.signal(getattr(signal, sig), signal.SIG_DFL)
os.setsid()
self.log.info("Running command: %s", bash_command)
sp = Popen(
['bash', fname],
stdout=PIPE, stderr=STDOUT,
cwd=tmp_dir, env=self.env,
preexec_fn=pre_exec)
self.sp = sp
self.log.info("Output:")
line = ''
for line in iter(sp.stdout.readline, b''):
line = line.decode(self.output_encoding).strip()
self.log.info(line)
sp.wait()
self.log.info(
"Command exited with return code %s",
sp.returncode
)
if sp.returncode:
raise AirflowException("Bash command failed")
if self.xcom_push_flag:
return line
def on_kill(self):
self.log.info('Sending SIGTERM signal to bash process group')
os.killpg(os.getpgid(self.sp.pid), signal.SIGTERM)
TemporaryDirectory:创建一个临时的目录,它使用了@contextmanager,生成了一个上下文管理器,因此它能用在with环境里。用contextmanager装饰的函数,要返回一个生成器,并且只能返回一个值
@contextmanager # 生成一个上下文管理器
def TemporaryDirectory(suffix='', prefix=None, dir=None):
name = mkdtemp(suffix=suffix, prefix=prefix, dir=dir) # suffix:后缀 prefix: 前缀
try:
yield name # yield 生成器 仅返回一个值
finally:
try:
shutil.rmtree(name) # 当with结束之后,删除临时目录
except OSError as e:
# ENOENT - no such file or directory
if e.errno != errno.ENOENT:
raise e
NamedTemporaryFile: 创建一个临时的文件,它继承了一个类,这个类实现了__enter__, exit 方法,因此能用with
pre_exec: 捕捉信号,并进行信号处理
subprocess.Popen: 具体执行command或者shell脚本