1. Submission path: start from the self.executor.heartbeat() call inside the SchedulerJob class's _execute_helper(self) method,
which leads to the BaseExecutor class's heartbeat(self) method:
def heartbeat(self):
    # Triggering new jobs
    if not self.parallelism:
        open_slots = len(self.queued_tasks)
    else:
        open_slots = self.parallelism - len(self.running)

    self.log.debug("%s running task instances", len(self.running))
    self.log.debug("%s in queue", len(self.queued_tasks))
    self.log.debug("%s open slots", open_slots)

    sorted_queue = sorted(
        [(k, v) for k, v in self.queued_tasks.items()],
        key=lambda x: x[1][1],
        reverse=True)
    for i in range(min((open_slots, len(self.queued_tasks)))):
        key, (command, _, queue, ti) = sorted_queue.pop(0)
        # TODO(jlowin) without a way to know what Job ran which tasks,
        # there is a danger that another Job started running a task
        # that was also queued to this executor. This is the last chance
        # to check if that happened. The most probable way is that a
        # Scheduler tried to run a task that was originally queued by a
        # Backfill. This fix reduces the probability of a collision but
        # does NOT eliminate it.
        self.queued_tasks.pop(key)
        ti.refresh_from_db()
        # command here is a CLI command string ("airflow run ...")
        if ti.state != State.RUNNING:
            self.running[key] = command
            self.execute_async(key=key,
                               command=command,
                               queue=queue,
                               executor_config=ti.executor_config)
        else:
            self.log.info(
                'Task is already running, not sending to '
                'executor: {}'.format(key))

    # Calling child class sync method
    self.log.debug("Calling the %s sync method", self.__class__)
    self.sync()
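
To make the slot and priority logic above concrete, here is a small standalone sketch (not Airflow code; the keys, commands, and priorities are made up): queued_tasks maps a task key to a (command, priority, queue, ti) tuple, the number of open slots is parallelism minus the running count, and the queue is drained highest priority first, exactly as the sort key x[1][1] with reverse=True does above.

# Standalone sketch of BaseExecutor.heartbeat()'s draining logic (illustrative data).
parallelism = 4
running = {"dag_a.task_1.2019-01-01": "airflow run dag_a task_1 2019-01-01T00:00:00"}
queued_tasks = {
    "dag_a.task_2.2019-01-01": ("airflow run dag_a task_2 ...", 1, "default", None),
    "dag_b.task_1.2019-01-01": ("airflow run dag_b task_1 ...", 10, "default", None),
    "dag_c.task_9.2019-01-01": ("airflow run dag_c task_9 ...", 5, "default", None),
}

open_slots = parallelism - len(running) if parallelism else len(queued_tasks)
# Sort by priority (index 1 of the value tuple), highest first -- same key as above.
sorted_queue = sorted(queued_tasks.items(), key=lambda x: x[1][1], reverse=True)

for _ in range(min(open_slots, len(queued_tasks))):
    key, (command, priority, queue, ti) = sorted_queue.pop(0)
    queued_tasks.pop(key)
    running[key] = command
    print("would execute_async:", key, "with priority", priority)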
Then to the execute_async method of the CeleryExecutor class (taking Celery as the example here), which submits the command to the queue:
def execute_async(self, key, command,
                  queue=DEFAULT_CELERY_CONFIG['task_default_queue'],
                  executor_config=None):
    self.log.info("[celery] queuing {key} through celery, "
                  "queue={queue}".format(**locals()))
    self.tasks[key] = execute_command.apply_async(
        args=[command], queue=queue)
    self.last_state[key] = celery_states.PENDING
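
execute_async simply hands the command string to Celery via apply_async and remembers the returned AsyncResult plus a last-seen state. A minimal sketch of the same pattern outside Airflow (assuming a local Redis broker at redis://localhost:6379/0; the task name run_shell and the key are made up):

import subprocess

from celery import Celery
from celery import states as celery_states

app = Celery("sketch", broker="redis://localhost:6379/0")

@app.task
def run_shell(command):
    # Worker-side: run the shell command, fail the task on a non-zero exit code.
    subprocess.check_call(command, shell=True)

tasks, last_state = {}, {}
key = ("example_dag", "example_task", "2019-01-01T00:00:00")
# Keep the AsyncResult so a later sync/poll step can inspect its .state,
# just as CeleryExecutor keeps it in self.tasks.
tasks[key] = run_shell.apply_async(args=["echo hello"], queue="default")
last_state[key] = celery_states.PENDING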
The data submitted to the queue is then consumed by the execute_command(command) task in celery_executor.py:
@app.task
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    env = os.environ.copy()
    try:
        subprocess.check_call(command, shell=True, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)
        raise AirflowException('Celery command failed')
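
Back on the scheduler side, heartbeat() ends by calling self.sync(), which for CeleryExecutor roughly amounts to polling the AsyncResult handles saved by execute_async. A generic polling sketch of that idea (this is not the actual CeleryExecutor.sync source):

from celery import states as celery_states

def poll(tasks, last_state):
    # tasks: key -> AsyncResult, last_state: key -> last observed Celery state.
    for key, async_result in list(tasks.items()):
        state = async_result.state
        if state != last_state.get(key):
            if state == celery_states.SUCCESS:
                print("task finished:", key)
                tasks.pop(key)
            elif state in (celery_states.FAILURE, celery_states.REVOKED):
                print("task failed:", key)
                tasks.pop(key)
            last_state[key] = state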
2. How the command is generated
Start again from SchedulerJob._execute_helper(self),
which calls the SchedulerJob class's _execute_task_instances method:
def _execute_task_instances(self,
                            simple_dag_bag,
                            states,
                            session=None):
    """
    Attempts to execute TaskInstances that should be executed by the scheduler.

    There are three steps:
    1. Pick TIs by priority with the constraint that they are in the expected states
       and that we do not exceed max_active_runs or pool limits.
    2. Change the state for the TIs above atomically.
    3. Enqueue the TIs in the executor.

    :param simple_dag_bag: TaskInstances associated with DAGs in the
        simple_dag_bag will be fetched from the DB and executed
    :type simple_dag_bag: SimpleDagBag
    :param states: Execute TaskInstances in these states
    :type states: Tuple[State]
    :return: None
    """
    executable_tis = self._find_executable_task_instances(simple_dag_bag, states,
                                                          session=session)

    def query(result, items):
        tis_with_state_changed = self._change_state_for_executable_task_instances(
            items,
            states,
            session=session)
        self._enqueue_task_instances_with_queued_state(
            simple_dag_bag,
            tis_with_state_changed)
        session.commit()
        return result + len(tis_with_state_changed)

    return helpers.reduce_in_chunks(query, executable_tis, 0, self.max_tis_per_query)
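
The final reduce_in_chunks call processes the executable TIs in batches of at most max_tis_per_query, running the inner query function (state change + enqueue + commit) once per batch. A minimal sketch of that chunked fold (assumed behavior, not the helpers.reduce_in_chunks source):

from functools import reduce

def reduce_in_chunks(fn, iterable, initializer, chunk_size):
    # Split the list into chunks of at most chunk_size and fold fn over them.
    chunks = [iterable[i:i + chunk_size] for i in range(0, len(iterable), chunk_size)]
    return reduce(fn, chunks, initializer)

# Example: process 5 items in chunks of 2, counting how many were handled.
processed = reduce_in_chunks(lambda total, chunk: total + len(chunk),
                             list(range(5)), 0, 2)
print(processed)  # 5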
Then to the _enqueue_task_instances_with_queued_state method:
def _enqueue_task_instances_with_queued_state(self, simple_dag_bag, task_instances):
    """
    Takes task_instances, which should have been set to queued, and enqueues them
    with the executor.

    :param task_instances: TaskInstances to enqueue
    :type task_instances: List[TaskInstance]
    :param simple_dag_bag: Should contain all of the task_instances' dags
    :type simple_dag_bag: SimpleDagBag
    """
    TI = models.TaskInstance
    # actually enqueue them
    # TI.generate_command builds the "airflow run ..." command string
    for task_instance in task_instances:
        simple_dag = simple_dag_bag.get_dag(task_instance.dag_id)
        command = " ".join(TI.generate_command(
            task_instance.dag_id,
            task_instance.task_id,
            task_instance.execution_date,
            local=True,
            mark_success=False,
            ignore_all_deps=False,
            ignore_depends_on_past=False,
            ignore_task_deps=False,
            ignore_ti_state=False,
            pool=task_instance.pool,
            file_path=simple_dag.full_filepath,
            pickle_id=simple_dag.pickle_id))

        priority = task_instance.priority_weight
        queue = task_instance.queue
        self.log.info(
            "Sending %s to executor with priority %s and queue %s",
            task_instance.key, priority, queue
        )

        # save attributes so sqlalchemy doesnt expire them
        copy_dag_id = task_instance.dag_id
        copy_task_id = task_instance.task_id
        copy_execution_date = task_instance.execution_date
        make_transient(task_instance)
        task_instance.dag_id = copy_dag_id
        task_instance.task_id = copy_task_id
        task_instance.execution_date = copy_execution_date

        self.executor.queue_command(
            task_instance,
            command,
            priority=priority,
            queue=queue)
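
The copy-then-make_transient dance exists because SQLAlchemy can expire attributes of a session-bound object (for example after a commit); reading the needed fields first and then detaching the instance keeps them available as plain values. A small standalone illustration with an in-memory SQLite database and a hypothetical TaskRow model (not the Airflow TaskInstance model):

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base, make_transient

Base = declarative_base()

class TaskRow(Base):
    __tablename__ = "task_row"
    id = Column(Integer, primary_key=True)
    dag_id = Column(String)
    task_id = Column(String)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(TaskRow(id=1, dag_id="example_dag", task_id="example_task"))
    session.commit()                      # commit expires loaded attributes
    row = session.get(TaskRow, 1)
    copy_dag_id, copy_task_id = row.dag_id, row.task_id  # read before detaching
    make_transient(row)                   # detach the instance from the session
    row.dag_id, row.task_id = copy_dag_id, copy_task_id  # restore plain values

print(row.dag_id, row.task_id)            # usable outside the session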
And finally to the TaskInstance class's generate_command method:
@staticmethod
def generate_command(dag_id,
                     task_id,
                     execution_date,
                     mark_success=False,
                     ignore_all_deps=False,
                     ignore_depends_on_past=False,
                     ignore_task_deps=False,
                     ignore_ti_state=False,
                     local=False,
                     pickle_id=None,
                     file_path=None,
                     raw=False,
                     job_id=None,
                     pool=None,
                     cfg_path=None):
    """
    Generates the shell command required to execute this task instance.

    :param dag_id: DAG ID
    :type dag_id: unicode
    :param task_id: Task ID
    :type task_id: unicode
    :param execution_date: Execution date for the task
    :type execution_date: datetime
    :param mark_success: Whether to mark the task as successful
    :type mark_success: bool
    :param ignore_all_deps: Ignore all ignorable dependencies.
        Overrides the other ignore_* parameters.
    :type ignore_all_deps: boolean
    :param ignore_depends_on_past: Ignore depends_on_past parameter of DAGs
        (e.g. for Backfills)
    :type ignore_depends_on_past: boolean
    :param ignore_task_deps: Ignore task-specific dependencies such as depends_on_past
        and trigger rule
    :type ignore_task_deps: boolean
    :param ignore_ti_state: Ignore the task instance's previous failure/success
    :type ignore_ti_state: boolean
    :param local: Whether to run the task locally
    :type local: bool
    :param pickle_id: If the DAG was serialized to the DB, the ID
        associated with the pickled DAG
    :type pickle_id: unicode
    :param file_path: path to the file containing the DAG definition
    :param raw: raw mode (needs more details)
    :param job_id: job ID (needs more details)
    :param pool: the Airflow pool that the task should run in
    :type pool: unicode
    :param cfg_path: the Path to the configuration file
    :type cfg_path: basestring
    :return: shell command that can be used to run the task instance
    """
    iso = execution_date.isoformat()
    cmd = ["airflow", "run", str(dag_id), str(task_id), str(iso)]
    cmd.extend(["--mark_success"]) if mark_success else None
    cmd.extend(["--pickle", str(pickle_id)]) if pickle_id else None
    cmd.extend(["--job_id", str(job_id)]) if job_id else None
    cmd.extend(["-A"]) if ignore_all_deps else None
    cmd.extend(["-i"]) if ignore_task_deps else None
    cmd.extend(["-I"]) if ignore_depends_on_past else None
    cmd.extend(["--force"]) if ignore_ti_state else None
    cmd.extend(["--local"]) if local else None
    cmd.extend(["--pool", pool]) if pool else None
    cmd.extend(["--raw"]) if raw else None
    cmd.extend(["-sd", file_path]) if file_path else None
    cmd.extend(["--cfg_path", cfg_path]) if cfg_path else None
    return cmd
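
Putting it together, for a task enqueued by the scheduler above (local=True, a pool and a file_path set, everything else left at its default) the joined command looks roughly like this; the DAG/task names and path below are made up:

from datetime import datetime

dag_id, task_id = "example_dag", "example_task"
iso = datetime(2019, 1, 1).isoformat()

cmd = ["airflow", "run", dag_id, task_id, iso]
cmd.extend(["--local"])                              # local=True in the scheduler path
cmd.extend(["--pool", "default_pool"])               # pool, if the TI has one
cmd.extend(["-sd", "/path/to/dags/example_dag.py"])  # file_path from the SimpleDag
print(" ".join(cmd))
# airflow run example_dag example_task 2019-01-01T00:00:00 --local --pool default_pool -sd /path/to/dags/example_dag.py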