Sending email from Airflow tasks

I managed it with the help of Airflow's TriggerRule; a sample DAG is given below:

import airflow

from airflow import DAG
from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator
from airflow.operators.email_operator import EmailOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.operators.sensors import HttpSensor
import json
from datetime import timedelta
from datetime import datetime
from airflow.models import Variable
from airflow.utils.trigger_rule import TriggerRule

args = {
    'owner': 'airflow',
    'email': ['[email protected]'],
    'email_on_failure': True,
    'email_on_retry': True,
    'depends_on_past': False,
    'start_date': airflow.utils.dates.days_ago(0),
}

# max_active_runs is a DAG-level setting, not a per-task default_arg,
# so it belongs on the DAG constructor.
dag = DAG(
    dag_id='TEST_DAG',
    default_args=args,
    schedule_interval='@once',
    max_active_runs=10,
)

new_cluster = {
    'spark_version': '4.0.x-scala2.11',
    'node_type_id': 'Standard_D16s_v3',
    'num_workers': 3,
    'spark_conf': {
        'spark.hadoop.javax.jdo.option.ConnectionDriverName': 'org.postgresql.Driver',
        # ... remaining Spark conf omitted ...
    },
    'custom_tags': {
        'ApplicationName': 'TEST',
        # ... remaining tags omitted ...
    },
}

t1 = DatabricksSubmitRunOperator(
    task_id='t1',
    dag=dag,
    new_cluster=new_cluster,
    # ... remaining arguments omitted ...
)

t2 = SimpleHttpOperator(
    task_id='t2',
    trigger_rule=TriggerRule.ONE_SUCCESS,
    method='POST',
    # ... remaining arguments omitted; dag=dag presumably among them ...
)

t2.set_upstream(t1)

t3 = SimpleHttpOperator(
    task_id='t3',
    trigger_rule=TriggerRule.ONE_SUCCESS,
    method='POST',
    # ... remaining arguments omitted; dag=dag presumably among them ...
)

t3.set_upstream(t2)

AllTaskSuccess = EmailOperator(
    dag=dag,
    trigger_rule=TriggerRule.ALL_SUCCESS,
    task_id="AllTaskSuccess",
    to=["[email protected]"],
    subject="All Task completed successfully",
    html_content='All Task completed successfully')

AllTaskSuccess.set_upstream([t1, t2, t3])

t1Failed = EmailOperator(
    dag=dag,
    trigger_rule=TriggerRule.ONE_FAILED,
    task_id="t1Failed",
    to=["[email protected]"],
    subject="T1 Failed",
    html_content='T1 Failed')

t1Failed.set_upstream([t1])

t2Failed = EmailOperator(
    dag=dag,
    trigger_rule=TriggerRule.ONE_FAILED,
    task_id="t2Failed",
    to=["[email protected]"],
    subject="T2 Failed",
    html_content='T2 Failed')

t2Failed.set_upstream([t2])

t3Failed = EmailOperator(
    dag=dag,
    trigger_rule=TriggerRule.ONE_FAILED,
    task_id="t3Failed",
    to=["[email protected]"],
    subject="T3 Failed",
    html_content='T3 Failed')

t3Failed.set_upstream([t3])
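For comparison, a failure callback can replace the three per-task EmailOperator tasks. This is a minimal sketch, not taken from the answer above; the function name notify_failure and the recipient address are placeholders, and send_email relies on the [smtp] section of airflow.cfg being configured:

from airflow.utils.email import send_email

def notify_failure(context):
    # context is supplied by Airflow when the task instance fails
    ti = context['task_instance']
    send_email(
        to=['[email protected]'],
        subject='Airflow task failed: {}.{}'.format(ti.dag_id, ti.task_id),
        html_content='Task {} failed for execution date {}.'.format(
            ti.task_id, context['execution_date']))

# Registering it in default_args applies it to every task in the DAG:
# args = {..., 'on_failure_callback': notify_failure}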

Trigger Rules

Though the normal workflow behavior is to trigger tasks when all their directly upstream tasks have succeeded, Airflow allows for more complex dependency settings.

All operators have a trigger_rule argument which defines the rule by which the generated task gets triggered. The default value for trigger_rule is all_success, which can be described as "trigger this task when all directly upstream tasks have succeeded". All other rules described here are based on direct parent tasks and are values that can be passed to any operator while creating tasks (a minimal sketch follows the list):

all_success: (default) all parents have succeeded

all_failed: all parents are in a failed or upstream_failed state

all_done: all parents are done with their execution

one_failed: fires as soon as at least one parent has failed; it does not wait for all parents to be done

one_success: fires as soon as at least one parent succeeds; it does not wait for all parents to be done

dummy: dependencies are just for show, trigger at will
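A minimal sketch of how these rules combine in practice (the task names extract, load, cleanup and alert are hypothetical, and dag is assumed to be defined as above):

from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.trigger_rule import TriggerRule

extract = DummyOperator(task_id='extract', dag=dag)
load = DummyOperator(task_id='load', dag=dag)

# Runs once both parents are done, whether they succeeded or failed.
cleanup = DummyOperator(task_id='cleanup', trigger_rule=TriggerRule.ALL_DONE, dag=dag)
# Fires on the first parent failure, without waiting for the other parent.
alert = DummyOperator(task_id='alert', trigger_rule=TriggerRule.ONE_FAILED, dag=dag)

cleanup.set_upstream([extract, load])
alert.set_upstream([extract, load])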

Reference: https://airflow.apache.org/concepts.html

References

https://stackoverflow.com/questions/51726248/airflow-dag-customized-email-on-any-of-the-task-failure

For those looking for a concrete example of using a Jinja template with EmailOperator, here is one:

from airflow.operators.email_operator import EmailOperator
from datetime import timedelta, datetime

email_task = EmailOperator(
    to='[email protected]',
    task_id='email_task',
    subject='Templated Subject: start_date {{ ds }}',
    params={'content1': 'random'},
    html_content="Templated Content: content1 - {{ params.content1 }}  task_key - {{ task_instance_key_str }} test_mode - {{ test_mode }} task_owner - {{ task.owner}} hostname - {{ ti.hostname }}",
    dag=dag)
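Note that Jinja is only rendered for fields declared in an operator's template_fields; for EmailOperator these include subject and html_content (and, in later versions, to), which is why the expressions above are expanded while other arguments would be passed through literally.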

 

References

http://www.itkeyword.com/doc/5696131999617723x900/airflow-how-to-make-emailoperator-html-content-dynamic
