#!/usr/bin/env python
# coding: utf-8
import json
import time
from kafka import KafkaProducer
def sendMsg2Kafka(topic, msg, env=""):
    """Send *msg* to the Kafka *topic* and report the outcome as a JSON string.

    After the send attempt, a run-log line describing the outcome is published
    to the ``Expr_RunLog`` topic on the same brokers.

    Args:
        topic: Name of the Kafka topic to publish to.
        msg: JSON-serializable payload; it is encoded with ``json.dumps``.
        env: Kafka bootstrap servers. When it is the empty string, the servers
            are read from a file named ``config`` in the current directory,
            which must contain a Python literal (e.g. a list of "host:port").

    Returns:
        ``json.dumps({"rcode": 0, "rmsg": ""})`` on success, or
        ``json.dumps({"rcode": 1, "rmsg": <reason>})`` when reading the config,
        connecting, sending, or writing the success log failed.

    Raises:
        Exception: when the message could not be sent AND the failure log
            could not be written either.
    """
    import ast  # function-scope import; only the config-file path needs it

    def _new_producer(servers):
        # Every producer serializes its values as UTF-8 encoded JSON.
        return KafkaProducer(bootstrap_servers=servers,
                             value_serializer=lambda v: json.dumps(v).encode('utf-8'))

    def _send_and_close(producer, target, payload):
        # Close the producer even when send/flush raises, so it is not leaked.
        try:
            producer.send(target, payload)
            producer.flush()
        finally:
            producer.close()

    def _write_run_log(servers, template, result):
        # Publish one timestamped line about *result* to the run-log topic.
        producer = _new_producer(servers)
        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        _send_and_close(producer, "Expr_RunLog", template.format(stamp, result))

    # The whole main path lives in one try block so that every failure is
    # reported uniformly through the rcode/rmsg result.
    try:
        if env == "":
            # No servers given: load them from the config file.
            try:
                with open("config", "r") as f:
                    # SECURITY: this used to be eval(); literal_eval only
                    # accepts Python literals, so the config file can no
                    # longer execute arbitrary code.
                    env = ast.literal_eval(f.read())
            except Exception as e:
                raise Exception("配置文件读取失败!{}".format(str(e)))
        # Create the Kafka producer.
        try:
            p = _new_producer(env)
        except Exception as e:
            raise Exception("连接Kafka失败!{}".format(str(e)))
        # Try to send the message.
        try:
            _send_and_close(p, topic, msg)
        except Exception as e:
            raise Exception("发送消息失败!{}".format(str(e)))
        # Success.
        rtn = {"rcode": 0, "rmsg": ""}
        try:
            _write_run_log(env, "[{}]: 发送消息成功!{}", rtn)
        except Exception as e:
            raise Exception("发送消息成功,发送日志失败!{}".format(str(e)))
        return json.dumps(rtn)
    except Exception as e:
        rtn = {"rcode": 1, "rmsg": str(e)}
        try:
            _write_run_log(env, "[{}]: 发送消息失败!{}", rtn)
        except Exception as e:
            raise Exception("发送消息失败,发送日志失败!{}".format(str(e)))
        return json.dumps(rtn)
# 以下是调用
# topic = "test" # 将要传入的topic
# msg = "{trid:0001,instid:0000001-001}" # 将要传入的消息
# env = ['91.91.91.107:9092', '91.91.91.108:9092', '91.91.91.109:9092'] # Kafka环境链接信息
# print(sendMsg2Kafka(topic, msg, env)) # 输出结果
# ---- 作业流 (job-flow scheduler script) ----
import datetime
import json
import os

import jupyter_client
import pymysql
from kafka import KafkaConsumer

from jiuyin_framework_util import sendMsg2Kafka
from save_to_Mongo import save_to_Mongo
from task_build import task_build
# Kafka topic consumed by this script, and the brokers to connect to.
topic = 'EXPR_RUNLOG'
env = ['91.91.91.107:9092', '91.91.91.108:9092', '91.91.91.109:9092']

# MongoDB database name and table name handed to save_to_Mongo.
MONGO_DB = 'local'
MONGO_TABLE = 'qwe123'


def _decode_json(raw):
    """Decode a raw Kafka message value (UTF-8 bytes) into a Python object."""
    return json.loads(raw.decode('utf-8'))


# Start the Jupyter kernel; `kc` is the client used to execute code in it.
km, kc = jupyter_client.manager.start_new_kernel()

# Consumer for `topic`, starting from the latest offset.
consumer = KafkaConsumer(
    topic,
    bootstrap_servers=env,
    value_deserializer=_decode_json,
    auto_offset_reset='latest',
)
# BEGIN_TS of every job-flow ("JF") event seen by this process, keyed by the
# flow's (TENANT_ID, EXPR_ID, EXPR_SNAPSHOT_ID) triple.  Keeping one entry per
# flow avoids a NameError when a task event is handled before any JF event
# and stops concurrent flows from overwriting each other's timestamp.
_jf_begin_ts = {}


def _now():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def _flow_ids(event):
    """Return the (TENANT_ID, EXPR_ID, EXPR_SNAPSHOT_ID) triple of *event*."""
    return (event['TENANT_ID'], event['EXPR_ID'], event['EXPR_SNAPSHOT_ID'])


def _publish(event, task_type, task_id, begin_ts, end_ts, status):
    """Send an event for the same flow as *event* to Kafka and save it to MongoDB."""
    msg = {"TENANT_ID": event['TENANT_ID'],
           "EXPR_ID": event['EXPR_ID'],
           "EXPR_SNAPSHOT_ID": event['EXPR_SNAPSHOT_ID'],
           "TASK_TYPE": task_type,
           "TASK_ID": task_id,
           "BEGIN_TS": begin_ts,
           "END_TS": end_ts,
           "STATUS": status}
    sendMsg2Kafka(topic, msg, env)
    save_to_Mongo(MONGO_DB, MONGO_TABLE, msg)


def _task_template(event):
    """Return (modules, varries, function, jsons) passed to task_build for *event*."""
    modules = ['from read_hdfs import HDFS2MongoJSON',
               'from jiuyin_framework_util import sendMsg2Kafka',
               'import datetime',
               'import json']
    varries = {'TENANT_ID': event['TENANT_ID'],
               'EXPR_ID': event['EXPR_ID'],
               'EXPR_SNAPSHOT_ID': event['EXPR_SNAPSHOT_ID'],
               'TASK_ID': event['TASK_ID'],
               'BEGIN_TS': event['BEGIN_TS'],
               'topic': 'EXPR_RUNLOG',
               'MONGO_DB': MONGO_DB,
               'MONGO_TABLE': MONGO_TABLE}
    jsons = '{"u_oper_para": {"file_path":"http://91.91.91.107:9870/data/pdid/wengad/iris/iris.csv","expanded_name":"csv","encoding":"utf-8","get_line_num":"all","field_deli":",","guess_datatype":"true","gen_col_title":"ture","data_begin_line":0,"col_title_line":0},"u_oper_io_para":{"output":{"colinfo":"colinfo_123456","data":"df_1465"}}}'
    # NOTE(review): the generated code calls save_to_Mongo, which is not in
    # `modules` above - confirm that task_build (not visible here) imports it.
    function = ['\n',
                'try:',
                "\tHDFS2MongoJSON('{0}')".format(jsons),
                'except:',
                "\tprint('算子运行出错')",
                '\tmsg={"TENANT_ID":TENANT_ID,\n\t"EXPR_ID": EXPR_ID,\n\t"EXPR_SNAPSHOT_ID": EXPR_SNAPSHOT_ID,\n\t"TASK_TYPE": "TASK",\n\t"TASK_ID":TASK_ID,\n\t"BEGIN_TS":BEGIN_TS ,\n\t"END_TS": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),\n\t"STATUS": "3"}',
                '\tsendMsg2Kafka(topic,msg)',
                'else:',
                "\tprint('算子运行成功')",
                '\tmsg={"TENANT_ID":TENANT_ID,\n\t"EXPR_ID": EXPR_ID,\n\t"EXPR_SNAPSHOT_ID": EXPR_SNAPSHOT_ID,\n\t"TASK_TYPE": "TASK",\n\t"TASK_ID":TASK_ID,\n\t"BEGIN_TS":BEGIN_TS ,\n\t"END_TS": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),\n\t"STATUS": "2"}',
                '\tsave_to_Mongo(MONGO_DB,MONGO_TABLE,msg)',
                '\tsendMsg2Kafka(topic,msg)']
    return modules, varries, function, jsons


def _handle_flow_event(conn, cursor, event):
    """Handle a job-flow ("JF") event: start the first tasks or clean up the flow."""
    flow = _flow_ids(event)
    _jf_begin_ts[flow] = event['BEGIN_TS']
    status = event['STATUS']
    if status == '1':
        # Flow started: send a start event for every task whose PRE is 'begin'.
        cursor.execute(
            "select * from JX_TAB1 where PRE = %s and TENANT_ID = %s "
            "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s;",
            ('begin',) + flow)
        for row in cursor.fetchall():
            # The task id is read from the fourth column of JX_TAB1.
            _publish(event, "TASK", row[3], _now(), "", '1')
    elif status in ('2', '3'):
        # Flow ended (2 = success, 3 = failure): delete the flow's rows.
        cursor.execute(
            "delete from JX_TAB1 where TENANT_ID = %s and EXPR_ID = %s "
            "and EXPR_SNAPSHOT_ID = %s;", flow)
        conn.commit()
        cursor.execute(
            "delete from GX_TAB1 where TENANT_ID = %s and EXPR_ID = %s "
            "and EXPR_SNAPSHOT_ID = %s;", flow)
        conn.commit()
        if status == '2':
            print('作业流运行成功!')
        else:
            print('作业流运行失败!')


def _start_task(conn, cursor, event):
    """Handle a task start event: run the task if all its predecessors succeeded."""
    flow = _flow_ids(event)
    task_id = event['TASK_ID']
    cursor.execute(
        "select PRE from JX_TAB1 where TASK_ID = %s and TENANT_ID = %s "
        "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s and status = '0';",
        (task_id,) + flow)
    row = cursor.fetchone()
    if row is None:
        # No row for this task with status '0': skip the event.
        return
    if row[0] == 'begin':
        pre_status = {'2'}
    else:
        pre_status = set()
        for pre_id in row[0].split(","):
            cursor.execute(
                "select STATUS from JX_TAB1 where TASK_ID = %s and TENANT_ID = %s "
                "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s;",
                (pre_id,) + flow)
            pre_row = cursor.fetchone()
            # A predecessor without a row counts as "not succeeded" (None).
            pre_status.add(pre_row[0] if pre_row is not None else None)
    if pre_status == {'2'}:
        cursor.execute(
            "update JX_TAB1 set STATUS = %s where TASK_ID = %s and TENANT_ID = %s "
            "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s and status = '0';",
            ('1', task_id) + flow)
        conn.commit()
        print('运行{0}作业'.format(task_id))
        # Build the task's .py file and run it in the Jupyter kernel.
        modules, varries, function, jsons = _task_template(event)
        task_build(task_id, modules, varries, function, jsons)
        kc.execute('%run /home/datatech/pyspace/ssz/{0}.py'.format(task_id))
    else:
        print('作业前置任务未完成,作业启动失败。')


def _finish_task(conn, cursor, event):
    """Handle a task end event: record the result, trigger successors, check the flow."""
    flow = _flow_ids(event)
    task_id = event['TASK_ID']
    status = event['STATUS']
    if status == '2':
        # Task succeeded: update its row, then start its successor tasks.
        cursor.execute(
            "update JX_TAB1 set STATUS = %s where TASK_ID = %s and TENANT_ID = %s "
            "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s and status = '1';",
            ('2', task_id) + flow)
        conn.commit()
        print('作业{0}运行成功'.format(task_id))
        cursor.execute(
            "select POST from JX_TAB1 where TASK_ID = %s and TENANT_ID = %s "
            "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s and status = '2';",
            (task_id,) + flow)
        post_row = cursor.fetchone()
        if post_row is None:
            # No row in the succeeded state for this task, so there is
            # nothing to trigger (indexing None used to crash here).
            pass
        elif post_row[0] == 'end':
            print('{0}作业没有后置作业'.format(task_id))
        else:
            for post_id in post_row[0].split(","):
                _publish(event, "TASK", post_id, _now(), "", '1')
    elif status == '3':
        # Task failed: mark it as failed.
        cursor.execute(
            "update JX_TAB1 set STATUS = %s where TASK_ID = %s and TENANT_ID = %s "
            "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s;",
            ('3', task_id) + flow)
        conn.commit()
        print('作业{0}运行失败'.format(task_id))
    _check_flow_finished(cursor, event)


def _is_blocked(cursor, flow, pre):
    """Return True if a pending task with predecessor list *pre* cannot start.

    A task is blocked when any predecessor has status '3' or '0', or has no
    row at all.  A task whose PRE is 'begin' has no predecessors and is never
    blocked.
    """
    if pre == 'begin':
        return False
    for pre_id in pre.split(','):
        cursor.execute(
            "select status from JX_TAB1 where TENANT_ID = %s and EXPR_ID = %s "
            "and EXPR_SNAPSHOT_ID = %s and task_id = %s;",
            flow + (pre_id,))
        row = cursor.fetchone()
        if row is None or row[0] in ('3', '0'):
            return True
    return False


def _check_flow_finished(cursor, event):
    """Send the flow end event once nothing is running and nothing can still run."""
    flow = _flow_ids(event)
    begin_ts = _jf_begin_ts.get(flow, "")
    by_status = ("select TASK_ID,PRE from JX_TAB1 where STATUS = %s and TENANT_ID = %s "
                 "and EXPR_ID = %s and EXPR_SNAPSHOT_ID = %s;")
    cursor.execute(by_status, ('1',) + flow)
    if cursor.fetchall():
        # Tasks are still running; the flow is not finished.
        return
    print('没有正在运行的任务')
    cursor.execute(by_status, ('0',) + flow)
    pending = cursor.fetchall()
    if not pending:
        print('没有未运行的任务')
        cursor.execute(
            "select STATUS from JX_TAB1 where TENANT_ID = %s and EXPR_ID = %s "
            "and EXPR_SNAPSHOT_ID = %s;", flow)
        end_status = set(row[0] for row in cursor.fetchall())
        if end_status == {'2'}:
            print('作业全部运行成功,作业流运行成功')
            _publish(event, "JF", "", begin_ts, _now(), '2')
        else:
            print('有作业运行失败,作业流运行失败')
            _publish(event, "JF", "", begin_ts, _now(), '3')
        return
    print('有未运行的任务')
    # Fix: the original `'3' or '0' in ...` test was always true, so every
    # pending task with several predecessors was counted as blocked.
    blocked = {}
    for pending_id, pre in pending:
        blocked[pending_id] = _is_blocked(cursor, flow, pre)
    if all(blocked.values()):
        print("未运行任务的前置任务运行失败")
        _publish(event, "JF", "", begin_ts, _now(), '3')


for message in consumer:
    print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition, message.offset, message.key, message.value))
    # The consumer's deserializer has already turned the JSON into a dict.
    msgvalue = message.value
    # NOTE(review): the database credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymysql.connect(host='91.91.91.101',port=33061,db='pdid',user='pdid',password='PDID_987aMY',charset='utf8')
    cursor = conn.cursor()
    try:
        if msgvalue['TASK_TYPE'] == 'JF':
            _handle_flow_event(conn, cursor, msgvalue)
        if msgvalue['TASK_TYPE'] == 'TASK':
            if msgvalue['STATUS'] == '1':
                _start_task(conn, cursor, msgvalue)
            else:
                _finish_task(conn, cursor, msgvalue)
    finally:
        # Close exactly once per message, on every path.  The connection was
        # previously closed twice after a JF end event and leaked on `continue`.
        cursor.close()
        conn.close()