需求描述
定时跑批的(batch,process)jar包服务必须单机运行,又要保证高可用
- 192.168.0.47 (running) 192.168.0.48(backup-HA)
- 需要额外安装的应用 : python3 ,pip3(fastapi ,uvicorn)
(sorry 没有写注释的习惯)
pip3 install fastapi
pip3 install uvicorn
pip3 install requests (192.168.0.48 上的 backup_monitor.py 通过 requests 发送钉钉告警,需要在 48 上安装)
解决方案
在47上提供应用信息的接口,48上调用47的接口判断java服务状态,当47上的服务丢失48上的冷备项目立刻启动,当47上的项目恢复48上的冷备项目停止。
192.168.0.47 上的操作:
[root@skpay-java-47l shell]# cat kelan_monitor.py
#!/usr/bin/python
# -*- coding:utf-8 -*-
import subprocess
import os, sys, json, datetime, time
import locale
import urllib.request
from fastapi import FastAPI
def check_project_sum(project, ps_output=None):
    """Count the processes whose ``ps -ef`` line mentions ``<project>-service``.

    Args:
        project: Service name, e.g. ``"process"`` for ``process-service``.
        ps_output: Optional pre-captured ``ps -ef`` text to inspect instead
            of running ``ps``.

    Returns:
        int: Number of matching process lines; 0 when nothing matches.

    Raises:
        OSError: If ``ps`` cannot be executed.
    """
    if ps_output is None:
        # Run ps from an argument list, never through a shell: `project` can
        # originate from a request URL and must not be interpreted as shell
        # syntax. stderr is dropped so a ps warning cannot pollute the result.
        ps_output = subprocess.run(
            ['ps', '-ef'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).stdout
    needle = '{tproject}-service'.format(tproject=project)
    # Lines that mention grep are skipped so that someone grepping for the
    # service is not counted as the service itself.
    return sum(
        1 for line in ps_output.splitlines()
        if needle in line and 'grep' not in line
    )
def check_project_startedtime(project, ps_output=None):
    """Return the start-time column of every ``<project>-service`` process.

    Args:
        project: Service name, e.g. ``"process"`` for ``process-service``.
        ps_output: Optional pre-captured ``ps -ef`` text to inspect instead
            of running ``ps``.

    Returns:
        str: The fifth whitespace-separated column (STIME in ``ps -ef``) of
        each matching line, one per line; ``""`` when nothing matches.

    Raises:
        OSError: If ``ps`` cannot be executed.
    """
    if ps_output is None:
        # Run ps from an argument list, never through a shell: `project` can
        # originate from a request URL and must not be interpreted as shell
        # syntax.
        ps_output = subprocess.run(
            ['ps', '-ef'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).stdout
    needle = '{tproject}-service'.format(tproject=project)
    started = []
    for line in ps_output.splitlines():
        # Lines that mention grep are skipped so that someone grepping for
        # the service is not reported as the service itself.
        if needle not in line or 'grep' in line:
            continue
        fields = line.split()
        started.append(fields[4] if len(fields) > 4 else '')
    return '\n'.join(started)
def check_project_pid(project, ps_output=None):
    """Return the PID of every process that mentions ``<project>-service``.

    Args:
        project: Service name, e.g. ``"process"`` for ``process-service``.
        ps_output: Optional pre-captured ``ps -ef`` text to inspect instead
            of running ``ps``.

    Returns:
        str: The second whitespace-separated column (PID in ``ps -ef``) of
        each matching line, one per line; ``""`` when nothing matches. The
        value stays a string because more than one process may match.

    Raises:
        OSError: If ``ps`` cannot be executed.
    """
    if ps_output is None:
        # Run ps from an argument list, never through a shell: `project` can
        # originate from a request URL and must not be interpreted as shell
        # syntax.
        ps_output = subprocess.run(
            ['ps', '-ef'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).stdout
    needle = '{tproject}-service'.format(tproject=project)
    pids = []
    for line in ps_output.splitlines():
        # Lines that mention grep are skipped so that someone grepping for
        # the service is not reported as the service itself.
        if needle not in line or 'grep' in line:
            continue
        fields = line.split()
        pids.append(fields[1] if len(fields) > 1 else '')
    return '\n'.join(pids)
def check_project_mem(project, ps_output=None):
    """Sum the memory-percentage column of every ``<project>-service`` process.

    Args:
        project: Service name, e.g. ``"process"`` for ``process-service``.
        ps_output: Optional pre-captured ``ps aux`` text to inspect instead
            of running ``ps``.

    Returns:
        str: Sum of the fourth whitespace-separated column (%MEM in
        ``ps aux``) over all matching lines, formatted with up to six
        significant digits (e.g. ``"2.3"``); ``""`` when nothing matches.

    Raises:
        OSError: If ``ps`` cannot be executed.
    """
    if ps_output is None:
        # Run ps from an argument list, never through a shell: `project` can
        # originate from a request URL and must not be interpreted as shell
        # syntax.
        ps_output = subprocess.run(
            ['ps', 'aux'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).stdout
    needle = '{tproject}-service'.format(tproject=project)
    total = 0.0
    matched = False
    for line in ps_output.splitlines():
        # Lines that mention grep are skipped so that someone grepping for
        # the service is not reported as the service itself.
        if needle not in line or 'grep' in line:
            continue
        matched = True
        fields = line.split()
        try:
            total += float(fields[3])
        except (IndexError, ValueError):
            # A missing or non-numeric column contributes nothing to the sum.
            continue
    # The value is returned as text; an empty string signals "no process".
    return '%.6g' % total if matched else ''
# ASGI application object; the route handlers in this file register on it.
app = FastAPI()


@app.get("/")
def read_root():
    """Answer the root URL with a fixed greeting payload."""
    greeting = {"messages": "hellokugou"}
    return greeting
@app.get("/items/{item_id}")
def read_item(item_id: int, q: str = None):
    """Echo the ``item_id`` path parameter and the optional ``q`` value back."""
    echoed = dict(item_id=item_id, q=q)
    return echoed
@app.get("/project/{project}")
def read_project(project: str):
    """Report the state of the ``<project>-service`` processes on this host.

    Args:
        project: Service name taken from the URL path, e.g. ``process``.

    Returns:
        dict: ``project_name``, ``project_pid``, ``project_running-sum``,
        ``project_started-time`` and ``project_mem`` for the service.

    Raises:
        HTTPException: 400 when ``project`` is not a plain service name.
    """
    from fastapi import HTTPException

    # `project` comes straight from the request URL and is used to build the
    # process-matching pattern, so accept only ordinary name characters
    # (letters, digits, '-', '_', '.') starting with a letter or digit.
    if not project[:1].isalnum() or not all(
            ch.isalnum() or ch in '-_.' for ch in project):
        raise HTTPException(status_code=400, detail="invalid project name")

    project_pid = check_project_pid(project)
    print(project, '#######################project')
    print(project_pid, '@@@@@@@@@@@@@@pid')
    return {
        "project_name": project,
        "project_pid": project_pid,
        "project_running-sum": check_project_sum(project),
        "project_started-time": check_project_startedtime(project),
        'project_mem': check_project_mem(project),
    }
if __name__ == '__main__':
    # Executed directly as a script: serve the API with uvicorn on the fixed
    # address and port below, using a single worker.
    import uvicorn

    listen_options = {"host": "192.168.0.47", "port": 9999, "workers": 1}
    uvicorn.run(app=app, **listen_options)
[root@skpay-java-47l shell]# pwd
/data/shell
测试fastapi 脚本
[root@skpay-java-48l shell]# curl -s http://192.168.0.47:9999/project/process
{"project_name":"process","project_pid":"20770","project_running-sum":1,"project_started-time":"18:55","project_mem":"2.3"}[root@skpay-java-48l shell]#
[root@skpay-java-48l shell]#
[root@skpay-java-48l shell]# curl -s http://192.168.0.47:9999
{"messages":"hellokugou"}
##也可以在最后替换其他项目名 curl -s http://192.168.0.47:9999/project/batch
curl -s 是安静输出
nohup python kelan_monitor.py & //后台启动
192.168.0.48 上的操作:
[root@skpay-java-48l shell]# cat backup_monitor.py
#!/usr/bin/python
# -*- coding:utf-8 -*-
import subprocess
import os, sys, json, datetime, time
import zipfile
import shutil
import locale
import re
import tarfile
import urllib.request
import socket,requests
def dingding(messages):
    """Post *messages* as a text notification to the DingTalk robot webhook.

    The notification mentions everyone in the group (``isAtAll`` is True).

    Args:
        messages: Text to send; it becomes the content of the message.

    Returns:
        bytes: Raw body of the webhook's HTTP response.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    content = {
        "msgtype": "text",
        "text": {
            "content": "{tmessages}".format(tmessages=messages)
        },
        "at": {
            # To mention specific people instead of everyone, replace the
            # entry below with "atMobiles": ["<phone number>", ...] and
            # "isAtAll": False.
            "isAtAll": True
        }
    }
    headers = {"Content-Type": "application/json;charset=utf-8"}
    # NOTE(review): the robot access token is embedded in this URL; consider
    # loading it from configuration rather than keeping it in the script.
    url = "https://oapi.dingtalk.com/robot/send?access_token=5c8xxxxxfxxxxxa"
    # Bound the wait: requests has no default timeout, so a stalled
    # connection would otherwise block this script indefinitely.
    r = requests.post(url=url, headers=headers, json=content, timeout=10)
    return r.content
# Root directory under which each service keeps its bin/<name>_std.sh script.
app_file_path = '/data/app/'


def start_project(project):
    """Run the service's control script with the ``start`` argument.

    The script path is ``<app_file_path>/<project>/bin/<project>_std.sh``.

    Args:
        project: Service name, e.g. ``"batch"``.

    Returns:
        int: Exit status of the control script (0 means it reported success),
        so callers can detect a failed start.
    """
    script = os.path.join(
        app_file_path, project, 'bin',
        '{tproject}_std.sh'.format(tproject=project))
    # Argument list instead of a command string: no intermediate shell has to
    # re-parse the path.
    return subprocess.call(['sh', '+x', script, 'start'])
def check_project_sum(project, ps_output=None):
    """Count the local processes whose ``ps -ef`` line mentions ``<project>-service``.

    Args:
        project: Service name, e.g. ``"batch"`` for ``batch-service``.
        ps_output: Optional pre-captured ``ps -ef`` text to inspect instead
            of running ``ps``.

    Returns:
        int: Number of matching process lines; 0 when nothing matches.

    Raises:
        OSError: If ``ps`` cannot be executed.
    """
    if ps_output is None:
        # Run ps from an argument list rather than a shell pipeline, so the
        # name is matched literally and a warning on stderr cannot end up in
        # the text being parsed.
        ps_output = subprocess.run(
            ['ps', '-ef'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).stdout
    needle = '{tproject}-service'.format(tproject=project)
    # Lines that mention grep are skipped so that someone grepping for the
    # service is not counted as the service itself.
    return sum(
        1 for line in ps_output.splitlines()
        if needle in line and 'grep' not in line
    )
def check_project_pid(project, ps_output=None):
    """Return the PID of every local process that mentions ``<project>-service``.

    Args:
        project: Service name, e.g. ``"batch"`` for ``batch-service``.
        ps_output: Optional pre-captured ``ps -ef`` text to inspect instead
            of running ``ps``.

    Returns:
        str: The second whitespace-separated column (PID in ``ps -ef``) of
        each matching line, one per line; ``""`` when nothing matches.

    Raises:
        OSError: If ``ps`` cannot be executed.
    """
    if ps_output is None:
        # Run ps from an argument list rather than a shell pipeline, so the
        # name is matched literally and a warning on stderr cannot end up in
        # the text being parsed.
        ps_output = subprocess.run(
            ['ps', '-ef'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        ).stdout
    needle = '{tproject}-service'.format(tproject=project)
    pids = []
    for line in ps_output.splitlines():
        # Lines that mention grep are skipped so that someone grepping for
        # the service is not reported as the service itself.
        if needle not in line or 'grep' in line:
            continue
        fields = line.split()
        pids.append(fields[1] if len(fields) > 1 else '')
    return '\n'.join(pids)
def kill_project(project):
    """Force-kill every local process that check_project_pid reports for *project*.

    Each PID is signalled separately, so the call works whether zero, one or
    several processes match; a failure on one PID is reported and does not
    stop the others from being signalled.

    Args:
        project: Service name, e.g. ``"batch"``.
    """
    import signal  # local import: only this function needs it

    # check_project_pid returns PIDs as whitespace/newline-separated text.
    for pid in check_project_pid(project).split():
        if not pid.isdigit():
            # Ignore anything that is not a PID (e.g. stray diagnostic text).
            continue
        try:
            os.kill(int(pid), signal.SIGKILL)
        except OSError as err:
            # Typically the process exited between listing and signalling.
            print('kill -9 {tpid} failed: {terr}'.format(tpid=pid, terr=err))
# Services that must run on exactly one host at a time.
project_list = ['batch', 'process']


def _remote_running_sum(project):
    """Ask the monitor on 192.168.0.47 how many instances of *project* it sees.

    Returns:
        The reported ``project_running-sum`` as an int, or None when the
        request fails or the reply is not the expected JSON document.
    """
    status, body = subprocess.getstatusoutput(
        'curl -s --max-time 10 http://192.168.0.47:9999/project/{tproject}'.format(tproject=project))
    if status != 0:
        return None
    try:
        return int(json.loads(body)['project_running-sum'])
    except (ValueError, KeyError, TypeError):
        return None


# Liveness probe of the monitor API; --max-time keeps a hung connection from
# stalling the whole run.
check_monitor = subprocess.getstatusoutput('curl -s --max-time 10 http://192.168.0.47:9999')
localtime = time.asctime(time.localtime(time.time()))

if check_monitor[0] == 0:
    print(localtime, 'the monitor kelan kelan_monitor.py is running')
    for i in project_list:
        remote_sum = _remote_running_sum(i)
        if remote_sum is None:
            # Without a trustworthy answer neither starting nor killing is
            # safe, so leave the backup exactly as it is.
            print(i, 'status on 192.168.0.47 is unknown, nothing changed')
            continue
        local_sum = check_project_sum(i)
        if remote_sum >= 1:
            # The primary runs the service (one or more instances): the
            # backup copy must not run alongside it.
            print(i, 'is running')
            if local_sum != 0:
                kill_project(i)
        elif local_sum == 0:
            # Primary lost the service and no backup is running yet.
            start_project(i)
        else:
            print(i, 'in backup is running')
else:
    # The API did not answer. Probe the SSH port to tell "monitor script is
    # down" apart from "host is down or unreachable". The socket gets a
    # timeout and is always closed.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock22:
        sock22.settimeout(5)
        sock22result = sock22.connect_ex(('192.168.0.47', 22))
    if sock22result == 0:
        # Host is reachable, only the monitor is down: alert, do not fail over.
        print(localtime, 'the monitor kelan kelan_monitor.py is not running')
        messages = '{tlocaltime} the monitor kelan kelan_monitor.py is not running'.format(tlocaltime=localtime)
        dingding(messages)
    else:
        print(localtime, 'the 192.168.0.47 network is wrong or ECS not star power')
        for i in project_list:
            # Start only what is not already running here; otherwise every
            # scheduled run during an outage would launch another copy.
            if check_project_sum(i) == 0:
                start_project(i)
            else:
                print(i, 'in backup is running')
[root@skpay-java-48l shell]# pwd
/data/shell
[root@skpay-java-48l shell]# ls
backup_monitor.py monitor_backup.log
[root@skpay-java-48l shell]#
"*/30 * * * * /usr/bin/python /data/shell/backup_monitor.py >> /data/shell/monitor_backup.log" //crontab 定时任务:每半小时触发一回,根据需求自行修改
手动测试
#####47停process服务
process #######################project
20770 @@@@@@@@@@@@@@pid
INFO: 192.168.0.48:42574 - "GET /project/process HTTP/1.1" 200 OK
INFO: 192.168.120.20:55351 - "GET / HTTP/1.1" 200 OK
INFO: 192.168.120.20:55351 - "GET /favicon.ico HTTP/1.1" 404 Not Found
process #######################project
20770 @@@@@@@@@@@@@@pid
INFO: 192.168.120.20:55368 - "GET /project/process HTTP/1.1" 200 OK
[root@skpay-java-48l data]# python backup_monitor.py
Thu Jun 11 18:55:06 2020 the monitor kelan kelan_monitor.py is running
batch is running
process in backup is running
[root@skpay-java-48l data]# python backup_monitor.py
Thu Jun 11 18:55:15 2020 the monitor kelan kelan_monitor.py is running
batch is running
process in backup is running
[root@skpay-java-48l data]# python backup_monitor.py
Thu Jun 11 18:55:31 2020 the monitor kelan kelan_monitor.py is running
batch is running
process in backup is running
####47 process服务开启
[root@skpay-java-48l data]#
[root@skpay-java-48l data]#
[root@skpay-java-48l data]#
[root@skpay-java-48l data]# python backup_monitor.py
Thu Jun 11 18:56:40 2020 the monitor kelan kelan_monitor.py is running
batch is running
process is running