2019独角兽企业重金招聘Python工程师标准>>>
最近由于老项目频繁挂掉,而且项目经过多批人之手,短时间内难以定位问题,所以只好先写一个监控程序。时间比较紧(只有半天),而且水平有限,大神勿喷,有好的方法还请赐教。 1、问题描述:分两种情况:1.1、tomcat 彻底挂掉,端口不再被进程占用;1.2、tomcat 并没有彻底挂掉,端口仍被进程占用,但是接口访问异常。 2、解决思路:启动一个 java 服务,轮询(10秒钟一次)被监控程序的一个接口(比如获取当前时间),如果请求不到,则查看该端口是否被进程占用:如果被占用则先杀死进程,然后重启 tomcat;如果没有被占用则直接重启 tomcat。 本来考虑的是监控多个程序的,但是由于时间问题就先监控一个吧。
3.1 轮询接口
@Service
public class SchedulerService {
private static final Logger logger = LoggerFactory.getLogger(SchedulerService.class);
@Autowired
private KillService killService;
@Value("#{system['jiankong.ports']}")
private String portsStr;
@Value("#{system['url']}")
private String url;
/**
* 监控接口是否通 如果接口不通 或者返回结果不对则重启服务 并发送邮件 每10秒执行一次扫描
* @author gaozemin
* @date 2017年10月18日
* @throws Exception
* @return
*/
public void watch() throws Exception {
String[] ports = portsStr.split(",");
for (String port : ports) {
// 调用测试接口
String ret = HttpUtil.sendPost(url, null);
if (ret == null) {// 如果返回结果为空重启服务
logger.info("返回结果为null ");
killService.start(Integer.valueOf(port));
} else {
try {
Map retMap = JSONObject.parseObject(ret, Map.class);
String retFlag = String.valueOf(retMap.get("result"));
if (!"200".equals(retFlag)) {// 如果返回结果异常 重启服务
killService.start(Integer.valueOf(port));
} else {
logger.info("系统运行正常....");
}
} catch (Exception e) {
logger.info("返回值解析异常....");
killService.start(Integer.valueOf(port));
}
}
logger.info("监控执行中..");
}
}
3.2 如果监控到异常则重启服务
/**
 * Kills whatever process is listening on a given port and starts Tomcat
 * again, then notifies the configured e-mail recipients.
 *
 * <p>Restarts for the same port are throttled: a new restart is skipped while
 * less than {@code waitTime} milliseconds have passed since the previous one,
 * so that a slowly starting Tomcat is not killed again.
 *
 * <p>NOTE(review): the restart bookkeeping uses a plain {@link HashMap}; this
 * assumes {@link #start(int)} is only called from one thread — confirm.
 */
@Service
public class KillService {
    private static final Logger logger = LoggerFactory.getLogger(KillService.class);

    /** Matches "<proto> <local-address>" at the start of a netstat output line. */
    private static final Pattern LOCAL_ADDRESS = Pattern.compile("^ *[a-zA-Z]+ +\\S+");

    /** Minimum interval between two restarts of the same port, in milliseconds. */
    @Value("#{system['waitTime']}")
    private Long waitTime;

    /** Command that starts Tomcat on Windows. */
    @Value("#{system['startTomcatExec']}")
    private String startTomcatExec;

    /** Command that starts Tomcat on Linux. */
    @Value("#{system['startLinuxTomcatExec']}")
    private String startLinuxTomcatExec;

    /** Windows command prefix that lists connections; the port is appended to it. */
    @Value("#{system['findPid']}")
    private String findPid;

    @Value("#{system['isLinux']}")
    private boolean isLinux;

    /** Comma-separated list of notification recipients. */
    @Value("#{system['send.emails']}")
    private String emails;

    @Autowired
    private SendMail sendMail;

    /** Time of the most recent restart, keyed by port. */
    private Map<Integer, Date> map = new HashMap<>();

    /**
     * Restarts the service on {@code port} unless it was already restarted
     * less than {@code waitTime} milliseconds ago.
     *
     * @param port the port whose listener should be killed and restarted
     */
    public void start(int port) {
        Date now = new Date();
        Date lastExecTime = map.get(port);
        if (lastExecTime != null) {
            // A restart was recorded before: give the application time to come up.
            long subTime = now.getTime() - lastExecTime.getTime();
            logger.info("间隔时间:{}", subTime);
            if (subTime < waitTime) {
                logger.info("间隔时间过短 等待程序启动!");
                return;
            }
        }
        map.put(port, now);
        restartTomcat(port, isLinux);
    }

    /**
     * Kills the listener on {@code port}, launches Tomcat in a background
     * thread and sends a notification mail to every configured recipient.
     * Failures are logged, never propagated.
     */
    private void restartTomcat(int port, boolean isLinux) {
        Runtime runtime = Runtime.getRuntime();
        try {
            if (isLinux) {
                // Kill the process that owns the port, then start Tomcat.
                linuxRestart(port);
                new StartTomcatThread(startLinuxTomcatExec).start();
            } else {
                Process p = runtime.exec(findPid + port);
                List<String> listeners = read(port, p.getInputStream(), "UTF-8");
                if (listeners.isEmpty()) {
                    logger.info("找不到端口:{}的进程", port);
                    new StartTomcatThread(startTomcatExec).start();
                    logger.info("tomcat已重启");
                } else {
                    logger.info("找到{}个进程,正在准备清理", listeners.size());
                    kill(listeners);
                    new StartTomcatThread(startTomcatExec).start();
                }
            }
            String dataStr = "admin 服务宕机 现已自动重启 请及时查看日志 修改错误!";
            for (String email : emails.split(",")) {
                sendMail.sendMsg(email, dataStr);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt so the calling scheduler can react to it.
            Thread.currentThread().interrupt();
            logger.error("restart of port {} was interrupted", port, e);
        } catch (Exception e) {
            logger.error("restart of port {} failed", port, e);
        }
    }

    /**
     * Checks whether a netstat line really belongs to {@code port}.
     * {@code findstr} does substring matching, so searching for 80 also
     * returns lines for 8099; this compares the port of the local address
     * exactly.
     *
     * @param port the port being searched for
     * @param str  one line of netstat output
     * @return {@code true} only if the line's local address ends in {@code :port}
     */
    private boolean validPort(int port, String str) {
        Matcher matcher = LOCAL_ADDRESS.matcher(str);
        if (!matcher.find()) {
            // Header and blank lines do not match; group() would throw here.
            return false;
        }
        String find = matcher.group();
        find = find.substring(find.lastIndexOf(":") + 1);
        try {
            return Integer.parseInt(find) == port;
        } catch (NumberFormatException e) {
            logger.warn("查找到错误的端口:{}", find);
            return false;
        }
    }

    /**
     * Kills (with {@code kill -9}) the process listening on {@code port} on
     * Linux and waits for the shell command to finish.
     *
     * @param port the listening port whose owner is killed
     * @throws IOException          if the shell cannot be started
     * @throws InterruptedException if interrupted while waiting for the command
     */
    public void linuxRestart(int port) throws IOException, InterruptedException {
        // Match ":<port> " so that e.g. 80 does not also hit 8080 or a PID
        // that merely contains the digits.
        String cmd = "kill -9 $(netstat -tlnp|grep ':" + port
                + " '|awk '{print $7}'|awk -F '/' '{print $1}')";
        String[] command = { "sh", "-c", cmd };
        Process pro = Runtime.getRuntime().exec(command);
        pro.waitFor();
    }

    /**
     * Extracts the PID (last column) from each netstat line, de-duplicates
     * the values and kills the resulting processes.
     *
     * @param data netstat output lines that belong to the monitored port
     */
    public void kill(List<String> data) {
        Set<Integer> pids = new HashSet<>();
        for (String line : data) {
            String trimmed = line.trim();
            String spid = trimmed.substring(trimmed.lastIndexOf(' ') + 1);
            try {
                int pid = Integer.parseInt(spid);
                // PID 0 is what netstat reports for e.g. TIME_WAIT entries;
                // it is not a killable process.
                if (pid > 0) {
                    pids.add(pid);
                }
            } catch (NumberFormatException e) {
                logger.warn("获取的进程号错误:{}", spid);
            }
        }
        logger.info("列表:{}", pids);
        killWithPid(pids);
    }

    /**
     * Force-kills every given process with {@code taskkill /F} and logs the
     * command output. A failure for one PID does not stop the others.
     *
     * @param pids process ids to kill
     */
    public void killWithPid(Set<Integer> pids) {
        for (Integer pid : pids) {
            try {
                Process process = Runtime.getRuntime().exec("taskkill /F /pid " + pid);
                String txt = readTxt(process.getInputStream(), "UTF-8");
                logger.info(txt);
            } catch (IOException e) {
                logger.error("taskkill failed for pid {}", pid, e);
            }
        }
    }

    /**
     * Reads the stream line by line and keeps only the lines whose local
     * address uses exactly {@code port}. The stream is closed afterwards.
     */
    private List<String> read(int port, InputStream in, String charset) throws IOException {
        List<String> data = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (validPort(port, line)) {
                    data.add(line);
                }
            }
        }
        return data;
    }

    /**
     * Reads the whole stream into one string; line terminators are dropped.
     * The stream is closed afterwards.
     *
     * @param in      stream to read
     * @param charset name of the charset used to decode the stream
     * @return the concatenated lines
     * @throws IOException if reading fails or the charset is unsupported
     */
    public String readTxt(InputStream in, String charset) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }
}
源代码