java程序监控tomcat实现项目宕机自动重启并发送邮件提醒

2019独角兽企业重金招聘Python工程师标准>>> hot3.png

最近老项目频繁挂掉,而项目经过多批人之手,短时间内难以定位问题,所以只好写一个监控程序。时间比较紧(只有半天),而且水平有限,大神勿喷,有好的方法还请赐教。 1、问题描述:分两种情况:1.1、tomcat 彻底挂掉,端口不再被进程占用;1.2、并没有彻底挂掉,端口仍被进程占用,但是接口访问异常。 2、解决思路:启动一个 java 服务,轮询(10秒钟一次)程序的一个接口(比如获取当前时间);如果请求不到,则查看该端口是否被进程占用:如果占用则杀死进程,然后重启 tomcat;如果没有占用则直接重启 tomcat。 本来考虑的是监控多个程序,但是由于时间问题就先监控一个吧。

3.1 轮询接口

@Service
public class SchedulerService {
    private static final Logger logger = LoggerFactory.getLogger(SchedulerService.class);
    @Autowired
    private KillService killService;

    @Value("#{system['jiankong.ports']}")
    private String portsStr;
    @Value("#{system['url']}")
    private String url;

    /**
     * 监控接口是否通 如果接口不通 或者返回结果不对则重启服务 并发送邮件 每10秒执行一次扫描
     * @author gaozemin
     * @date 2017年10月18日
     * @throws Exception
     * @return
     */
    public void watch() throws Exception {
        String[] ports = portsStr.split(",");
        for (String port : ports) {
            // 调用测试接口
            String ret = HttpUtil.sendPost(url, null);
            if (ret == null) {// 如果返回结果为空重启服务
                logger.info("返回结果为null ");
                killService.start(Integer.valueOf(port));
            } else {
                try {
                    Map retMap = JSONObject.parseObject(ret, Map.class);
                    String retFlag = String.valueOf(retMap.get("result"));
                    if (!"200".equals(retFlag)) {// 如果返回结果异常 重启服务
                        killService.start(Integer.valueOf(port));
                    } else {
                        logger.info("系统运行正常....");
                    }
                } catch (Exception e) {
                    logger.info("返回值解析异常....");
                    killService.start(Integer.valueOf(port));
                }
            }

            logger.info("监控执行中..");
        }

    }

3.2 如果监控到异常则重启服务

/**
 * Restarts the monitored Tomcat instance: finds and kills whatever process is
 * listening on the given port, launches the configured start command and
 * notifies the configured e-mail recipients.
 *
 * <p>Restarts for the same port are rate-limited by {@code waitTime}
 * (milliseconds) so that a Tomcat that is still booting is not killed again.
 *
 * <p>NOTE(review): the restart bookkeeping uses a plain {@link HashMap}; confirm
 * that {@link #start(int)} is only invoked from a single scheduler thread.
 */
@Service
public class KillService {
    private static final Logger logger = LoggerFactory.getLogger(KillService.class);

    /** Matches the protocol column plus the local-address column at the start of a netstat line. */
    private static final Pattern LOCAL_ADDRESS_PATTERN = Pattern.compile("^ *[a-zA-Z]+ +\\S+");

    /** Minimum number of milliseconds between two restarts of the same port. */
    @Value("#{system['waitTime']}")
    private Long waitTime;
    /** Command used to start Tomcat on the non-Linux branch. */
    @Value("#{system['startTomcatExec']}")
    private String startTomcatExec;
    /** Command used to start Tomcat on the Linux branch. */
    @Value("#{system['startLinuxTomcatExec']}")
    private String startLinuxTomcatExec;
    /** Command prefix that lists connections for a port; the port number is appended to it. */
    @Value("#{system['findPid']}")
    private String findPid;
    /** Selects the Linux restart path when {@code true}. */
    @Value("#{system['isLinux']}")
    private boolean isLinux;
    /** Comma-separated list of notification recipients. */
    @Value("#{system['send.emails']}")
    private String emails;

    @Autowired
    private SendMail sendMail;

    /** Time of the most recent restart attempt, keyed by port. */
    private Map<Integer, Date> map = new HashMap<>();

    /**
     * Restarts the service on {@code port} unless a restart for the same port was
     * already attempted less than {@code waitTime} milliseconds ago.
     *
     * @param port the port the monitored service listens on
     */
    public void start(int port) {
        Date now = new Date();
        Date lastExecTime = map.get(port);
        if (lastExecTime != null) {
            long subTime = now.getTime() - lastExecTime.getTime();
            logger.info("间隔时间:{}", subTime);
            if (subTime < waitTime) {
                // The previous restart may still be booting; do not kill it again.
                logger.info("间隔时间过短 等待程序启动!");
                return;
            }
        }
        map.put(port, now);
        restartTomcat(port, isLinux);
    }

    /**
     * Kills the process bound to {@code port}, launches Tomcat again and, when
     * that succeeded, sends the notification mails.
     *
     * @param port    the port whose owning process is killed
     * @param isLinux {@code true} to use the shell-based Linux path, {@code false}
     *                for the {@code findPid}/{@code taskkill} path
     */
    private void restartTomcat(int port, boolean isLinux) {
        try {
            if (isLinux) {
                linuxRestart(port);
                StartTomcatThread starter = new StartTomcatThread(startLinuxTomcatExec);
                starter.start();
            } else {
                Process p = Runtime.getRuntime().exec(findPid + port);
                List<String> read = read(port, p.getInputStream(), "UTF-8");
                if (read.isEmpty()) {
                    logger.info("找不到端口:{}的进程", port);
                } else {
                    logger.info("找到" + read.size() + "个进程,正在准备清理");
                    kill(read);
                }
                StartTomcatThread starter = new StartTomcatThread(startTomcatExec);
                starter.start();
                logger.info("tomcat已重启");
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns this thread.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while restarting the service on port {}", port, e);
            return;
        } catch (Exception e) {
            // As before, no mail is sent when the restart itself failed.
            logger.error("Failed to restart the service on port {}", port, e);
            return;
        }
        notifyRestart();
    }

    /** Sends the restart notice to every configured recipient; one failing recipient does not stop the rest. */
    private void notifyRestart() {
        if (emails == null) {
            return;
        }
        String dataStr = "admin 服务宕机  现已自动重启 请及时查看日志 修改错误!";
        for (String rawEmail : emails.split(",")) {
            String email = rawEmail.trim();
            if (email.isEmpty()) {
                continue;
            }
            try {
                sendMail.sendMsg(email, dataStr);
            } catch (Exception e) {
                logger.error("Failed to send restart notification to {}", email, e);
            }
        }
    }

    /**
     * Checks whether a netstat output line refers to exactly the given local port.
     * This is needed because {@code findstr} matches substrings: searching for
     * port 80 also returns lines for 8099.
     *
     * @param port the port being looked for
     * @param str  one line of netstat output
     * @return {@code true} when the local-address column of the line ends in {@code :port};
     *         {@code false} for blank, header or otherwise unparsable lines
     */
    private boolean validPort(int port, String str) {
        if (str == null) {
            return false;
        }
        Matcher matcher = LOCAL_ADDRESS_PATTERN.matcher(str);
        if (!matcher.find()) {
            // Blank or header line: calling group() here would throw IllegalStateException.
            return false;
        }
        String find = matcher.group();
        // Everything after the last ':' is the port; this also covers IPv6 forms such as [::]:8080.
        find = find.substring(find.lastIndexOf(":") + 1);
        try {
            return port == Integer.parseInt(find);
        } catch (NumberFormatException e) {
            logger.warn("查找到错误的端口:" + find);
            return false;
        }
    }

    /**
     * Kills, on Linux, every process that listens on exactly {@code port}.
     * Despite its name this method only kills; starting Tomcat again is done by
     * the caller.
     *
     * @param port the listening port whose owning processes are killed
     * @throws IOException          if the shell cannot be started
     * @throws InterruptedException if the thread is interrupted while waiting for the shell
     */
    public void linuxRestart(int port) throws IOException, InterruptedException {
        // Match the local-address column ($4) against ":<port>" at its end instead of
        // grepping the whole line, so port 80 no longer matches 8080 or a PID that
        // happens to contain "80". Non-numeric PID columns ("-") are skipped and
        // kill is only invoked when at least one PID was found.
        String cmd = "pids=$(netstat -tlnp 2>/dev/null | awk '$4 ~ /:" + port
                + "$/ {split($7, a, \"/\"); if (a[1] ~ /^[0-9]+$/) print a[1]}' | sort -u); "
                + "if [ -n \"$pids\" ]; then kill -9 $pids; fi";
        String[] command = { "sh", "-c", cmd };
        Process pro = Runtime.getRuntime().exec(command);
        int exitCode = pro.waitFor();
        if (exitCode != 0) {
            logger.warn("Kill command for port {} exited with code {}", port, exitCode);
        }
    }

    /**
     * Extracts the PID (last column) from each netstat line, de-duplicates the
     * values and kills the resulting processes.
     *
     * @param data netstat output lines that were already filtered to the wanted port
     */
    public void kill(List<String> data) {
        Set<Integer> pids = new HashSet<>();
        if (data != null) {
            for (String line : data) {
                if (line == null) {
                    continue;
                }
                // Trim first so trailing whitespace does not yield an empty PID column.
                String trimmed = line.trim();
                String spid = trimmed.substring(trimmed.lastIndexOf(' ') + 1);
                try {
                    int pid = Integer.parseInt(spid);
                    // Unparsable or non-positive PIDs are never handed to taskkill.
                    if (pid > 0) {
                        pids.add(pid);
                    }
                } catch (NumberFormatException e) {
                    logger.warn("获取的进程号错误:" + spid);
                }
            }
        }
        logger.info("列表:{}", pids);
        killWithPid(pids);
    }

    /**
     * Force-kills every process in {@code pids} with {@code taskkill} and logs
     * the command's standard output.
     *
     * @param pids process ids to kill; {@code null} entries are skipped
     */
    public void killWithPid(Set<Integer> pids) {
        if (pids == null) {
            return;
        }
        for (Integer pid : pids) {
            if (pid == null) {
                continue;
            }
            try {
                Process process = Runtime.getRuntime().exec("taskkill /F /pid " + pid);
                String txt = readTxt(process.getInputStream(), "UTF-8");
                logger.info(txt);
            } catch (IOException e) {
                logger.error("Failed to kill process {}", pid, e);
            }
        }
    }

    /**
     * Reads netstat output and keeps only the lines whose local port equals {@code port}.
     *
     * @param port    the port to keep
     * @param in      stream to read; it is closed before returning
     * @param charset name of the charset used to decode the stream
     * @return the matching lines, possibly empty
     * @throws IOException if reading fails
     */
    private List<String> read(int port, InputStream in, String charset) throws IOException {
        List<String> data = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (validPort(port, line)) {
                    data.add(line);
                }
            }
        }
        return data;
    }

    /**
     * Reads the whole stream as text. Lines are concatenated without any
     * separator, exactly as before.
     *
     * @param in      stream to read; it is closed before returning
     * @param charset name of the charset used to decode the stream
     * @return the concatenated lines
     * @throws IOException if reading fails
     */
    public String readTxt(InputStream in, String charset) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }
}

源代码

转载于:https://my.oschina.net/u/1048997/blog/1554349

你可能感兴趣的:(java,python,操作系统)