Apollo 应用与源码分析:Monitor监控-软件监控-进程存活监控-process_monitor

Apollo 应用与源码分析:Monitor监控-软件监控-进程存活监控-process_monitor_第1张图片

目录

流程

代码

分析

获取可以运行的进程的信息

检查HMI 的模块信息

检查被监控的组件

检查其他组件

判断进程状态UpdateStatus


流程

Apollo 应用与源码分析:Monitor监控-软件监控-进程存活监控-process_monitor_第2张图片

代码

class ProcessMonitor : public RecurrentRunner {
 public:
  ProcessMonitor();
  void RunOnce(const double current_time) override;

 private:
  static void UpdateStatus(
      const std::vector& running_processes,
      const apollo::dreamview::ProcessMonitorConfig& config,
      ComponentStatus* status);
};

void ProcessMonitor::RunOnce(const double current_time) {
  // Get running processes.
  std::vector running_processes;
  for (const auto& cmd_file : cyber::common::Glob("/proc/*/cmdline")) {
    // Get process command string.
    std::string cmd_string;
    if (cyber::common::GetContent(cmd_file, &cmd_string) &&
        !cmd_string.empty()) {
      // In /proc//cmdline, the parts are separated with \0, which will be
      // converted back to whitespaces here.
      std::replace(cmd_string.begin(), cmd_string.end(), '\0', ' ');
      running_processes.push_back(cmd_string);
    }
  }

  auto manager = MonitorManager::Instance();
  const auto& mode = manager->GetHMIMode();

  // Check HMI modules.
  auto* hmi_modules = manager->GetStatus()->mutable_hmi_modules();
  for (const auto& iter : mode.modules()) {
    const std::string& module_name = iter.first;
    const auto& config = iter.second.process_monitor_config();
    UpdateStatus(running_processes, config, &hmi_modules->at(module_name));
  }

  // Check monitored components.
  auto* components = manager->GetStatus()->mutable_components();
  for (const auto& iter : mode.monitored_components()) {
    const std::string& name = iter.first;
    if (iter.second.has_process() &&
        apollo::common::util::ContainsKey(*components, name)) {
      const auto& config = iter.second.process();
      auto* status = components->at(name).mutable_process_status();
      UpdateStatus(running_processes, config, status);
    }
  }

  // Check other components.
  auto* other_components = manager->GetStatus()->mutable_other_components();
  for (const auto& iter : mode.other_components()) {
    const std::string& name = iter.first;
    const auto& config = iter.second;
    UpdateStatus(running_processes, config, &other_components->at(name));
  }
}

分析

核心的处理逻辑还在runOnce 中

获取可以运行的进程的信息

// Get running processes.
  std::vector running_processes;
  for (const auto& cmd_file : cyber::common::Glob("/proc/*/cmdline")) {
    // Get process command string.
    std::string cmd_string;
    if (cyber::common::GetContent(cmd_file, &cmd_string) &&
        !cmd_string.empty()) {
      // In /proc//cmdline, the parts are separated with \0, which will be
      // converted back to whitespaces here.
      std::replace(cmd_string.begin(), cmd_string.end(), '\0', ' ');
      running_processes.push_back(cmd_string);
    }
  }

首先迭代遍历所有的/proc/*/cmdline 文件,这个文件是描述进程相关信息的,中间的*就是pid关于该文件的具体内容可以参考:https://blog.csdn.net/whatday/article/details/108897457

上述文件中可以获取该进程的命令行参数,包括进程的启动路径(argv[0])

检查HMI 的模块信息

 // Check HMI modules.
  auto* hmi_modules = manager->GetStatus()->mutable_hmi_modules();
  for (const auto& iter : mode.modules()) {
    const std::string& module_name = iter.first;
    const auto& config = iter.second.process_monitor_config();
    UpdateStatus(running_processes, config, &hmi_modules->at(module_name));
  }
  1. 获取所有的HMI配置的module;
  2. 将系统中检查到的运行的进程与配置的运行的module 进行检查对比,把最终的检查状态给到&hmi_modules->at(module_name)

检查被监控的组件

// Check monitored components.
  auto* components = manager->GetStatus()->mutable_components();
  for (const auto& iter : mode.monitored_components()) {
    const std::string& name = iter.first;
    if (iter.second.has_process() &&
        apollo::common::util::ContainsKey(*components, name)) {
      const auto& config = iter.second.process();
      auto* status = components->at(name).mutable_process_status();
      UpdateStatus(running_processes, config, status);
    }
  }
  1. 首先获取所有的组件;
  2. 遍历要监控的组件,获取到进程的config 信息;
  3. 给到UpdateStatus进行最终的判断;

检查其他组件

  auto* other_components = manager->GetStatus()->mutable_other_components();
  for (const auto& iter : mode.other_components()) {
    const std::string& name = iter.first;
    const auto& config = iter.second;
    UpdateStatus(running_processes, config, &other_components->at(name));
  }
}

逻辑同上

判断进程状态UpdateStatus

void ProcessMonitor::UpdateStatus(
    const std::vector& running_processes,
    const apollo::dreamview::ProcessMonitorConfig& config,
    ComponentStatus* status) {
  status->clear_status();
  for (const std::string& command : running_processes) {
    bool all_keywords_matched = true;
    for (const std::string& keyword : config.command_keywords()) {
      if (command.find(keyword) == std::string::npos) {
        all_keywords_matched = false;
        break;
      }
    }
    if (all_keywords_matched) {
      // Process command keywords are all matched. The process is running.
      SummaryMonitor::EscalateStatus(ComponentStatus::OK, command, status);
      return;
    }
  }
  SummaryMonitor::EscalateStatus(ComponentStatus::FATAL, "", status);
}

这就是整个进程监控的核心判断函数了。

  1. 遍历之前从/proc/*/cmdline中获取到的running_processes;
  2. 从config 中遍历command_keywords,并看running_processes中是否能找到一个程序名(启动位置),命名行参数都能匹配上的item
  3. 如果找到就是true,如果找不到就是false

你可能感兴趣的:(自动驾驶,java,前端,javascript,自动驾驶,c++)