Hadoop NameNode启动之HeartbeatMonitor ( 五)

   NameNode启动时会启动一个心跳检测线程HeartbeatMonitor,该线程会周期性地检测数据节点的状态。如果数据节点在指定时间内未发送心跳信号,则认为该节点已死,NameNode会进行块复制以维护文件的副本数。

  /**
   * Background task that, while {@code fsRunning} is true, periodically
   * triggers {@code heartbeatCheck()} and (when access tokens are enabled)
   * {@code updateAccessKey()}, sleeping five seconds between passes.
   */
  class HeartbeatMonitor implements Runnable {
    private long lastHeartbeatCheck;
    private long lastAccessKeyUpdate;

    /**
     * Main loop: run whichever periodic tasks are due, log any exception
     * they raise, then pause before the next pass.
     */
    public void run() {
      while (fsRunning) {
        try {
          runDueTasks(now());
        } catch (Exception e) {
          // A failing pass must not kill the monitor; log it and carry on.
          FSNamesystem.LOG.error(StringUtils.stringifyException(e));
        }
        pauseBetweenPasses();
      }
    }

    /**
     * Runs each periodic task whose interval has elapsed as of
     * {@code currentTime}, recording {@code currentTime} as its last run.
     */
    private void runDueTasks(long currentTime) {
      // A task is due once the current time is strictly past the previous
      // run plus the configured interval.
      // NOTE(review): the original author states the recheck interval
      // defaults to 300000 ms (5 minutes); its value is not visible here.
      if (currentTime > lastHeartbeatCheck + heartbeatRecheckInterval) {
        heartbeatCheck();
        lastHeartbeatCheck = currentTime;
      }
      if (isAccessTokenEnabled
          && currentTime > lastAccessKeyUpdate + accessKeyUpdateInterval) {
        updateAccessKey();
        lastAccessKeyUpdate = currentTime;
      }
    }

    /** Sleeps for five seconds after each pass. */
    private void pauseBetweenPasses() {
      try {
        Thread.sleep(5000);
      } catch (InterruptedException ignored) {
        // Interruption is ignored here; the caller's loop re-tests fsRunning.
      }
    }
  }

下面看下heartbeatCheck的检测流程:

void heartbeatCheck() {
    //如果是安全模式,直接返回
    if (isInSafeMode()) {
      // not to check dead nodes if in safemode
      return;
    }
    boolean allAlive = false;
    while (!allAlive) {
      boolean foundDead = false;
      DatanodeID nodeID = null;
      // locate the first dead node.
      synchronized(heartbeats) {
        for (Iterator<DatanodeDescriptor> it = heartbeats.iterator();
             it.hasNext();) {
          DatanodeDescriptor nodeInfo =it.next();
          //检测规则为当前时间减去检测间隔,如果大于上次检测时间,则宣布死亡,一次只删除一个
          //死亡节点
          if (isDatanodeDead(nodeInfo)) {
            foundDead = true;
            nodeID = nodeInfo;
            break;
          }
        }
      }
 
      // 在移除死亡节点时会锁住整个fsnamesystem
      if (foundDead) {
        synchronized (this) {
          synchronized(heartbeats) {
            synchronized (datanodeMap) {
              DatanodeDescriptor nodeInfo = null;
              try {
                nodeInfo = getDatanode(nodeID);
              } catch (IOException e) {
                nodeInfo = null;
              }
              if (nodeInfo != null && isDatanodeDead(nodeInfo)) {
                NameNode.stateChangeLog.info("BLOCK* NameSystem.heartbeatCheck:
                         "+ "lost heartbeat from " + nodeInfo.getName());
                //将该节点在heartbeats中删除,heartbeats为ArrayList
                removeDatanode(nodeInfo);
              }
            }
          }
        }
      }
      allAlive = !foundDead;
    }
  }
 


你可能感兴趣的:(Hadoop NameNode启动之HeartbeatMonitor ( 五))