先分析 HeartbeatManager 的成员变量:
datanodes:存储当前被认为存活的 DataNode(是 DatanodeManager 中 datanodeMap 的子集);
stats:存储系统全局的一些统计信息(容量、已用空间、xceiver 数量等)。
/**
 * Stores a subset of the datanodeMap in DatanodeManager,
 * containing nodes that are considered alive.
 * The heartbeat monitor ({@link Monitor}) periodically checks for out-dated
 * entries, and removes them from the list.
 * It is synchronized by the heartbeat manager lock, i.e. it is only iterated
 * inside {@code synchronized (this)} sections of this class.
 */
private final List<DatanodeDescriptor> datanodes = new ArrayList<DatanodeDescriptor>();

/** Statistics, which are synchronized by the heartbeat manager lock. */
private final Stats stats = new Stats();

/**
 * The time period to check for expired datanodes. It is compared against
 * differences of {@code Time.now()} values by the monitor thread
 * (presumably milliseconds -- confirm against {@code Time.now()}).
 */
private final long heartbeatRecheckInterval;

/** Heartbeat monitor thread; it runs a {@link Monitor} instance. */
private final Daemon heartbeatThread = new Daemon(new Monitor());

/**
 * Namesystem consulted for the running state and startup safe mode, and
 * whose write lock is taken before dead nodes or failed storages are removed.
 */
final Namesystem namesystem;

/**
 * Block manager that supplies the DatanodeManager, decides when block keys
 * need updating, and removes the blocks associated with a failed storage.
 */
final BlockManager blockManager;
/**
 * Aggregated datanode statistics.
 * A node that is decommissioning or already decommissioned contributes only
 * its used space to the capacity total; it is not counted as in service and
 * adds nothing to the remaining capacity.
 */
private static class Stats {
  // Capacity figures summed over all registered nodes.
  private long capacityTotal = 0L;
  private long capacityUsed = 0L;
  private long capacityRemaining = 0L;
  private long blockPoolUsed = 0L;
  private int xceiverCount = 0;

  // Cache figures summed over all registered nodes.
  private long cacheCapacity = 0L;
  private long cacheUsed = 0L;

  // Counters restricted to nodes that are neither decommissioning
  // nor decommissioned.
  private int nodesInService = 0;
  private int nodesInServiceXceiverCount = 0;

  // Number of heartbeats that were found expired.
  private int expiredHeartbeats = 0;

  /**
   * Adds the contribution of a node to the totals.
   *
   * @param node the node whose figures are added
   */
  private void add(final DatanodeDescriptor node) {
    accumulate(node, 1);
  }

  /**
   * Removes the contribution of a node from the totals; the exact mirror
   * of {@link #add(DatanodeDescriptor)}.
   *
   * @param node the node whose figures are removed
   */
  private void subtract(final DatanodeDescriptor node) {
    accumulate(node, -1);
  }

  /**
   * Applies a node's figures to every total with the given sign, so that
   * adding and subtracting can never drift apart.
   *
   * @param node the node whose figures are applied
   * @param sign {@code 1} to add the node, {@code -1} to subtract it
   */
  private void accumulate(final DatanodeDescriptor node, final int sign) {
    capacityUsed += sign * node.getDfsUsed();
    blockPoolUsed += sign * node.getBlockPoolUsed();
    xceiverCount += sign * node.getXceiverCount();

    final boolean leavingService =
        node.isDecommissionInProgress() || node.isDecommissioned();
    if (leavingService) {
      // Only the space actually used counts towards the total capacity.
      capacityTotal += sign * node.getDfsUsed();
    } else {
      nodesInService += sign;
      nodesInServiceXceiverCount += sign * node.getXceiverCount();
      capacityTotal += sign * node.getCapacity();
      capacityRemaining += sign * node.getRemaining();
    }

    cacheCapacity += sign * node.getCacheCapacity();
    cacheUsed += sign * node.getCacheUsed();
  }

  /** Increment expired heartbeat counter. */
  private void incrExpiredHeartbeats() {
    expiredHeartbeats++;
  }
}
/** Periodically check heartbeat and update block key */ private class Monitor implements Runnable { private long lastHeartbeatCheck; private long lastBlockKeyUpdate; @Override public void run() { while(namesystem.isRunning()) { try { final long now = Time.now(); if (lastHeartbeatCheck + heartbeatRecheckInterval < now) { heartbeatCheck(); lastHeartbeatCheck = now; } if (blockManager.shouldUpdateBlockKey(now - lastBlockKeyUpdate)) { synchronized(HeartbeatManager.this) { for(DatanodeDescriptor d : datanodes) { d.needKeyUpdate = true; } } lastBlockKeyUpdate = now; } } catch (Exception e) { LOG.error("Exception while checking heartbeat", e); } try { Thread.sleep(5000); // 5 seconds } catch (InterruptedException ie) { } } } }
/** * Check if there are any expired heartbeats, and if so, * whether any blocks have to be re-replicated. * While removing dead datanodes, make sure that only one datanode is marked * dead at a time within the synchronized section. Otherwise, a cascading * effect causes more datanodes to be declared dead. * Check if there are any failed storage and if so, * Remove all the blocks on the storage. It also covers the following less * common scenarios. After DatanodeStorage is marked FAILED, it is still * possible to receive IBR for this storage. * 1) DN could deliver IBR for failed storage due to its implementation. * a) DN queues a pending IBR request. * b) The storage of the block fails. * c) DN first sends HB, NN will mark the storage FAILED. * d) DN then sends the pending IBR request. * 2) SBN processes block request from pendingDNMessages. * It is possible to have messages in pendingDNMessages that refer * to some failed storage. * a) SBN receives a IBR and put it in pendingDNMessages. * b) The storage of the block fails. * c) Edit log replay get the IBR from pendingDNMessages. * Alternatively, we can resolve these scenarios with the following approaches. * A. Make sure DN don't deliver IBR for failed storage. * B. Remove all blocks in PendingDataNodeMessages for the failed storage * when we remove all blocks from BlocksMap for that storage. */ void heartbeatCheck() { final DatanodeManager dm = blockManager.getDatanodeManager(); // It's OK to check safe mode w/o taking the lock here, we re-check // for safe mode after taking the lock before removing a datanode. if (namesystem.isInStartupSafeMode()) { return; } boolean allAlive = false; while (!allAlive) { // locate the first dead node. DatanodeID dead = null; // locate the first failed storage that isn't on a dead node. 
DatanodeStorageInfo failedStorage = null; // check the number of stale nodes int numOfStaleNodes = 0; int numOfStaleStorages = 0; synchronized(this) { for (DatanodeDescriptor d : datanodes) { if (dead == null && dm.isDatanodeDead(d)) { stats.incrExpiredHeartbeats(); dead = d; } if (d.isStale(dm.getStaleInterval())) { numOfStaleNodes++; } DatanodeStorageInfo[] storageInfos = d.getStorageInfos(); for(DatanodeStorageInfo storageInfo : storageInfos) { if (storageInfo.areBlockContentsStale()) { numOfStaleStorages++; } if (failedStorage == null && storageInfo.areBlocksOnFailedStorage() && d != dead) { failedStorage = storageInfo; } } } // Set the number of stale nodes in the DatanodeManager dm.setNumStaleNodes(numOfStaleNodes); dm.setNumStaleStorages(numOfStaleStorages); } allAlive = dead == null && failedStorage == null; if (dead != null) { // acquire the fsnamesystem lock, and then remove the dead node. namesystem.writeLock(); try { if (namesystem.isInStartupSafeMode()) { return; } synchronized(this) { dm.removeDeadDatanode(dead); } } finally { namesystem.writeUnlock(); } } if (failedStorage != null) { // acquire the fsnamesystem lock, and remove blocks on the storage. namesystem.writeLock(); try { if (namesystem.isInStartupSafeMode()) { return; } synchronized(this) { blockManager.removeBlocksAssociatedTo(failedStorage); } } finally { namesystem.writeUnlock(); } } } }