先分析HeartbeatManager的成员变量:
datanodes存储的活着的datanodes;
stats存储系统全局的一些统计信息。
/**
* Stores a subset of the datanodeMap in DatanodeManager,
* containing nodes that are considered alive.
* The HeartbeatMonitor periodically checks for out-dated entries,
* and removes them from the list.
* It is synchronized by the heartbeat manager lock.
*/
private final List datanodes = new ArrayList();
/** Statistics, which are synchronized by the heartbeat manager lock. */
private final Stats stats = new Stats();
/** The time period to check for expired datanodes */
private final long heartbeatRecheckInterval;
/** Heartbeat monitor thread */
private final Daemon heartbeatThread = new Daemon(new Monitor());
final Namesystem namesystem;
final BlockManager blockManager;
/** Datanode statistics.
* For decommissioning/decommissioned nodes, only used capacity is counted.
*/
private static class Stats {
private long capacityTotal = 0L;
private long capacityUsed = 0L;
private long capacityRemaining = 0L;
private long blockPoolUsed = 0L;
private int xceiverCount = 0;
private long cacheCapacity = 0L;
private long cacheUsed = 0L;
private int nodesInService = 0;
private int nodesInServiceXceiverCount = 0;
private int expiredHeartbeats = 0;
private void add(final DatanodeDescriptor node) {
capacityUsed += node.getDfsUsed();
blockPoolUsed += node.getBlockPoolUsed();
xceiverCount += node.getXceiverCount();
if (!(node.isDecommissionInProgress() || node.isDecommissioned())) {
nodesInService++;
nodesInServiceXceiverCount += node.getXceiverCount();
capacityTotal += node.getCapacity();
capacityRemaining += node.getRemaining();
} else {
capacityTotal += node.getDfsUsed();
}
cacheCapacity += node.getCacheCapacity();
cacheUsed += node.getCacheUsed();
}
private void subtract(final DatanodeDescriptor node) {
capacityUsed -= node.getDfsUsed();
blockPoolUsed -= node.getBlockPoolUsed();
xceiverCount -= node.getXceiverCount();
if (!(node.isDecommissionInProgress() || node.isDecommissioned())) {
nodesInService--;
nodesInServiceXceiverCount -= node.getXceiverCount();
capacityTotal -= node.getCapacity();
capacityRemaining -= node.getRemaining();
} else {
capacityTotal -= node.getDfsUsed();
}
cacheCapacity -= node.getCacheCapacity();
cacheUsed -= node.getCacheUsed();
}
/** Increment expired heartbeat counter. */
private void incrExpiredHeartbeats() {
expiredHeartbeats++;
}
}
/** Periodically check heartbeat and update block key */
private class Monitor implements Runnable {
private long lastHeartbeatCheck;
private long lastBlockKeyUpdate;
@Override
public void run() {
while(namesystem.isRunning()) {
try {
final long now = Time.now();
if (lastHeartbeatCheck + heartbeatRecheckInterval < now) {
heartbeatCheck();
lastHeartbeatCheck = now;
}
if (blockManager.shouldUpdateBlockKey(now - lastBlockKeyUpdate)) {
synchronized(HeartbeatManager.this) {
for(DatanodeDescriptor d : datanodes) {
d.needKeyUpdate = true;
}
}
lastBlockKeyUpdate = now;
}
} catch (Exception e) {
LOG.error("Exception while checking heartbeat", e);
}
try {
Thread.sleep(5000); // 5 seconds
} catch (InterruptedException ie) {
}
}
}
}
/**
* Check if there are any expired heartbeats, and if so,
* whether any blocks have to be re-replicated.
* While removing dead datanodes, make sure that only one datanode is marked
* dead at a time within the synchronized section. Otherwise, a cascading
* effect causes more datanodes to be declared dead.
* Check if there are any failed storage and if so,
* Remove all the blocks on the storage. It also covers the following less
* common scenarios. After DatanodeStorage is marked FAILED, it is still
* possible to receive IBR for this storage.
* 1) DN could deliver IBR for failed storage due to its implementation.
* a) DN queues a pending IBR request.
* b) The storage of the block fails.
* c) DN first sends HB, NN will mark the storage FAILED.
* d) DN then sends the pending IBR request.
* 2) SBN processes block request from pendingDNMessages.
* It is possible to have messages in pendingDNMessages that refer
* to some failed storage.
* a) SBN receives a IBR and put it in pendingDNMessages.
* b) The storage of the block fails.
* c) Edit log replay get the IBR from pendingDNMessages.
* Alternatively, we can resolve these scenarios with the following approaches.
* A. Make sure DN don't deliver IBR for failed storage.
* B. Remove all blocks in PendingDataNodeMessages for the failed storage
* when we remove all blocks from BlocksMap for that storage.
*/
void heartbeatCheck() {
final DatanodeManager dm = blockManager.getDatanodeManager();
// It's OK to check safe mode w/o taking the lock here, we re-check
// for safe mode after taking the lock before removing a datanode.
if (namesystem.isInStartupSafeMode()) {
return;
}
boolean allAlive = false;
while (!allAlive) {
// locate the first dead node.
DatanodeID dead = null;
// locate the first failed storage that isn't on a dead node.
DatanodeStorageInfo failedStorage = null;
// check the number of stale nodes
int numOfStaleNodes = 0;
int numOfStaleStorages = 0;
synchronized(this) {
for (DatanodeDescriptor d : datanodes) {
if (dead == null && dm.isDatanodeDead(d)) {
stats.incrExpiredHeartbeats();
dead = d;
}
if (d.isStale(dm.getStaleInterval())) {
numOfStaleNodes++;
}
DatanodeStorageInfo[] storageInfos = d.getStorageInfos();
for(DatanodeStorageInfo storageInfo : storageInfos) {
if (storageInfo.areBlockContentsStale()) {
numOfStaleStorages++;
}
if (failedStorage == null &&
storageInfo.areBlocksOnFailedStorage() &&
d != dead) {
failedStorage = storageInfo;
}
}
}
// Set the number of stale nodes in the DatanodeManager
dm.setNumStaleNodes(numOfStaleNodes);
dm.setNumStaleStorages(numOfStaleStorages);
}
allAlive = dead == null && failedStorage == null;
if (dead != null) {
// acquire the fsnamesystem lock, and then remove the dead node.
namesystem.writeLock();
try {
if (namesystem.isInStartupSafeMode()) {
return;
}
synchronized(this) {
dm.removeDeadDatanode(dead);
}
} finally {
namesystem.writeUnlock();
}
}
if (failedStorage != null) {
// acquire the fsnamesystem lock, and remove blocks on the storage.
namesystem.writeLock();
try {
if (namesystem.isInStartupSafeMode()) {
return;
}
synchronized(this) {
blockManager.removeBlocksAssociatedTo(failedStorage);
}
} finally {
namesystem.writeUnlock();
}
}
}
}