/**
 * Bookkeeping record for a single block whose replication is in flight.
 * It keeps the time at which the latest replication request for the block
 * was recorded, together with the Datanodes that still have a replication
 * request outstanding for it.
 */
static class PendingBlockInfo {
  /** Time of the most recent replication request, as reported by now(). */
  private long timeStamp;
  /** Datanodes with a replication request still in progress. */
  private final List<DatanodeDescriptor> targets;

  /**
   * Creates the record and stamps it with the current time.
   *
   * @param targets initial replication targets; null is treated as "none"
   */
  PendingBlockInfo(DatanodeDescriptor[] targets) {
    this.timeStamp = now();
    if (targets == null) {
      this.targets = new ArrayList<DatanodeDescriptor>();
    } else {
      // Copy into a growable list; Arrays.asList alone is fixed-size.
      this.targets = new ArrayList<DatanodeDescriptor>(Arrays.asList(targets));
    }
  }

  /** @return the time recorded for the most recent replication request */
  long getTimeStamp() {
    return timeStamp;
  }

  /** Resets the recorded time to the current time. */
  void setTimeStamp() {
    timeStamp = now();
  }

  /**
   * Appends further targets to the in-progress list.
   *
   * @param newTargets Datanodes to add; a null array is ignored
   */
  void incrementReplicas(DatanodeDescriptor... newTargets) {
    if (newTargets == null) {
      return;
    }
    targets.addAll(Arrays.asList(newTargets));
  }

  /**
   * Removes one occurrence of the given Datanode from the in-progress list.
   *
   * @param dn the Datanode to remove
   */
  void decrementReplicas(DatanodeDescriptor dn) {
    targets.remove(dn);
  }

  /** @return the number of targets currently in the in-progress list */
  int getNumReplicas() {
    return targets.size();
  }
}
/*************************************************** * PendingReplicationBlocks does the bookkeeping of all * blocks that are getting replicated. * * It does the following: * 1) record blocks that are getting replicated at this instant. * 2) a coarse grain timer to track age of replication request * 3) a thread that periodically identifies replication-requests * that never made it. * ***************************************************/ class PendingReplicationBlocks { private static final Log LOG = BlockManager.LOG; private final Map<Block, PendingBlockInfo> pendingReplications; private final ArrayList<Block> timedOutItems; Daemon timerThread = null; private volatile boolean fsRunning = true; // // It might take anywhere between 5 to 10 minutes before // a request is timed out. // private long timeout = 5 * 60 * 1000; private final static long DEFAULT_RECHECK_INTERVAL = 5 * 60 * 1000;
increment方法为一个数据块增加一个指定datanode结点的复本。
/**
 * Records that replication of a block towards the given targets has been
 * requested. A block that is not yet tracked gets a new entry; a block that
 * is already tracked has the targets appended and its timestamp refreshed.
 * The update is performed while holding the pendingReplications monitor.
 *
 * @param block The corresponding block
 * @param targets The DataNodes where replicas of the block should be placed
 */
void increment(Block block, DatanodeDescriptor[] targets) {
  synchronized (pendingReplications) {
    PendingBlockInfo existing = pendingReplications.get(block);
    if (existing != null) {
      existing.incrementReplicas(targets);
      existing.setTimeStamp();
      return;
    }
    pendingReplications.put(block, new PendingBlockInfo(targets));
  }
}
/**
 * Marks one replication request for a block as finished. The given DataNode
 * is removed from the block's pending targets, and the block's entry is
 * dropped once no pending targets remain. Blocks that are not tracked are
 * ignored. The update is performed while holding the pendingReplications
 * monitor.
 *
 * @param block The block whose replication request has finished
 * @param dn The DataNode that finishes the replication
 */
void decrement(Block block, DatanodeDescriptor dn) {
  synchronized (pendingReplications) {
    PendingBlockInfo info = pendingReplications.get(block);
    if (info == null) {
      return;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Removing pending replication for " + block);
    }
    info.decrementReplicas(dn);
    boolean nothingPending = info.getNumReplicas() <= 0;
    if (nothingPending) {
      pendingReplications.remove(block);
    }
  }
}
/*
 * Background task that repeatedly scans the pending replications and moves
 * requests older than the timeout onto the timed-out list.
 */
class PendingReplicationMonitor implements Runnable {

  /**
   * Loops while fsRunning is true: performs one scan, then sleeps for the
   * smaller of DEFAULT_RECHECK_INTERVAL and timeout. An interrupt is only
   * logged at debug level; the loop then re-tests fsRunning.
   */
  @Override
  public void run() {
    while (fsRunning) {
      long sleepMillis =
          timeout < DEFAULT_RECHECK_INTERVAL ? timeout : DEFAULT_RECHECK_INTERVAL;
      try {
        pendingReplicationCheck();
        Thread.sleep(sleepMillis);
      } catch (InterruptedException ie) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("PendingReplicationMonitor thread is interrupted.", ie);
        }
      }
    }
  }

  /**
   * Walks every tracked block and, for each one whose timestamp plus the
   * timeout lies in the past, appends it to timedOutItems, logs a warning
   * and removes it from pendingReplications.
   */
  void pendingReplicationCheck() {
    synchronized (pendingReplications) {
      long currentTime = now();
      if (LOG.isDebugEnabled()) {
        LOG.debug("PendingReplicationMonitor checking Q");
      }
      Iterator<Map.Entry<Block, PendingBlockInfo>> cursor =
          pendingReplications.entrySet().iterator();
      while (cursor.hasNext()) {
        Map.Entry<Block, PendingBlockInfo> candidate = cursor.next();
        long deadline = candidate.getValue().getTimeStamp() + timeout;
        if (currentTime <= deadline) {
          // Still within its allowed time; leave it in place.
          continue;
        }
        Block expired = candidate.getKey();
        synchronized (timedOutItems) {
          timedOutItems.add(expired);
        }
        LOG.warn("PendingReplicationMonitor timed out " + expired);
        // Remove through the iterator so the ongoing traversal stays valid.
        cursor.remove();
      }
    }
  }
}