整个流程可以简单概括:DN发送信号->NN处理并返回结果->DN处理返回指令,本文中以删除文件为例介绍这个流程,相关知识参考:
Hadoop NameNode启动之ReplicationMonitor: http://blog.csdn.net/lihm0_1/article/details/10525795Hadoop DataNode启动之asyncDiskService:http://blog.csdn.net/lihm0_1/article/details/12232977
public void offerService() throws Exception { LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" + " Initial delay: " + initialBlockReportDelay + "msec"); while (shouldRun) { try { long startTime = now(); //心跳超时,则发送心跳 if (startTime - lastHeartbeat > heartBeatInterval) { //更改最后一次心跳时间 lastHeartbeat = startTime; //下面为发送心跳的函数 DatanodeCommand[] cmds = namenode.sendHeartbeat(dnRegistration,//注册体,上一篇注册时已经介绍过 data.getCapacity(),//该节点的容量 data.getDfsUsed(), //已使用多少 data.getRemaining(),//还有多少可用 xmitsInProgress.get(),//DN向其他节点发送块的次数 getXceiverCount());//DataXceiverServer线程组活跃线程的数量 //统计信息更新 myMetrics.addHeartBeat(now() - startTime); //LOG.info("Just sent heartbeat, with name " + localName); //处理返回指令 if (!processCommand(cmds)) continue; } .... }心跳信号的处理在NN中实现,下面为NameNode.java中的代码
public DatanodeCommand[] sendHeartbeat(DatanodeRegistration nodeReg,
long capacity,
long dfsUsed,
long remaining,
int xmitsInProgress,
int xceiverCount) throws IOException {
//LAYOUT_VERSION和RegistrationID对比
verifyRequest(nodeReg);
//处理心跳信息,会返回一个指令组告诉DN要处理的命令
return namesystem.handleHeartbeat(nodeReg, capacity, dfsUsed, remaining,
xceiverCount, xmitsInProgress);
}
下面函数会判断DN是否应该关闭、块恢复、是否需要块复制、块删除或系统升级,可以看出NN在HDFS集群中是作为控制中心的,每一次检测都会生成相关指令,并发送到DN.
DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg,
long capacity, long dfsUsed, long remaining,
int xceiverCount, int xmitsInProgress) throws IOException {
DatanodeCommand cmd = null;
synchronized (heartbeats) {
synchronized (datanodeMap) {
DatanodeDescriptor nodeinfo = null;
try {
//根据StorageID获得DN的信息
nodeinfo = getDatanode(nodeReg);
} catch(UnregisteredDatanodeException e) {
return new DatanodeCommand[]{DatanodeCommand.REGISTER};
}
// 检测该节点是否需要关闭
if (nodeinfo != null && shouldNodeShutdown(nodeinfo)) {
setDatanodeDead(nodeinfo);
throw new DisallowedDatanodeException(nodeinfo);
}
//如果当前NN没有该节点信息,则说明需要重新注册
if (nodeinfo == null || !nodeinfo.isAlive) {
return new DatanodeCommand[]{DatanodeCommand.REGISTER};
}
//更新节点信息,包括容量、已使用、剩余容量
updateStats(nodeinfo, false);
nodeinfo.updateHeartbeat(capacity, dfsUsed, remaining, xceiverCount);
updateStats(nodeinfo, true);
//检测是否需要块恢复
cmd = nodeinfo.getLeaseRecoveryCommand(Integer.MAX_VALUE);
if (cmd != null) {
return new DatanodeCommand[] {cmd};
}
ArrayList<DatanodeCommand> cmds = new ArrayList<DatanodeCommand>();
//检测挂起的复制块,需要复制的块减去已经复制的块
cmd = nodeinfo.getReplicationCommand(
maxReplicationStreams - xmitsInProgress);
if (cmd != null) {
cmds.add(cmd);
}
//检测失效块
cmd = nodeinfo.getInvalidateBlocks(blockInvalidateLimit);
if (cmd != null) {
cmds.add(cmd);
}
// check access key update
if (isAccessTokenEnabled && nodeinfo.needKeyUpdate) {
cmds.add(new KeyUpdateCommand(accessTokenHandler.exportKeys()));
nodeinfo.needKeyUpdate = false;
}
// 检测均衡器的带宽
if (nodeinfo.getBalancerBandwidth() > 0) {
cmds.add(new BalancerBandwidthCommand(nodeinfo.getBalancerBandwidth()));
// set back to 0 to indicate that datanode has been sent the new value
nodeinfo.setBalancerBandwidth(0);
}
if (!cmds.isEmpty()) {
return cmds.toArray(new DatanodeCommand[cmds.size()]);
}
}
}
//最后检测是否需要升级
cmd = getDistributedUpgradeCommand();
if (cmd != null) {
return new DatanodeCommand[] {cmd};
}
return null;
}
DN接收到的命令会定义在协议类中,如下
public interface DatanodeProtocol extends VersionedProtocol { .... /** * Determines actions that data node should perform * when receiving a datanode command. */ final static int DNA_UNKNOWN = 0; // 未知操作 final static int DNA_TRANSFER = 1; // 向其他DN传输块 final static int DNA_INVALIDATE = 2; // 删除块操作 final static int DNA_SHUTDOWN = 3; // 关闭节点 final static int DNA_REGISTER = 4; // 重新注册 final static int DNA_FINALIZE = 5; // 提交升级 final static int DNA_RECOVERBLOCK = 6; // 请求块恢复 final static int DNA_ACCESSKEYUPDATE = 7; // access key更新 final static int DNA_BALANCERBANDWIDTHUPDATE = 8; // 均衡器带宽更新 .... }以删除数据为例,当我们在客户端输入rm命令时,该命令传递到服务端,首先更新源数据,将相关数据块置为无效,该操作由NN的ReplicationMonitor线程实现,具体见:http://blog.csdn.net/lihm0_1/article/details/10525795 ,一旦有块失效,那么在下次心跳时就会把块删除执行返回给DN,注意handleHeartbeat函数中的一行代码:
cmd = nodeinfo.getInvalidateBlocks(blockInvalidateLimit);其中blockInvalidateLimit为本次删除的最大块数,避免DN负载过大
BlockCommand getInvalidateBlocks(int maxblocks) { //获得本次需删除的块列表 Block[] deleteList = getBlockArray(invalidateBlocks, maxblocks); //构建删除命令,注意该命令的格式:指令类型+删除列表 return deleteList == null? null: new BlockCommand(DatanodeProtocol.DNA_INVALIDATE, deleteList); }服务端完成后,把操作指令返回给DN,代码如下
... //处理返回指令 if (!processCommand(cmds)) continue;在执行命令的时候,DN会循环执行NN返回的指令组,核心代码为processCommand,如下:
private boolean processCommand(DatanodeCommand[] cmds) { if (cmds != null) { for (DatanodeCommand cmd : cmds) { try { //这里开始执行指令 if (processCommand(cmd) == false) { return false; } } catch (IOException ioe) { LOG.warn("Error processing datanode Command", ioe); } } } return true; }下面看指令执行部分,会根据指令类型进入不同分支
private boolean processCommand(DatanodeCommand cmd) throws IOException { if (cmd == null) return true; final BlockCommand bcmd = cmd instanceof BlockCommand? (BlockCommand)cmd: null; //进入分支判断 switch(cmd.getAction()) { case DatanodeProtocol.DNA_TRANSFER: // 向其他DN传输block transferBlocks(bcmd.getBlocks(), bcmd.getTargets()); myMetrics.incrBlocksReplicated(bcmd.getBlocks().length); break; case DatanodeProtocol.DNA_INVALIDATE: //删除块操作 Block toDelete[] = bcmd.getBlocks(); try { if (blockScanner != null) { //更多blockScanner参见:http://blog.csdn.net/lihm0_1/article/details/12437099 blockScanner.deleteBlocks(toDelete); } //这里会使用异步磁盘服务删除块,参见:http://blog.csdn.net/lihm0_1/article/details/12232977 data.invalidate(toDelete); } catch(IOException e) { checkDiskError(); throw e; } myMetrics.incrBlocksRemoved(toDelete.length); break; case DatanodeProtocol.DNA_SHUTDOWN: // 关闭DN this.shutdown(); return false; case DatanodeProtocol.DNA_REGISTER: // 重新注册 LOG.info("DatanodeCommand action: DNA_REGISTER"); if (shouldRun) { register(); } break; case DatanodeProtocol.DNA_FINALIZE: //提交升级 storage.finalizeUpgrade(); break; case UpgradeCommand.UC_ACTION_START_UPGRADE: //升级操作 // start distributed upgrade here processDistributedUpgradeCommand((UpgradeCommand)cmd); break; case DatanodeProtocol.DNA_RECOVERBLOCK://块恢复 recoverBlocks(bcmd.getBlocks(), bcmd.getTargets()); break; case DatanodeProtocol.DNA_ACCESSKEYUPDATE: LOG.info("DatanodeCommand action: DNA_ACCESSKEYUPDATE"); if (isBlockTokenEnabled) { blockTokenSecretManager.setKeys(((KeyUpdateCommand) cmd).getExportedKeys()); } break; case DatanodeProtocol.DNA_BALANCERBANDWIDTHUPDATE://节流器控制 LOG.info("DatanodeCommand action: DNA_BALANCERBANDWIDTHUPDATE"); int vsn = ((BalancerBandwidthCommand) cmd).getBalancerBandwidthVersion(); if (vsn >= 1) { long bandwidth = ((BalancerBandwidthCommand) cmd).getBalancerBandwidthValue(); if (bandwidth > 0) { DataXceiverServer dxcs = (DataXceiverServer) this.dataXceiverServer.getRunnable(); dxcs.balanceThrottler.setBandwidth(bandwidth); } } break; default: LOG.warn("Unknown DatanodeCommand action: " + cmd.getAction()); } return true; }比较常见的执行指令有block的恢复和删除、DN关闭、由于DN负载过高导致的重新注册。