hadoop fs -copyFromLocal localfile hdfs://...
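The same copy can be done programmatically. A minimal sketch (the namenode URI and destination path below are placeholders for illustration, not values from the command above):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:9000/"), conf);
    // copyFromLocalFile() goes through the same FileUtil.copy() shown next
    fs.copyFromLocalFile(new Path("localfile"), new Path("/user/demo/localfile"));
    fs.close();
  }
}

Either way, the copy ends up in FileUtil.copy():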
/** Copy files between FileSystems. */
public static boolean copy(FileSystem srcFS, Path src,
                           FileSystem dstFS, Path dst,
                           boolean deleteSource,
                           boolean overwrite,
                           Configuration conf) throws IOException {
  ...
  // Some code is omitted here to highlight the important part.
  InputStream in = null;
  OutputStream out = null;
  try {
    in = srcFS.open(src);
    out = dstFS.create(dst, overwrite);
    IOUtils.copyBytes(in, out, conf, true);
  } catch (IOException e) {
    IOUtils.closeStream(out);
    IOUtils.closeStream(in);
    throw e;
  }
  ...
}
Following dstFS.create() down into the HDFS client, we eventually arrive at:

OutputStream result = new DFSOutputStream(src, masked, overwrite, replication,
    blockSize, progress, buffersize,
    conf.getInt("io.bytes.per.checksum", 512));

So all the secrets must be hiding in the DFSOutputStream class.
private Socket s;                         // socket connection to the datanode
private DataOutputStream blockStream;     // socket output stream (client -> datanode), used to push data to the datanode
private DataInputStream blockReplyStream; // socket input stream (datanode -> client), used to receive acknowledgement packets from the datanode
private LinkedList<Packet> dataQueue = new LinkedList<Packet>(); // data queue: packets waiting to be sent to the datanode
private LinkedList<Packet> ackQueue = new LinkedList<Packet>();  // ack queue: packets not yet acknowledged by the datanode
...
private DataStreamer streamer = new DataStreamer(); // streamer thread: keeps taking packets off dataQueue and sending them to the datanode
private ResponseProcessor response = null;          // response thread: receives the acknowledgements coming back from the datanode
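The two queues plus the two threads form a classic producer/consumer handoff. Here is a stripped-down model of that handoff; it illustrates the pattern only and is not the real DFSClient code (the actual class synchronizes on dataQueue and also handles timeouts and pipeline errors):

import java.util.LinkedList;

class QueueModel {
  private final LinkedList<byte[]> dataQueue = new LinkedList<byte[]>();
  private final LinkedList<byte[]> ackQueue = new LinkedList<byte[]>();

  // Producer side: the writing thread enqueues a finished 64KB packet.
  synchronized void enqueuePacket(byte[] packet) {
    dataQueue.addLast(packet);
    notifyAll(); // wake the streamer thread
  }

  // Consumer side: the streamer thread moves a packet from dataQueue to
  // ackQueue before sending it, so it can be resent if the pipeline fails.
  synchronized byte[] takePacket() throws InterruptedException {
    while (dataQueue.isEmpty()) {
      wait();
    }
    byte[] packet = dataQueue.removeFirst();
    ackQueue.addLast(packet);
    return packet;
  }

  // Called by the response thread once the datanode acknowledges a packet.
  synchronized void ackPacket() {
    ackQueue.removeFirst();
  }
}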
So when data (usually a byte[] array) is written into a DFSOutputStream, the actual transfer proceeds as follows:
1. The byte[] is packaged into 64KB Packet objects, which are appended to dataQueue.
2. The DataStreamer thread keeps taking packets off dataQueue, sends them to the first datanode of the pipeline, and moves them onto ackQueue.
3. The ResponseProcessor thread receives acknowledgements from the datanode and removes the acknowledged packets from ackQueue.

On the datanode side, the incoming request is dispatched by DataXceiver according to its operation code:

case DataTransferProtocol.OP_WRITE_BLOCK:
  writeBlock( in );
writeBlock() then hands the streams over to a BlockReceiver:

blockReceiver.receiveBlock(mirrorOut, mirrorIn, replyOut,
                           mirrorAddr, null, targets.length);

The parameters, in the signature of receiveBlock(), mean:

DataOutputStream mirrOut,  // output to the next datanode in the pipeline
DataInputStream mirrIn,    // input from the next datanode
DataOutputStream replyOut, // output back to the previous node (possibly the original client), used to send acknowledgement packets
String mirrAddr,
BlockTransferThrottler throttlerArg,
int numTargets) throws IOException {
/*
 * Receive until packet length is zero.
 */
while (receivePacket() > 0) {}
Each receivePacket() call does three things:

// keep reading Packet data from the input stream:
int payloadLen = readNextPacket();
// forward the data to the next datanode in the pipeline:
mirrorOut.write(buf.array(), buf.position(), buf.remaining());
mirrorOut.flush();
// and write it to the local disk:
out.write(pktBuf, dataOff, len);
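Putting those three steps together, one datanode's role in the pipeline can be modeled roughly as follows. This is a simplified sketch with an invented packet framing (a plain length-prefixed payload), not the real BlockReceiver.receivePacket():

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;

class PipelineRelayModel {
  private final DataInputStream in;         // from the client or the previous datanode
  private final DataOutputStream mirrorOut; // to the next datanode, or null at the end of the pipeline
  private final OutputStream diskOut;       // local block file

  PipelineRelayModel(DataInputStream in, DataOutputStream mirrorOut, OutputStream diskOut) {
    this.in = in;
    this.mirrorOut = mirrorOut;
    this.diskOut = diskOut;
  }

  // Returns the payload length, or 0 for the empty packet that ends the block.
  int relayOnePacket() throws IOException {
    int payloadLen = in.readInt(); // header: payload length
    if (payloadLen == 0) {
      return 0;                    // "receive until packet length is zero"
    }
    byte[] buf = new byte[payloadLen];
    in.readFully(buf);
    if (mirrorOut != null) {       // forward downstream first...
      mirrorOut.writeInt(payloadLen);
      mirrorOut.write(buf);
      mirrorOut.flush();
    }
    diskOut.write(buf);            // ...then write to local disk
    return payloadLen;
  }
}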
line 3043:

if (bytesCurBlock == blockSize) { // But can they ever be exactly equal? What if bytesCurBlock > blockSize?
  currentPacket.lastPacketInBlock = true;
  bytesCurBlock = 0;
  lastFlushOffset = -1;
}
They can, because the packet size is always clipped to whatever space remains in the block, so bytesCurBlock can never overshoot blockSize:

int psize = Math.min((int)(blockSize - bytesCurBlock), writePacketSize);
computePacketChunkSize(psize, bytesPerChecksum);
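A quick sanity check of that arithmetic with the common defaults (64MB blocks, 64KB packets):

public class PacketSizeDemo {
  public static void main(String[] args) {
    long blockSize = 64L * 1024 * 1024;    // 67108864 bytes
    int writePacketSize = 64 * 1024;       // 65536 bytes
    long bytesCurBlock = blockSize - 1000; // only 1000 bytes left in the block

    int psize = (int) Math.min(blockSize - bytesCurBlock, writePacketSize);
    System.out.println(psize); // 1000: the final packet is shrunk so that
                               // bytesCurBlock lands exactly on blockSize
  }
}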
When a block is finished, the DataStreamer thread asks the namenode for the next one: whenever there is no open block stream, it allocates a new block and starts a fresh ResponseProcessor for the new pipeline.

// line 2285:
// get new block from namenode.
if (blockStream == null) {
  LOG.debug("Allocating new block");
  nodes = nextBlockOutputStream(src);
  this.setName("DataStreamer for file " + src + " block " + block);
  response = new ResponseProcessor(nodes);
  response.start();
}
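The ResponseProcessor started here closes the loop on ackQueue. A stripped-down model of its run loop, reusing the hypothetical QueueModel sketched earlier (the real class also verifies packet sequence numbers and marks failed datanodes for pipeline recovery):

import java.io.DataInputStream;
import java.io.IOException;

class ResponseLoopModel implements Runnable {
  private final DataInputStream blockReplyStream; // acks coming back from the datanode
  private final QueueModel queues;

  ResponseLoopModel(DataInputStream blockReplyStream, QueueModel queues) {
    this.blockReplyStream = blockReplyStream;
    this.queues = queues;
  }

  public void run() {
    try {
      while (!Thread.currentThread().isInterrupted()) {
        long seqno = blockReplyStream.readLong(); // one ack per packet
        queues.ackPacket();                       // retire the packet from ackQueue
      }
    } catch (IOException e) {
      // the real code records the failure and triggers pipeline recovery
    }
  }
}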