When we copy a local file into HDFS with the command:

hadoop fs -copyFromLocal localfile hdfs://...

the FsShell implementation eventually delegates the actual transfer to FileUtil.copy:
/** Copy files between FileSystems. */
public static boolean copy(FileSystem srcFS, Path src,
                           FileSystem dstFS, Path dst,
                           boolean deleteSource,
                           boolean overwrite,
                           Configuration conf) throws IOException {
  ...  // some code is omitted here to keep the important parts visible
  InputStream in = null;
  OutputStream out = null;
  try {
    in = srcFS.open(src);
    out = dstFS.create(dst, overwrite);
    IOUtils.copyBytes(in, out, conf, true);
  } catch (IOException e) {
    IOUtils.closeStream(out);
    IOUtils.closeStream(in);
    throw e;
  }
  ...
}
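For reference, a caller can reach this same code path directly through the FileSystem API. The sketch below is only an illustration; the namenode URI and the paths are made-up values:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

// Hypothetical usage sketch of FileUtil.copy; the namenode URI and paths are made up.
public class CopyToHdfs {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem hdfs = FileSystem.get(URI.create("hdfs://namenode:9000/"), conf);
    FileUtil.copy(localFS, new Path("/tmp/localfile"),
                  hdfs, new Path("/user/test/localfile"),
                  false /* deleteSource */, true /* overwrite */, conf);
  }
}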
OutputStream result = new DFSOutputStream(src, masked,
    overwrite, replication, blockSize, progress, buffersize,
    conf.getInt("io.bytes.per.checksum", 512));
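The last constructor argument, io.bytes.per.checksum (512 bytes by default), is the size of the data chunk that gets its own checksum before chunks are packed into packets. The fragment below is only a standalone illustration of that chunking idea, assuming the CRC-32 checksum that HDFS uses by default; it is not DFSClient code:

import java.util.zip.CRC32;

public class ChunkChecksumSketch {
  public static void main(String[] args) {
    byte[] data = new byte[4096];      // pretend this is a slice of user data
    int bytesPerChecksum = 512;        // io.bytes.per.checksum
    CRC32 crc = new CRC32();
    for (int off = 0; off < data.length; off += bytesPerChecksum) {
      int len = Math.min(bytesPerChecksum, data.length - off);
      crc.reset();
      crc.update(data, off, len);
      long checksum = crc.getValue();  // one checksum accompanies each 512-byte chunk
      System.out.printf("chunk at %d: crc=%08x%n", off, checksum);
    }
  }
}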
private Socket s;                         // socket connection to the datanode
private DataOutputStream blockStream;     // socket output stream (client -> datanode), used to send data to the datanode
private DataInputStream blockReplyStream; // socket input stream (datanode -> client), used to receive ack packets from the datanode
private LinkedList dataQueue = new LinkedList();
// dataQueue holds the packets that are waiting to be sent to the datanode
private LinkedList ackQueue = new LinkedList();
// ackQueue holds the packets that have been sent but not yet acknowledged by the datanode
...
private DataStreamer streamer = new DataStreamer();
// the streamer thread keeps taking packets off dataQueue and sending them to the datanode
private ResponseProcessor response = null;
// the response thread receives the acknowledgments coming back from the datanode
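To make the interaction between the two queues and the two threads concrete, here is a stripped-down, self-contained sketch of the same hand-off pattern. It is not DFSClient code; the Packet class and the method names are simplified stand-ins:

import java.util.LinkedList;

public class TwoQueueSketch {
  static class Packet {
    final int seqno;
    Packet(int seqno) { this.seqno = seqno; }
  }

  private final LinkedList<Packet> dataQueue = new LinkedList<>();
  private final LinkedList<Packet> ackQueue = new LinkedList<>();

  // writer: enqueue a packet to be sent
  public synchronized void enqueue(Packet p) {
    dataQueue.addLast(p);
    notifyAll();
  }

  // "DataStreamer" side: take the next packet, remember it as outstanding, send it
  public synchronized Packet takeForSend() throws InterruptedException {
    while (dataQueue.isEmpty()) {
      wait();
    }
    Packet p = dataQueue.removeFirst();
    ackQueue.addLast(p);        // stays here until the datanode acknowledges it
    return p;                   // the caller writes it to blockStream
  }

  // "ResponseProcessor" side: the oldest outstanding packet was acknowledged
  public synchronized void ackReceived(int seqno) {
    if (!ackQueue.isEmpty() && ackQueue.getFirst().seqno == seqno) {
      ackQueue.removeFirst();   // the packet has now been safely received
    }
    notifyAll();
  }
}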
On the datanode side, DataXceiver dispatches the incoming request by its opcode:

case DataTransferProtocol.OP_WRITE_BLOCK:
  writeBlock(in);

Inside writeBlock(), after the connection to the next datanode in the pipeline has been set up, the block data is received with:

blockReceiver.receiveBlock(mirrorOut, mirrorIn, replyOut,
    mirrorAddr, null, targets.length);
The parameters of receiveBlock mean the following:

DataOutputStream mirrOut,  // output to next datanode
                           // output stream to the next (downstream) datanode in the pipeline
DataInputStream mirrIn,    // input from next datanode
                           // input stream from that downstream datanode
DataOutputStream replyOut, // output to previous datanode
                           // output stream back to the upstream node (possibly the
                           // original client), used to send acknowledgment packets
String mirrAddr, BlockTransferThrottler throttlerArg,
int numTargets) throws IOException {
/*
* Receive until packet length is zero.
*/
while (receivePacket() > 0) {}
receivePacket() keeps reading packet data from the input stream:
int payloadLen = readNextPacket();
forwards the data to the next datanode in the pipeline:
mirrorOut.write(buf.array(), buf.position(), buf.remaining());
mirrorOut.flush();
and writes it to the local disk:
out.write(pktBuf, dataOff, len);
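Put together, the per-packet loop looks roughly like the sketch below. This is a simplification, not the real BlockReceiver: the packet framing (a plain length prefix) and the stream names are placeholders:

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class ReceiveLoopSketch {
  // 'in' is the upstream side (client or previous datanode); 'mirrorOut' is the next
  // datanode (null on the last node of the pipeline); 'diskOut' is the local block file.
  static void receivePackets(DataInputStream in, DataOutputStream mirrorOut,
                             OutputStream diskOut) throws IOException {
    while (true) {
      int payloadLen = in.readInt();     // placeholder framing: length-prefixed payload
      if (payloadLen <= 0) {
        break;                           // an empty packet marks the end of the block
      }
      byte[] buf = new byte[payloadLen];
      in.readFully(buf);
      if (mirrorOut != null) {           // 1. forward downstream first
        mirrorOut.write(buf, 0, payloadLen);
        mirrorOut.flush();
      }
      diskOut.write(buf, 0, payloadLen); // 2. then persist locally
    }
  }
}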
line 3043:
if (bytesCurBlock == blockSize) { // Question: can these ever be exactly equal?
                                  // What if bytesCurBlock > blockSize?
  currentPacket.lastPacketInBlock = true;
  bytesCurBlock = 0;
  lastFlushOffset = -1;
}
A few lines further down:
int psize = Math.min((int)(blockSize-bytesCurBlock), writePacketSize);
computePacketChunkSize(psize, bytesPerChecksum);
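These two lines answer the question raised above: the next packet is never allowed to hold more data than is left in the current block, so bytesCurBlock can land exactly on blockSize but cannot overshoot it. A quick numeric sketch, using the old default values of a 64 MB block and a 64 KB packet purely for illustration:

public class PacketSizeSketch {
  public static void main(String[] args) {
    long blockSize = 64L * 1024 * 1024;    // 64 MB block (illustrative default)
    int writePacketSize = 64 * 1024;       // 64 KB packet (illustrative default)
    long bytesCurBlock = blockSize - 1000; // only 1000 bytes left in the current block

    int psize = Math.min((int) (blockSize - bytesCurBlock), writePacketSize);
    System.out.println(psize);             // 1000: capped at the bytes remaining, so after
                                           // this packet bytesCurBlock == blockSize exactly
                                           // and the branch at line 3043 fires.
  }
}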
line 2285:
// get new block from namenode.
if (blockStream == null) {
  LOG.debug("Allocating new block");
  nodes = nextBlockOutputStream(src);
  this.setName("DataStreamer for file " + src +
               " block " + block);
  response = new ResponseProcessor(nodes);
  response.start();
}
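For context, the loop around this fragment can be summarized as follows. The sketch is heavily condensed and self-contained rather than real DFSClient code: Packet, the error handling, and nextBlockOutputStream() are simplified stand-ins, and the queue hand-off is the same one sketched earlier:

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.LinkedList;

// Heavily condensed sketch of the DataStreamer thread; not the real implementation.
public class DataStreamerSketch extends Thread {
  static class Packet { byte[] data = new byte[0]; boolean lastPacketInBlock; }

  private final LinkedList<Packet> dataQueue = new LinkedList<>();
  private final LinkedList<Packet> ackQueue = new LinkedList<>();
  private DataOutputStream blockStream;          // null until a block has been allocated
  private volatile boolean closed;

  @Override
  public void run() {
    try {
      while (!closed) {
        Packet one;
        synchronized (dataQueue) {
          while (dataQueue.isEmpty() && !closed) {
            dataQueue.wait();                    // wait for the writer thread to enqueue
          }
          if (closed) { return; }
          one = dataQueue.getFirst();
        }

        if (blockStream == null) {
          // ask the namenode for a new block and open the pipeline to its datanodes
          blockStream = nextBlockOutputStream();
        }

        synchronized (dataQueue) {
          dataQueue.removeFirst();
          ackQueue.addLast(one);                 // outstanding until the ack arrives
        }
        blockStream.write(one.data);             // send the packet down the pipeline
        blockStream.flush();

        if (one.lastPacketInBlock) {             // block is full: close the stream so the
          blockStream.close();                   // next packet triggers a new allocation
          blockStream = null;
        }
      }
    } catch (InterruptedException | IOException e) {
      // the real code records the failure and handles bad datanodes; omitted here
    }
  }

  // placeholder for the real pipeline setup against the namenode and datanodes
  private DataOutputStream nextBlockOutputStream() throws IOException {
    throw new UnsupportedOperationException("sketch only");
  }
}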