Netty的写操作由两个步骤组成:
上一篇文章分析了write,本文接着分析第二步flush:
////DefaultChannelHandlerContext public ChannelHandlerContext flush() { final DefaultChannelHandlerContext next = findContextOutbound(); EventExecutor executor = next.executor(); if (executor.inEventLoop()) { next.invokeFlush(); } else { Runnable task = next.invokeFlushTask; if (task == null) { next.invokeFlushTask = task = new Runnable() { @Override public void run() { next.invokeFlush(); } }; } executor.execute(task); } return this; } private void invokeFlush() { try { ((ChannelOutboundHandler) handler).flush(this); } catch (Throwable t) { notifyHandlerException(t); } }
由于flush是Outbound事件,所以会调用headHandler的flush方法
//HeadHandler public void flush(ChannelHandlerContext ctx) throws Exception { unsafe.flush(); }
HeadHandler调用abstractUnsafe的flush方法
//AbstractUnsafe public void flush() { ChannelOutboundBuffer outboundBuffer = this.outboundBuffer; if (outboundBuffer == null) { return; } outboundBuffer.addFlush(); flush0(); }
outboundBuffer 是之前第一步中msg所存储的地方,通过调用outboundBuffer.addFlush(),将outboundBuffer 的unflushed置为tail,这样本次等待flush的msg在buffer数组中的位置区间就为[flushed, unflushed)。
//ChannelOutboundBuffer void addFlush() { unflushed = tail; }
接下来的flush0方法将这个区间的msg写到套接字的发送缓冲区中。
//ChannelOutboundBuffer protected void flush0() { if (inFlush0) { // Avoid re-entrance return; } final ChannelOutboundBuffer outboundBuffer = this.outboundBuffer; if (outboundBuffer == null || outboundBuffer.isEmpty()) { return; } inFlush0 = true; // Mark all pending write requests as failure if the channel is inactive. if (!isActive()) { try { if (isOpen()) { outboundBuffer.failFlushed(NOT_YET_CONNECTED_EXCEPTION); } else { outboundBuffer.failFlushed(CLOSED_CHANNEL_EXCEPTION); } } finally { inFlush0 = false; } return; } try { doWrite(outboundBuffer); } catch (Throwable t) { outboundBuffer.failFlushed(t); if (t instanceof IOException) { close(voidPromise()); } } finally { inFlush0 = false; } }
主要逻辑在doWrite方法里
//NioSocketChannel protected void doWrite(ChannelOutboundBuffer in) throws Exception { for (;;) { // Do non-gathering write for a single buffer case. final int msgCount = in.size(); if (msgCount <= 1) { super.doWrite(in); return; } // Ensure the pending writes are made of ByteBufs only. ByteBuffer[] nioBuffers = in.nioBuffers(); if (nioBuffers == null) { super.doWrite(in); return; } int nioBufferCnt = in.nioBufferCount(); long expectedWrittenBytes = in.nioBufferSize(); final SocketChannel ch = javaChannel(); long writtenBytes = 0; boolean done = false; boolean setOpWrite = false; for (int i = config().getWriteSpinCount() - 1; i >= 0; i --) { final long localWrittenBytes = ch.write(nioBuffers, 0, nioBufferCnt); if (localWrittenBytes == 0) { setOpWrite = true; break; } expectedWrittenBytes -= localWrittenBytes; writtenBytes += localWrittenBytes; if (expectedWrittenBytes == 0) { done = true; break; } } if (done) { // Release all buffers for (int i = msgCount; i > 0; i --) { in.remove(); } // Finish the write loop if no new messages were flushed by in.remove(). if (in.isEmpty()) { clearOpWrite(); break; } } else { // Did not write all buffers completely. // Release the fully written buffers and update the indexes of the partially written buffer. for (int i = msgCount; i > 0; i --) { final ByteBuf buf = (ByteBuf) in.current(); final int readerIndex = buf.readerIndex(); final int readableBytes = buf.writerIndex() - readerIndex; if (readableBytes < writtenBytes) { in.progress(readableBytes); in.remove(); writtenBytes -= readableBytes; } else if (readableBytes > writtenBytes) { buf.readerIndex(readerIndex + (int) writtenBytes); in.progress(writtenBytes); break; } else { // readableBytes == writtenBytes in.progress(readableBytes); in.remove(); break; } } incompleteWrite(setOpWrite); break; } } }
逻辑如下
a) 首先分析super.doWrite(in)方法,即父类AbstractNioByteChannel的doWrite方法
protected void doWrite(ChannelOutboundBuffer in) throws Exception { int writeSpinCount = -1; for (;;) { Object msg = in.current(); if (msg == null) { // Wrote all messages. clearOpWrite(); break; } if (msg instanceof ByteBuf) { ByteBuf buf = (ByteBuf) msg; int readableBytes = buf.readableBytes(); if (readableBytes == 0) { in.remove(); continue; } if (!buf.isDirect()) { ByteBufAllocator alloc = alloc(); if (alloc.isDirectBufferPooled()) { // Non-direct buffers are copied into JDK's own internal direct buffer on every I/O. // We can do a better job by using our pooled allocator. If the current allocator does not // pool a direct buffer, we rely on JDK's direct buffer pool. buf = alloc.directBuffer(readableBytes).writeBytes(buf); in.current(buf); } } boolean setOpWrite = false; boolean done = false; long flushedAmount = 0; if (writeSpinCount == -1) { writeSpinCount = config().getWriteSpinCount(); } for (int i = writeSpinCount - 1; i >= 0; i --) { int localFlushedAmount = doWriteBytes(buf); if (localFlushedAmount == 0) { setOpWrite = true; break; } flushedAmount += localFlushedAmount; if (!buf.isReadable()) { done = true; break; } } in.progress(flushedAmount); if (done) { in.remove(); } else { incompleteWrite(setOpWrite); break; } } else if (msg instanceof FileRegion) { FileRegion region = (FileRegion) msg; boolean setOpWrite = false; boolean done = false; long flushedAmount = 0; if (writeSpinCount == -1) { writeSpinCount = config().getWriteSpinCount(); } for (int i = writeSpinCount - 1; i >= 0; i --) { long localFlushedAmount = doWriteFileRegion(region); if (localFlushedAmount == 0) { setOpWrite = true; break; } flushedAmount += localFlushedAmount; if (region.transfered() >= region.count()) { done = true; break; } } in.progress(flushedAmount); if (done) { in.remove(); } else { incompleteWrite(setOpWrite); break; } } else { throw new UnsupportedOperationException("unsupported message type: " + StringUtil.simpleClassName(msg)); } } }
protected final void incompleteWrite(boolean setOpWrite) { // Did not write completely. if (setOpWrite) { setOpWrite(); } else { // Schedule flush again later so other tasks can be picked up in the meantime Runnable flushTask = this.flushTask; if (flushTask == null) { flushTask = this.flushTask = new Runnable() { @Override public void run() { flush(); } }; } eventLoop().execute(flushTask); } } protected final void setOpWrite() { final SelectionKey key = selectionKey(); final int interestOps = key.interestOps(); if ((interestOps & SelectionKey.OP_WRITE) == 0) { key.interestOps(interestOps | SelectionKey.OP_WRITE); } }
说明:
//NioSocketChannel protected int doWriteBytes(ByteBuf buf) throws Exception { final int expectedWrittenBytes = buf.readableBytes(); final int writtenBytes = buf.readBytes(javaChannel(), expectedWrittenBytes); return writtenBytes; } //UnpooledHeapByteBuf public int readBytes(GatheringByteChannel out, int length) throws IOException { checkReadableBytes(length); int readBytes = getBytes(readerIndex, out, length, true); readerIndex += readBytes; return readBytes; } private int getBytes(int index, GatheringByteChannel out, int length, boolean internal) throws IOException { ensureAccessible(); ByteBuffer tmpBuf; if (internal) { tmpBuf = internalNioBuffer(); } else { tmpBuf = ByteBuffer.wrap(array); } return out.write((ByteBuffer) tmpBuf.clear().position(index).limit(index + length)); }
由于是非阻塞IO,所以最终写到发送缓冲区中的字节数writtenBytes可能会小于buf中期望写出的字节数expectedWrittenBytes。如果此时不再写,而是依赖selector的异步通知,则会导致buf里剩下的数据不能及时写出去(因为必须等到selector的下一次循环,即必须将本次循环中通知的未处理完的所有事件处理完后,以及剩下的task执行完后,然后再执行一次select,才能处理到这个channel的write事件;在这个过程中,还有可能会发生线程的上下文切换。这样,就会导致msg写到ChannelOutBoundBuffer后,会经历较大的延迟才能将消息flush到套接字的发送缓冲区。
Netty采用类似于自旋锁的逻辑,在一个循环内,多次调用write。这样就有可能将buf中的所有数据在一次flush调用中写完。循环的次数值为writeSpinCount,其默认值为16。
但是如果一次write调用返回0,则说明发送缓冲区已经完全没有空间了,如果还继续调用write,而系统调用开销是比较大的,所以是一种浪费,此种情况可以退出循环,设置selectionKey的OP_WRITE位,以依赖selector的异步通知。
如果在自旋期间多次调用write后,数据还是没有写完,而每次write调用的返回又不是0,说明每次的write确实写出去了一些字节,这种情况也不能立即退出flush再依赖selector的异步通知,因为有可能是自旋锁的循环次数设置小了导致buf的数据没有发送完,但实际发送缓冲区还是有空间的。因此将剩下数据的写作为一个异步任务放到当前线程的任务队列中,等待调度执行。这样当本次循环中选择的剩下的所有事件处理完后,就可以执行这个任务了,而不用等到由下次的selector唤醒。
//ChannelOutBoundBuffer public boolean remove() { if (isEmpty()) { return false; } Entry e = buffer[flushed]; Object msg = e.msg; if (msg == null) { return false; } ChannelPromise promise = e.promise; int size = e.pendingSize; e.clear(); flushed = flushed + 1 & buffer.length - 1; safeRelease(msg); promise.trySuccess(); decrementPendingOutboundBytes(size); return true; }
首先对buffer[flushed]对应的Entry执行clear操作
//Entry public void clear() { buffers = null; buf = null; msg = null; promise = null; progress = 0; total = 0; pendingSize = 0; count = -1; }
然后将flunshed累加1,接着对msg执行基于引用计数的release操作,最后看一下decrementPendingOutboundBytes方法的实现
void decrementPendingOutboundBytes(int size) { // Cache the channel and check for null to make sure we not produce a NPE in case of the Channel gets // recycled while process this method. Channel channel = this.channel; if (size == 0 || channel == null) { return; } long oldValue = totalPendingSize; long newWriteBufferSize = oldValue - size; while (!TOTAL_PENDING_SIZE_UPDATER.compareAndSet(this, oldValue, newWriteBufferSize)) { oldValue = totalPendingSize; newWriteBufferSize = oldValue - size; } int lowWaterMark = channel.config().getWriteBufferLowWaterMark(); if (newWriteBufferSize == 0 || newWriteBufferSize < lowWaterMark) { if (WRITABLE_UPDATER.compareAndSet(this, 0, 1)) { channel.pipeline().fireChannelWritabilityChanged(); } } }
更新ChannelOutBoundBuffer中待发送的msg大小totalPendingSize,并判断如果totalPendingSize小于channel的低水位线,则设置channel为可写,并触发ChannelWritabilityChanged事件。
b)最后,分析gathering write逻辑:
//NioSocketChannel protected void doWrite(ChannelOutboundBuffer in) throws Exception { for (;;) { //之前已介绍非gathering write的逻辑,所以此处省略相关代码 int nioBufferCnt = in.nioBufferCount(); long expectedWrittenBytes = in.nioBufferSize(); final SocketChannel ch = javaChannel(); long writtenBytes = 0; boolean done = false; for (int i = config().getWriteSpinCount() - 1; i >= 0; i --) { final long localWrittenBytes = ch.write(nioBuffers, 0, nioBufferCnt); if (localWrittenBytes == 0) { break; } expectedWrittenBytes -= localWrittenBytes; writtenBytes += localWrittenBytes; if (expectedWrittenBytes == 0) { done = true; break; } } if (done) { // Release all buffers for (int i = msgCount; i > 0; i --) { in.remove(); } // Finish the write loop if no new messages were flushed by in.remove(). if (in.isEmpty()) { clearOpWrite(); break; } } else { // Did not write all buffers completely. // Release the fully written buffers and update the indexes of the partially written buffer. for (int i = msgCount; i > 0; i --) { final ByteBuf buf = (ByteBuf) in.current(); final int readerIndex = buf.readerIndex(); final int readableBytes = buf.writerIndex() - readerIndex; if (readableBytes < writtenBytes) { in.progress(readableBytes); in.remove(); writtenBytes -= readableBytes; } else if (readableBytes > writtenBytes) { buf.readerIndex(readerIndex + (int) writtenBytes); in.progress(writtenBytes); break; } else { // readableBytes == writtenBytes in.progress(readableBytes); in.remove(); break; } } setOpWrite(); break; } } }
说明: