// org.apache.rocketmq.store.AllocateMappedFileService // 创建 MappedFile 的请求 static class AllocateRequest implements Comparable{ // Full file path private String filePath; private int fileSize; private CountDownLatch countDownLatch = new CountDownLatch(1); private volatile MappedFile mappedFile = null; }
创建 MappedFile 是一个异步的过程
// AllocateRequest 放在队列中 private PriorityBlockingQueuerequestQueue = new PriorityBlockingQueue ();
往队列中放入 AllocateRequest
public MappedFile putRequestAndReturnMappedFile(String nextFilePath, String nextNextFilePath, int fileSize) { int canSubmitRequests = 2; // 这个开关默认没有开启 if (this.messageStore.getMessageStoreConfig().isTransientStorePoolEnable()) { if (this.messageStore.getMessageStoreConfig().isFastFailIfNoBufferInStorePool() && BrokerRole.SLAVE != this.messageStore.getMessageStoreConfig().getBrokerRole()) { //if broker is slave, don't fast fail even no buffer in pool canSubmitRequests = this.messageStore.getTransientStorePool().availableBufferNums() - this.requestQueue.size(); } } AllocateRequest nextReq = new AllocateRequest(nextFilePath, fileSize); // 查询 requestTable 中是否有请求 // requestTable 中不含请求,则 nextPutOK 为 true boolean nextPutOK = this.requestTable.putIfAbsent(nextFilePath, nextReq) == null; // 如果原来 requestTable 没有请求,则放入 requestQueue 中 if (nextPutOK) { if (canSubmitRequests <= 0) { log.warn("[NOTIFYME]TransientStorePool is not enough, so create mapped file error, " + "RequestQueueSize : {}, StorePoolSize: {}", this.requestQueue.size(), this.messageStore.getTransientStorePool().availableBufferNums()); this.requestTable.remove(nextFilePath); return null; } boolean offerOK = this.requestQueue.offer(nextReq); if (!offerOK) { log.warn("never expected here, add a request to preallocate queue failed"); } canSubmitRequests--; } // 一次处理两个请求,处理方式同上面 AllocateRequest nextNextReq = new AllocateRequest(nextNextFilePath, fileSize); boolean nextNextPutOK = this.requestTable.putIfAbsent(nextNextFilePath, nextNextReq) == null; if (nextNextPutOK) { if (canSubmitRequests <= 0) { log.warn("[NOTIFYME]TransientStorePool is not enough, so skip preallocate mapped file, " + "RequestQueueSize : {}, StorePoolSize: {}", this.requestQueue.size(), this.messageStore.getTransientStorePool().availableBufferNums()); this.requestTable.remove(nextNextFilePath); } else { boolean offerOK = this.requestQueue.offer(nextNextReq); if (!offerOK) { log.warn("never expected here, add a request to preallocate queue failed"); } } } if (hasException) { log.warn(this.getServiceName() + " service has exception. so return null"); return null; } AllocateRequest result = this.requestTable.get(nextFilePath); try { if (result != null) { boolean waitOK = result.getCountDownLatch().await(waitTimeOut, TimeUnit.MILLISECONDS); if (!waitOK) { log.warn("create mmap timeout " + result.getFilePath() + " " + result.getFileSize()); return null; } else { // waitOK 为 true,表明 mappedFile 已创建好 this.requestTable.remove(nextFilePath); return result.getMappedFile(); } } else { log.error("find preallocate mmap failed, this never happen"); } } catch (InterruptedException e) { log.warn(this.getServiceName() + " service has exception. ", e); } return null; }
从队列中取出 AllocateRequest,创建 MappedFile
// org.apache.rocketmq.store.AllocateMappedFileService#mmapOperation private boolean mmapOperation() { boolean isSuccess = false; AllocateRequest req = null; try { req = this.requestQueue.take(); AllocateRequest expectedRequest = this.requestTable.get(req.getFilePath()); if (null == expectedRequest) { log.warn("this mmap request expired, maybe cause timeout " + req.getFilePath() + " " + req.getFileSize()); return true; } if (expectedRequest != req) { log.warn("never expected here, maybe cause timeout " + req.getFilePath() + " " + req.getFileSize() + ", req:" + req + ", expectedRequest:" + expectedRequest); return true; } if (req.getMappedFile() == null) { long beginTime = System.currentTimeMillis(); MappedFile mappedFile; if (messageStore.getMessageStoreConfig().isTransientStorePoolEnable()) { try { mappedFile = ServiceLoader.load(MappedFile.class).iterator().next(); mappedFile.init(req.getFilePath(), req.getFileSize(), messageStore.getTransientStorePool()); } catch (RuntimeException e) { log.warn("Use default implementation."); mappedFile = new MappedFile(req.getFilePath(), req.getFileSize(), messageStore.getTransientStorePool()); } } else { // 创建 mappedFile mappedFile = new MappedFile(req.getFilePath(), req.getFileSize()); } long elapsedTime = UtilAll.computeElapsedTimeMilliseconds(beginTime); if (elapsedTime > 10) { int queueSize = this.requestQueue.size(); log.warn("create mappedFile spent time(ms) " + elapsedTime + " queue size " + queueSize + " " + req.getFilePath() + " " + req.getFileSize()); } // pre write mappedFile // 文件大小大于 1G,并且开启 warmMapedFileEnable if (mappedFile.getFileSize() >= this.messageStore.getMessageStoreConfig() .getMappedFileSizeCommitLog() && this.messageStore.getMessageStoreConfig().isWarmMapedFileEnable()) { mappedFile.warmMappedFile(this.messageStore.getMessageStoreConfig().getFlushDiskType(), this.messageStore.getMessageStoreConfig().getFlushLeastPagesWhenWarmMapedFile()); } // 设置 mappedFile req.setMappedFile(mappedFile); this.hasException = false; isSuccess = true; } } catch (InterruptedException e) { log.warn(this.getServiceName() + " interrupted, possibly by shutdown."); this.hasException = true; return false; } catch (IOException e) { log.warn(this.getServiceName() + " service has exception. ", e); this.hasException = true; if (null != req) { requestQueue.offer(req); try { Thread.sleep(1); } catch (InterruptedException ignored) { } } } finally { // countDownLatch 减一 if (req != null && isSuccess) req.getCountDownLatch().countDown(); } return true; }
文件预热开关,默认 false,如果设置为 true,则会对文件进行预热
org.apache.rocketmq.store.config.MessageStoreConfig#warmMapedFileEnable
// org.apache.rocketmq.store.MappedFile#warmMappedFile public void warmMappedFile(FlushDiskType type, int pages) { long beginTime = System.currentTimeMillis(); ByteBuffer byteBuffer = this.mappedByteBuffer.slice(); int flush = 0; long time = System.currentTimeMillis(); for (int i = 0, j = 0; i < this.fileSize; i += MappedFile.OS_PAGE_SIZE, j++) { // 每一个 4k 页,写入一个字节 0 byteBuffer.put(i, (byte) 0); // force flush when flush disk type is sync if (type == FlushDiskType.SYNC_FLUSH) { if ((i / OS_PAGE_SIZE) - (flush / OS_PAGE_SIZE) >= pages) { flush = i; mappedByteBuffer.force(); } } // prevent gc if (j % 1000 == 0) { log.info("j={}, costTime={}", j, System.currentTimeMillis() - time); time = System.currentTimeMillis(); try { Thread.sleep(0); } catch (InterruptedException e) { log.error("Interrupted", e); } } } // force flush when prepare load finished if (type == FlushDiskType.SYNC_FLUSH) { log.info("mapped file warm-up done, force to disk, mappedFile={}, costTime={}", this.getFileName(), System.currentTimeMillis() - beginTime); mappedByteBuffer.force(); } log.info("mapped file warm-up done. mappedFile={}, costTime={}", this.getFileName(), System.currentTimeMillis() - beginTime); this.mlock(); }
猜想:
通过 mmap 映射,操作系统只是建立了映射关系,并没有分配内存,通过在每一页中写入数据,才真正分配内存
通过 jna 锁住内存,防止操作系统把指定的内存区域交换到 swap 空间
public void mlock() { final long beginTime = System.currentTimeMillis(); final long address = ((DirectBuffer) (this.mappedByteBuffer)).address(); Pointer pointer = new Pointer(address); { int ret = LibC.INSTANCE.mlock(pointer, new NativeLong(this.fileSize)); log.info("mlock {} {} {} ret = {} time consuming = {}", address, this.fileName, this.fileSize, ret, System.currentTimeMillis() - beginTime); } { int ret = LibC.INSTANCE.madvise(pointer, new NativeLong(this.fileSize), LibC.MADV_WILLNEED); log.info("madvise {} {} {} ret = {} time consuming = {}", address, this.fileName, this.fileSize, ret, System.currentTimeMillis() - beginTime); } }
如果开启了预热,在创建新的文件时,老的文件会释放
public void munlock() { final long beginTime = System.currentTimeMillis(); final long address = ((DirectBuffer) (this.mappedByteBuffer)).address(); Pointer pointer = new Pointer(address); int ret = LibC.INSTANCE.munlock(pointer, new NativeLong(this.fileSize)); log.info("munlock {} {} {} ret = {} time consuming = {}", address, this.fileName, this.fileSize, ret, System.currentTimeMillis() - beginTime); }