Hudi reads data one fileSlice at a time: it loads the fileSlice's basefile and logfile data into memory in turn, then merges the old and new records according to the configured merge strategy.
The basefile is read with the reader for its file format, with no extra logic. The logfile format, however, is defined by Hudi itself, so record information has to be extracted according to that fixed layout.
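To make the merge concrete, here is a minimal sketch of the per-fileSlice read flow. The types are simplified stand-ins (a plain HashMap in place of Hudi's ExternalSpillableMap, strings in place of real records), and the "latest wins" policy shown is just one possible merge strategy:

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

// Hypothetical stand-in: a record is just (key, payload).
final class FileSliceMergeSketch {
  static Iterator<Map.Entry<String, String>> readFileSlice(
      Iterator<Map.Entry<String, String>> baseFileRecords,
      Iterator<Map.Entry<String, String>> logFileRecords) {
    // In Hudi the merged records live in an ExternalSpillableMap that can spill to disk.
    Map<String, String> merged = new HashMap<>();
    baseFileRecords.forEachRemaining(r -> merged.put(r.getKey(), r.getValue()));
    // Overlay log records; with "latest wins" semantics newer payloads replace older ones.
    logFileRecords.forEachRemaining(r -> merged.put(r.getKey(), r.getValue()));
    return merged.entrySet().iterator();
  }
}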
HoodieLogFileReader
HoodieLogFileReader: reads a single log file of a MOR table. It builds the different HoodieLogBlock types according to the logfile format, validating each block as it reads and skipping over corrupted blocks (surfacing them as HoodieCorruptBlock).
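The readBlock() steps below imply roughly the following on-disk layout for a current-version log block; treat this as a reading aid reconstructed from the code, not a normative format spec:

MAGIC marker (6 bytes, "#HUDI#")
| block size (long)
| format version (int)
| block type (int)
| header metadata map (if the version has one)
| content length (long)
| content bytes
| footer metadata map (if the version has one)
| total block length (long, reverse pointer for backward traversal)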
public class HoodieLogFileReader implements HoodieLogFormat.Reader {
public static final int DEFAULT_BUFFER_SIZE = 16 * 1024 * 1024; // 16 MB
private static final int BLOCK_SCAN_READ_BUFFER_SIZE = 1024 * 1024; // 1 MB
private static final Logger LOG = LogManager.getLogger(HoodieLogFileReader.class);
private final FSDataInputStream inputStream;
private final HoodieLogFile logFile;
private final byte[] magicBuffer = new byte[6];
private final Schema readerSchema;
private final String keyField;
private boolean readBlockLazily;
private long reverseLogFilePosition;
private long lastReverseLogFilePosition;
private boolean reverseReader;
private boolean enableInlineReading;
private boolean closed = false;
private transient Thread shutdownThread = null;
public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
boolean readBlockLazily) throws IOException {
this(fs, logFile, readerSchema, bufferSize, readBlockLazily, false);
}
public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
boolean readBlockLazily, boolean reverseReader) throws IOException {
this(fs, logFile, readerSchema, bufferSize, readBlockLazily, reverseReader, false,
HoodieRecord.RECORD_KEY_METADATA_FIELD);
}
public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
boolean readBlockLazily, boolean reverseReader, boolean enableInlineReading,
String keyField) throws IOException {
// open the log file with the given read buffer size (default 16 MB)
FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize);
this.logFile = logFile;
// wrap the raw stream with buffering/timing wrappers as needed
this.inputStream = getFSDataInputStream(fsDataInputStream, fs, bufferSize);
this.readerSchema = readerSchema;
this.readBlockLazily = readBlockLazily;
this.reverseReader = reverseReader;
this.enableInlineReading = enableInlineReading;
this.keyField = keyField;
if (this.reverseReader) {
this.reverseLogFilePosition = this.lastReverseLogFilePosition = logFile.getFileSize();
}
addShutDownHook();
}
public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema) throws IOException {
this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, false, false);
}
/**
* Fetch the right {@link FSDataInputStream} to be used by wrapping with required input streams.
* @param fsDataInputStream original instance of {@link FSDataInputStream}.
* @param fs instance of {@link FileSystem} in use.
* @param bufferSize buffer size to be used.
* @return the right {@link FSDataInputStream} as required.
*/
private FSDataInputStream getFSDataInputStream(FSDataInputStream fsDataInputStream, FileSystem fs, int bufferSize) {
if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) {
return new TimedFSDataInputStream(logFile.getPath(), new FSDataInputStream(
new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize)));
}
// fsDataInputStream.getWrappedStream() may already be a BufferedFSInputStream;
// would it need to be wrapped in another BufferedFSInputStream to make bufferSize take effect?
return fsDataInputStream;
}
@Override
public HoodieLogFile getLogFile() {
return logFile;
}
/**
* Close the inputstream if not closed when the JVM exits.
*/
private void addShutDownHook() {
shutdownThread = new Thread(() -> {
try {
close();
} catch (Exception e) {
LOG.warn("unable to close input stream for log file " + logFile, e);
// fail silently for any sort of exception
}
});
Runtime.getRuntime().addShutdownHook(shutdownThread);
}
// TODO : convert content and block length to long by using ByteBuffer, raw byte [] allows
// for max of Integer size
private HoodieLogBlock readBlock() throws IOException {
int blocksize;
int type;
HoodieLogBlockType blockType = null;
Map<HoodieLogBlock.HeaderMetadataType, String> header = null;
try {
// 1 Read the total size of the block
blocksize = (int) inputStream.readLong();
} catch (EOFException | CorruptedLogFileException e) {
// An exception reading any of the above indicates a corrupt block
// Create a corrupt block by finding the next MAGIC marker or EOF
return createCorruptBlock();
}
// We may have had a crash which could have written this block partially
// Skip blocksize in the stream and we should either find a sync marker (start of the next
// block) or EOF. If we did not find either of it, then this block is a corrupted block.
boolean isCorrupted = isBlockCorrupt(blocksize);
if (isCorrupted) {
return createCorruptBlock();
}
// 2. Read the version for this log format
HoodieLogFormat.LogFormatVersion nextBlockVersion = readVersion();
// 3. Read the block type for a log block
if (nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION) {
type = inputStream.readInt();
ValidationUtils.checkArgument(type < HoodieLogBlockType.values().length, "Invalid block byte type found " + type);
blockType = HoodieLogBlockType.values()[type];
}
// 4. Read the header for a log block, if present
if (nextBlockVersion.hasHeader()) {
header = HoodieLogBlock.getLogMetadata(inputStream);
}
int contentLength = blocksize;
// 5. Read the content length for the content
if (nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION) {
contentLength = (int) inputStream.readLong();
}
// 6. Read the content or skip content based on IO vs Memory trade-off by client
// TODO - have a max block size and reuse this buffer in the ByteBuffer
// (hard to guess max block size for now)
long contentPosition = inputStream.getPos();
byte[] content = HoodieLogBlock.readOrSkipContent(inputStream, contentLength, readBlockLazily);
// 7. Read footer if any
Map<HoodieLogBlock.HeaderMetadataType, String> footer = null;
if (nextBlockVersion.hasFooter()) {
footer = HoodieLogBlock.getLogMetadata(inputStream);
}
// 8. Read log block length, if present. This acts as a reverse pointer when traversing a
// log file in reverse
@SuppressWarnings("unused")
long logBlockLength = 0;
if (nextBlockVersion.hasLogBlockLength()) {
logBlockLength = inputStream.readLong();
}
// 9. Read the log block end position in the log file
long blockEndPos = inputStream.getPos();
switch (Objects.requireNonNull(blockType)) {
// based on type read the block
case AVRO_DATA_BLOCK:
if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) {
return HoodieAvroDataBlock.getBlock(content, readerSchema);
} else {
// the current format version (1) takes this branch
return new HoodieAvroDataBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, readerSchema, header, footer, keyField);
}
case HFILE_DATA_BLOCK:
return new HoodieHFileDataBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, readerSchema,
header, footer, enableInlineReading, keyField);
case DELETE_BLOCK:
return HoodieDeleteBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, header, footer);
case COMMAND_BLOCK:
return HoodieCommandBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily,
contentPosition, contentLength, blockEndPos, header, footer);
default:
throw new HoodieNotSupportedException("Unsupported Block " + blockType);
}
}
private HoodieLogBlock createCorruptBlock() throws IOException {
LOG.info("Log " + logFile + " has a corrupted block at " + inputStream.getPos());
long currentPos = inputStream.getPos();
// find the start of the next block by scanning for the MAGIC marker
long nextBlockOffset = scanForNextAvailableBlockOffset();
// Rewind to the initial start and read corrupted bytes till the nextBlockOffset
inputStream.seek(currentPos);
LOG.info("Next available block in " + logFile + " starts at " + nextBlockOffset);
// size of the corrupted region
int corruptedBlockSize = (int) (nextBlockOffset - currentPos);
long contentPosition = inputStream.getPos();
// corrupted Bytes
byte[] corruptedBytes = HoodieLogBlock.readOrSkipContent(inputStream, corruptedBlockSize, readBlockLazily);
return HoodieCorruptBlock.getBlock(logFile, inputStream, Option.ofNullable(corruptedBytes), readBlockLazily,
contentPosition, corruptedBlockSize, corruptedBlockSize, new HashMap<>(), new HashMap<>());
}
/**
 * Detects corruption by comparing the block size recorded in the header with the one in the footer.
 * @param blocksize block size read from the block header
 * @return true if the block is corrupted
 * @throws IOException on read failure
 */
private boolean isBlockCorrupt(int blocksize) throws IOException {
long currentPos = inputStream.getPos();
try {
inputStream.seek(currentPos + blocksize);
} catch (EOFException e) {
LOG.info("Found corrupted block in file " + logFile + " with block size(" + blocksize + ") running past EOF");
// this is corrupt
// This seek is required because contract of seek() is different for naked DFSInputStream vs BufferedFSInputStream
// release-3.1.0-RC1/DFSInputStream.java#L1455
// release-3.1.0-RC1/BufferedFSInputStream.java#L73
inputStream.seek(currentPos);
return true;
}
// check if the blocksize mentioned in the footer is the same as the header;
// by seeking back the length of a long
// the backward seek does not incur additional IO as {@link org.apache.hadoop.hdfs.DFSInputStream#seek()}
// only moves the index. actual IO happens on the next read operation
// seek back to the block-length field that follows the footer
inputStream.seek(inputStream.getPos() - Long.BYTES);
// Block size in the footer includes the magic header, which the header does not include.
// So we have to shorten the footer block size by the size of magic hash
long blockSizeFromFooter = inputStream.readLong() - magicBuffer.length;
if (blocksize != blockSizeFromFooter) {
LOG.info("Found corrupted block in file " + logFile + ". Header block size(" + blocksize
+ ") did not match the footer block size(" + blockSizeFromFooter + ")");
inputStream.seek(currentPos);
return true;
}
try {
readMagic();
// all good - either we found the sync marker or EOF. Reset position and continue
return false;
} catch (CorruptedLogFileException e) {
// This is a corrupted block
LOG.info("Found corrupted block in file " + logFile + ". No magic hash found right after footer block size entry");
return true;
} finally {
inputStream.seek(currentPos);
}
}
/**
 * When the current block is corrupted, scans forward for the start of the next readable block.
 * @return offset of the next available block, or the current position when EOF is reached
 * @throws IOException on read failure
 */
private long scanForNextAvailableBlockOffset() throws IOException {
// Make buffer large enough to scan through the file as quick as possible especially if it is on S3/GCS.
byte[] dataBuf = new byte[BLOCK_SCAN_READ_BUFFER_SIZE];
boolean eof = false;
while (true) {
long currentPos = inputStream.getPos();
try {
Arrays.fill(dataBuf, (byte) 0);
// read 1 MB into the scan buffer
inputStream.readFully(dataBuf, 0, dataBuf.length);
} catch (EOFException e) {
// hit EOF before the buffer was completely filled
eof = true;
}
// offset of the MAGIC marker within dataBuf, or -1 if absent
long pos = Bytes.indexOf(dataBuf, HoodieLogFormat.MAGIC);
if (pos >= 0) {
// found the magic: return its absolute position in the file
return currentPos + pos;
}
if (eof) {
// EOF reached without finding a magic: return the current position
return inputStream.getPos();
}
// no MAGIC in this buffer, keep scanning; rewind by MAGIC.length so a marker split across
// the buffer boundary (e.g. a trailing "#HUDI") is not missed
inputStream.seek(currentPos + dataBuf.length - HoodieLogFormat.MAGIC.length);
}
}
@Override
public void close() throws IOException {
if (!closed) {
this.inputStream.close();
if (null != shutdownThread) {
Runtime.getRuntime().removeShutdownHook(shutdownThread);
}
closed = true;
}
}
/*
* hasNext is not idempotent. TODO - Fix this. It is okay for now - PR
*/
@Override
public boolean hasNext() {
try {
// 1. read Magic
return readMagic();
} catch (IOException e) {
throw new HoodieIOException("IOException when reading logfile " + logFile, e);
}
}
/**
* Read log format version from log file.
*/
private HoodieLogFormat.LogFormatVersion readVersion() throws IOException {
return new HoodieLogFormatVersion(inputStream.readInt());
}
private boolean readMagic() throws IOException {
try {
boolean hasMagic = hasNextMagic();
if (!hasMagic) {
throw new CorruptedLogFileException(
logFile + " could not be read. Did not find the magic bytes at the start of the block");
}
return hasMagic;
} catch (EOFException e) {
// We have reached the EOF
return false;
}
}
private boolean hasNextMagic() throws IOException {
// 1. Read magic header from the start of the block
inputStream.readFully(magicBuffer, 0, 6);
return Arrays.equals(magicBuffer, HoodieLogFormat.MAGIC);
}
@Override
public HoodieLogBlock next() {
try {
// hasNext() must be called before next()
return readBlock();
} catch (IOException io) {
throw new HoodieIOException("IOException when reading logblock from log file " + logFile, io);
}
}
/**
* hasPrev is not idempotent.
*/
@Override
public boolean hasPrev() {
try {
if (!this.reverseReader) {
throw new HoodieNotSupportedException("Reverse log reader has not been enabled");
}
reverseLogFilePosition = lastReverseLogFilePosition;
reverseLogFilePosition -= Long.BYTES;
lastReverseLogFilePosition = reverseLogFilePosition;
inputStream.seek(reverseLogFilePosition);
} catch (Exception e) {
// Either reached EOF while reading backwards or an exception
return false;
}
return true;
}
/**
* This is a reverse iterator. Note: at any point, an instance of HoodieLogFileReader should iterate either in reverse
* (prev) or forward (next); doing both on the same instance is not supported. WARNING: every call to prev() should be
* preceded by hasPrev().
*/
@Override
public HoodieLogBlock prev() throws IOException {
if (!this.reverseReader) {
throw new HoodieNotSupportedException("Reverse log reader has not been enabled");
}
long blockSize = inputStream.readLong();
long blockEndPos = inputStream.getPos();
// blockSize covers the entire block, including the length field itself
try {
inputStream.seek(reverseLogFilePosition - blockSize);
} catch (Exception e) {
// this could be a corrupt block
inputStream.seek(blockEndPos);
throw new CorruptedLogFileException("Found possible corrupted block, cannot read log file in reverse, "
+ "fallback to forward reading of logfile");
}
boolean hasNext = hasNext();
reverseLogFilePosition -= blockSize;
lastReverseLogFilePosition = reverseLogFilePosition;
return next();
}
/**
* Reverse pointer; does not read the block. Returns the current position of the log file (in reverse). If the pointer
* (inputstream) is moved in any way, it is the job of the client of this class to seek/reset it back to the file
* position returned by this method in order to get correct results.
*/
public long moveToPrev() throws IOException {
if (!this.reverseReader) {
throw new HoodieNotSupportedException("Reverse log reader has not been enabled");
}
inputStream.seek(lastReverseLogFilePosition);
long blockSize = inputStream.readLong();
// blockSize covers the entire block, including the length field itself
inputStream.seek(reverseLogFilePosition - blockSize);
reverseLogFilePosition -= blockSize;
lastReverseLogFilePosition = reverseLogFilePosition;
return reverseLogFilePosition;
}
@Override
public void remove() {
throw new UnsupportedOperationException("Remove not supported for HoodieLogFileReader");
}
}
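A minimal sketch of driving the reader directly, using the three-argument constructor above (default 16 MB buffer); fs, logPath, readerSchema, and LOG are placeholders the caller must supply:

HoodieLogFileReader reader = new HoodieLogFileReader(fs, new HoodieLogFile(logPath), readerSchema);
try {
  while (reader.hasNext()) {              // hasNext() looks for the next MAGIC marker
    HoodieLogBlock block = reader.next(); // parses one block; may return a corrupt block
    LOG.info("Read block of type " + block.getBlockType() + " from " + reader.getLogFile());
  }
} finally {
  reader.close();
}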
HoodieLogFormatReader
HoodieLogFormatReader: builds a HoodieLogFileReader for each logfile so that the blocks of a fileSlice's whole set of logfiles can be read as a single stream.
public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
private final List<HoodieLogFile> logFiles;
// Readers for previously scanned log-files that are still open
private final List<HoodieLogFileReader> prevReadersInOpenState;
private HoodieLogFileReader currentReader;
private final FileSystem fs;
private final Schema readerSchema;
private final boolean readBlocksLazily;
private final boolean reverseLogReader;
private final String recordKeyField;
private final boolean enableInlineReading;
private int bufferSize;
private static final Logger LOG = LogManager.getLogger(HoodieLogFormatReader.class);
HoodieLogFormatReader(FileSystem fs, List<HoodieLogFile> logFiles, Schema readerSchema, boolean readBlocksLazily,
boolean reverseLogReader, int bufferSize, boolean enableInlineReading,
String recordKeyField) throws IOException {
this.logFiles = logFiles;
this.fs = fs;
this.readerSchema = readerSchema;
this.readBlocksLazily = readBlocksLazily;
this.reverseLogReader = reverseLogReader;
this.bufferSize = bufferSize;
this.prevReadersInOpenState = new ArrayList<>();
this.recordKeyField = recordKeyField;
this.enableInlineReading = enableInlineReading;
if (logFiles.size() > 0) {
HoodieLogFile nextLogFile = logFiles.remove(0);
this.currentReader = new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily, false,
enableInlineReading, recordKeyField);
}
}
@Override
/**
* Note: In lazy mode, clients must ensure close() is called only after processing all log-blocks, as the
* underlying inputstream will be closed. TODO: We can introduce invalidate() API at HoodieLogBlock and this object
* can call invalidate on all returned log-blocks so that we check this scenario specifically in HoodieLogBlock
*/
public void close() throws IOException {
for (HoodieLogFileReader reader : prevReadersInOpenState) {
reader.close();
}
prevReadersInOpenState.clear();
if (currentReader != null) {
currentReader.close();
}
}
@Override
public boolean hasNext() {
if (currentReader == null) {
return false;
} else if (currentReader.hasNext()) {
return true;
} else if (logFiles.size() > 0) {
try {
// move to the next logfile
HoodieLogFile nextLogFile = logFiles.remove(0);
// Close the previous reader eagerly only when blocks are read eagerly (lazily-read blocks still reference its stream)
if (!readBlocksLazily) {
this.currentReader.close();
} else {
// with lazy reading, keep the previous reader open and track it in prevReadersInOpenState
this.prevReadersInOpenState.add(currentReader);
}
// build a HoodieLogFileReader for the next logfile
this.currentReader = new HoodieLogFileReader(fs, nextLogFile, readerSchema, bufferSize, readBlocksLazily, false,
enableInlineReading, recordKeyField);
} catch (IOException io) {
throw new HoodieIOException("unable to initialize read with log file ", io);
}
LOG.info("Moving to the next reader for logfile " + currentReader.getLogFile());
return hasNext();
}
return false;
}
@Override
public HoodieLogBlock next() {
// delegate to the current HoodieLogFileReader to read the LogBlock
return currentReader.next();
}
@Override
public HoodieLogFile getLogFile() {
return currentReader.getLogFile();
}
@Override
public void remove() {}
}
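hasNext() above effectively flattens a list of per-file readers into one block stream. A generic sketch of that chaining pattern (plain Java, not Hudi code):

import java.util.Iterator;
import java.util.List;

// Same shape as HoodieLogFormatReader#hasNext: when the current iterator is
// exhausted, advance to the next one and recurse.
final class ChainedIterator<T> implements Iterator<T> {
  private final List<Iterator<T>> remaining; // must be a mutable list
  private Iterator<T> current;

  ChainedIterator(List<Iterator<T>> parts) {
    this.remaining = parts;
    this.current = parts.isEmpty() ? null : parts.remove(0);
  }

  @Override
  public boolean hasNext() {
    if (current == null) {
      return false;
    } else if (current.hasNext()) {
      return true;
    } else if (!remaining.isEmpty()) {
      current = remaining.remove(0); // move to the next "log file"
      return hasNext();
    }
    return false;
  }

  @Override
  public T next() {
    return current.next();
  }
}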
AbstractHoodieLogRecordReader
AbstractHoodieLogRecordReader: processes block contents, including dropping rolled-back data and skipping corrupt data. The merged records are stored in an ExternalSpillableMap and later combined with the baseFile records; see the MergeOnReadInputFormat#reachedEnd flow for the details.
Block processing flow:
● Data blocks and delete blocks are first pushed onto a queue; records are not extracted from the data blocks yet.
● When a COMMAND_BLOCK that requires a rollback is encountered, the blocks of the target instant are removed from the queue back to front (see the sketch after this list).
● Once all blocks have been read, the queue is drained front to back: data blocks are parsed into the map, and delete blocks remove the matching entries from the map.
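A small self-contained sketch of this queue discipline, using instant-time strings as stand-ins for whole blocks:

import java.util.ArrayDeque;
import java.util.Deque;

final class RollbackSketch {
  public static void main(String[] args) {
    Deque<String> blocks = new ArrayDeque<>(); // queue of blocks, keyed here by instant time
    blocks.push("c1"); // data block from a failed attempt of commit c1
    blocks.push("c1"); // duplicate block from the retried attempt, same instant
    // A ROLLBACK_PREVIOUS_BLOCK command targeting "c1" pops from the most recent
    // end until it reaches a block of a different instant (or empties the queue).
    String targetInstant = "c1";
    while (!blocks.isEmpty() && targetInstant.equals(blocks.peek())) {
      blocks.pop();
    }
    System.out.println(blocks); // [] -> both duplicate blocks rolled back by one command
  }
}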
AbstractHoodieLogRecordReader#scan
public void scan(Option<List<String>> keys) {
currentInstantLogBlocks = new ArrayDeque<>();
progress = 0.0f;
totalLogFiles = new AtomicLong(0);
totalRollbacks = new AtomicLong(0);
totalCorruptBlocks = new AtomicLong(0);
totalLogBlocks = new AtomicLong(0);
totalLogRecords = new AtomicLong(0);
HoodieLogFormatReader logFormatReaderWrapper = null;
HoodieTimeline commitsTimeline = this.hoodieTableMetaClient.getCommitsTimeline();
HoodieTimeline completedInstantsTimeline = commitsTimeline.filterCompletedInstants();
HoodieTimeline inflightInstantsTimeline = commitsTimeline.filterInflights();
try {
// Get the key field based on populate meta fields config
// and the table type
final String keyField = getKeyField();
// Iterate over the paths
logFormatReaderWrapper = new HoodieLogFormatReader(fs,
logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile))).collect(Collectors.toList()),
readerSchema,
readBlocksLazily,
reverseReader,
bufferSize,
!enableFullScan,
keyField);
Set<HoodieLogFile> scannedLogFiles = new HashSet<>();
while (logFormatReaderWrapper.hasNext()) {
HoodieLogFile logFile = logFormatReaderWrapper.getLogFile();
LOG.info("Scanning log file " + logFile);
scannedLogFiles.add(logFile);
totalLogFiles.set(scannedLogFiles.size());
// Use the HoodieLogFileReader to iterate through the blocks in the log file
HoodieLogBlock logBlock = logFormatReaderWrapper.next();
final String instantTime = logBlock.getLogBlockHeader().get(INSTANT_TIME);
totalLogBlocks.incrementAndGet();
if (logBlock.getBlockType() != CORRUPT_BLOCK
&& !HoodieTimeline.compareTimestamps(logBlock.getLogBlockHeader().get(INSTANT_TIME), HoodieTimeline.LESSER_THAN_OR_EQUALS, this.latestInstantTime
)) {
// hit a block with an instant time greater than latestInstantTime; stop processing further
break;
}
if (logBlock.getBlockType() != CORRUPT_BLOCK && logBlock.getBlockType() != COMMAND_BLOCK) {
// a data block or a delete block
if (!completedInstantsTimeline.containsOrBeforeTimelineStarts(instantTime)
|| inflightInstantsTimeline.containsInstant(instantTime)) {
// hit an uncommitted block possibly from a failed write, move to the next one and skip processing this one
continue;
}
if (instantRange.isPresent() && !instantRange.get().isInRange(instantTime)) {
// filter the log block by instant range
continue;
}
}
switch (logBlock.getBlockType()) {
case HFILE_DATA_BLOCK:
case AVRO_DATA_BLOCK:
LOG.info("Reading a data block from file " + logFile.getPath() + " at instant "
+ logBlock.getLogBlockHeader().get(INSTANT_TIME));
processQueuedBlocks(keys, scannedLogFiles, logBlock);
// store the current block
currentInstantLogBlocks.push(logBlock);
break;
case DELETE_BLOCK:
LOG.info("Reading a delete block from file " + logFile.getPath());
processQueuedBlocks(keys, scannedLogFiles, logBlock);
// store deletes so can be rolled back
currentInstantLogBlocks.push(logBlock);
break;
case COMMAND_BLOCK:
// guards against duplicate data being written when a write is retried
// Consider the following scenario
// (Time 0, C1, Task T1) -> Running
// (Time 1, C1, Task T1) -> Failed (Wrote either a corrupt block or a correct
// DataBlock (B1) with commitTime C1
// (Time 2, C1, Task T1.2) -> Running (Task T1 was retried and the attempt number is 2)
// (Time 3, C1, Task T1.2) -> Finished (Wrote a correct DataBlock B2)
// Now a logFile L1 can have 2 correct Datablocks (B1 and B2) which are the same.
// Say, commit C1 eventually failed and a rollback is triggered.
// Rollback will write only 1 rollback block (R1) since it assumes one block is
// written per ingestion batch for a file but in reality we need to rollback (B1 & B2)
// The following code ensures the same rollback block (R1) is used to rollback
// both B1 & B2
LOG.info("Reading a command block from file " + logFile.getPath());
// This is a command block - take appropriate action based on the command
HoodieCommandBlock commandBlock = (HoodieCommandBlock) logBlock;
// instant time whose blocks this command rolls back
String targetInstantForCommandBlock =
logBlock.getLogBlockHeader().get(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME);
switch (commandBlock.getType()) { // there can be different types of command blocks
case ROLLBACK_PREVIOUS_BLOCK:
// Rollback the last read log block
// Get commit time from last record block, compare with targetCommitTime,
// rollback only if equal, this is required in scenarios of invalid/extra
// rollback blocks written due to failures during the rollback operation itself
// and ensures the same rollback block (R1) is used to rollback both B1 & B2 with
// same instant_time
int numBlocksRolledBack = 0;
totalRollbacks.incrementAndGet();
while (!currentInstantLogBlocks.isEmpty()) { // nothing left to roll back once the queue is empty
HoodieLogBlock lastBlock = currentInstantLogBlocks.peek();
// handle corrupt blocks separately since they may not have metadata
if (lastBlock.getBlockType() == CORRUPT_BLOCK) {
LOG.info("Rolling back the last corrupted log block read in " + logFile.getPath());
// a CORRUPT_BLOCK never yields valid data, remove it directly
currentInstantLogBlocks.pop();
numBlocksRolledBack++;
} else if (targetInstantForCommandBlock.contentEquals(lastBlock.getLogBlockHeader().get(INSTANT_TIME))) {
// rollback last data block or delete block
LOG.info("Rolling back the last log block read in " + logFile.getPath());
// remove the LogBlocks of the target instant
currentInstantLogBlocks.pop();
numBlocksRolledBack++;
} else if (!targetInstantForCommandBlock
.contentEquals(lastBlock.getLogBlockHeader().get(INSTANT_TIME))) {
// invalid or extra rollback block
// scanned back to a block that must not be removed; stop rolling back
LOG.warn("TargetInstantTime " + targetInstantForCommandBlock
+ " invalid or extra rollback command block in " + logFile.getPath());
break;
} else {
// this should not happen ideally
LOG.warn("Unable to apply rollback command block in " + logFile.getPath());
}
}
LOG.info("Number of applied rollback blocks " + numBlocksRolledBack);
break;
default:
throw new UnsupportedOperationException("Command type not yet supported.");
}
break;
case CORRUPT_BLOCK:
LOG.info("Found a corrupt block in " + logFile.getPath());
totalCorruptBlocks.incrementAndGet();
// If there is a corrupt block - we will assume that this was the next data block
currentInstantLogBlocks.push(logBlock);
break;
default:
throw new UnsupportedOperationException("Block type not supported yet");
}
}
// ------------------------------ done reading ------------------------------
// extract the contents of the queued blocks
// merge the last read block when all the blocks are done reading
if (!currentInstantLogBlocks.isEmpty()) {
LOG.info("Merging the final data blocks");
// merge the data once all log blocks have been processed
processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keys);
}
// Done
progress = 1.0f;
} catch (IOException e) {
LOG.error("Got IOException when reading log file", e);
throw new HoodieIOException("IOException when reading log file ", e);
} catch (Exception e) {
LOG.error("Got exception when reading log file", e);
throw new HoodieException("Exception when reading log file ", e);
} finally {
try {
if (null != logFormatReaderWrapper) {
logFormatReaderWrapper.close();
}
} catch (IOException ioe) {
// Eat exception as we do not want to mask the original exception that can happen
LOG.error("Unable to close log format reader", ioe);
}
}
}
/**
 * Processes the queued blocks when a block belonging to a new instant arrives.
 * @param keys record keys to read, if restricted
 * @param scannedLogFiles log files scanned so far
 * @param logBlock the newly read block
 * @throws Exception on processing failure
 */
private void processQueuedBlocks(Option<List<String>> keys, Set<HoodieLogFile> scannedLogFiles, HoodieLogBlock logBlock) throws Exception {
if (isNewInstantBlock(logBlock) && !readBlocksLazily) {
// a new batch of data has arrived
// If this is a delete data block belonging to a different commit/instant,
// then merge the last blocks and records into the main result
processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size(), keys);
}
}