1. How does Kafka locate a message by its offset?
When consuming, a consumer specifies a Topic and a Partition. Each Partition corresponds to a log directory on disk; to avoid the cleanup and deletion problems of one huge log, a Partition is split into multiple LogSegments, each consisting of a .index file (offset index), a .log file (the message log), a .timeindex file (timestamp index), and so on. The offset index is a sparse index that maps logical offsets to physical positions in the .log file; how sparse it is can be tuned with the log.index.interval.bytes configuration parameter.
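For a concrete picture (hypothetical topic and offsets, chosen to match the lookup example below), the directory for partition 0 of a topic named order would hold one set of files per segment, each named after the segment's base offset:

order-0/
  00000000000000000000.index
  00000000000000000000.log
  00000000000000000000.timeindex
  00000000000000000251.index
  00000000000000000251.log
  00000000000000000251.timeindex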
Suppose we want to find the message at offset 268. The first step is to locate the log segment it lives in. Kafka keeps its log segments in a ConcurrentSkipListMap (from java.util.concurrent), a skip-list-based map whose insert, delete, update and lookup operations are all O(log n). (How is ConcurrentSkipListMap implemented? How does Kafka use it?)
private val segments: ConcurrentNavigableMap[java.lang.Long, LogSegment] = new ConcurrentSkipListMap[java.lang.Long, LogSegment]
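As a rough, self-contained sketch (not Kafka code; the stub class and the base offsets are made up), this is how floorEntry on a skip-list map keyed by baseOffset picks the only segment that can contain a target offset:

import java.util.concurrent.ConcurrentSkipListMap

object SegmentLookupSketch extends App {
  final case class SegmentStub(baseOffset: Long)

  // keyed by each segment's baseOffset, mirroring Kafka's `segments` map above
  val segments = new ConcurrentSkipListMap[java.lang.Long, SegmentStub]()
  Seq(0L, 251L, 502L).foreach(base => segments.put(base, SegmentStub(base)))

  // floorEntry returns the entry with the largest key <= target, i.e. the only
  // segment whose offset range can contain the target offset
  val target = 268L
  val entry = segments.floorEntry(target)
  println(s"offset $target falls in the segment with baseOffset ${entry.getValue.baseOffset}") // 251
}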
// Log.read: the log's read method; its parameters include the start offset, the maximum number of bytes to read, etc.
def read(startOffset: Long,
         maxLength: Int,
         isolation: FetchIsolation,
         minOneMessage: Boolean): FetchDataInfo = {
  maybeHandleIOException(s"Exception while reading from $topicPartition in dir ${dir.getParent}") {
    trace(s"Reading maximum $maxLength bytes at offset $startOffset from log with " +
      s"total length $size bytes")

    val includeAbortedTxns = isolation == FetchTxnCommitted

    // Because we don't use the lock for reading, the synchronization is a little bit tricky.
    // We create the local variables to avoid race conditions with updates to the log.
    // i.e. instead of taking a lock, the read captures the LEO into locals to avoid contention with writers
    val endOffsetMetadata = nextOffsetMetadata
    val endOffset = endOffsetMetadata.messageOffset
    var segmentEntry = segments.floorEntry(startOffset) // locate the candidate segment via the thread-safe skip list

    // return error on attempt to read beyond the log end offset or read below log start offset
    if (startOffset > endOffset || segmentEntry == null || startOffset < logStartOffset)
      throw new OffsetOutOfRangeException(s"Received request for offset $startOffset for partition $topicPartition, " +
        s"but we only have log segments in the range $logStartOffset to $endOffset.")

    val maxOffsetMetadata = isolation match {
      case FetchLogEnd => endOffsetMetadata
      case FetchHighWatermark => fetchHighWatermarkMetadata
      case FetchTxnCommitted => fetchLastStableOffsetMetadata
    }

    if (startOffset == maxOffsetMetadata.messageOffset) {
      return emptyFetchDataInfo(maxOffsetMetadata, includeAbortedTxns)
    } else if (startOffset > maxOffsetMetadata.messageOffset) {
      val startOffsetMetadata = convertToOffsetMetadataOrThrow(startOffset)
      return emptyFetchDataInfo(startOffsetMetadata, includeAbortedTxns)
    }

    // Do the read on the segment with a base offset less than the target offset
    // but if that segment doesn't contain any messages with an offset greater than that
    // continue to read from successive segments until we get some messages or we reach the end of the log
    while (segmentEntry != null) {
      val segment = segmentEntry.getValue

      val maxPosition = {
        // Use the max offset position if it is on this segment; otherwise, the segment size is the limit.
        if (maxOffsetMetadata.segmentBaseOffset == segment.baseOffset) {
          maxOffsetMetadata.relativePositionInSegment
        } else {
          segment.size
        }
      }

      val fetchInfo = segment.read(startOffset, maxLength, maxPosition, minOneMessage) // read the messages at this offset from the segment
      if (fetchInfo == null) {
        segmentEntry = segments.higherEntry(segmentEntry.getKey)
      } else {
        return if (includeAbortedTxns)
          addAbortedTransactions(startOffset, segmentEntry, fetchInfo)
        else
          fetchInfo
      }
    }

    // okay we are beyond the end of the last segment with no data fetched although the start offset is in range,
    // this can happen when all messages with offset larger than start offsets have been deleted.
    // In this case, we will return the empty set with log end offset metadata
    FetchDataInfo(nextOffsetMetadata, MemoryRecords.EMPTY)
  }
}
// LogSegment.read: translateOffset first maps the start offset to its physical position in the segment, then a FetchDataInfo is built from the data read at that position
@threadsafe
def read(startOffset: Long,
         maxSize: Int,
         maxPosition: Long = size,
         minOneMessage: Boolean = false): FetchDataInfo = {
  if (maxSize < 0)
    throw new IllegalArgumentException(s"Invalid max size $maxSize for log read from segment $log")

  val startOffsetAndSize = translateOffset(startOffset)

  // if the start position is already off the end of the log, return null
  if (startOffsetAndSize == null)
    return null

  val startPosition = startOffsetAndSize.position
  val offsetMetadata = LogOffsetMetadata(startOffset, this.baseOffset, startPosition)

  val adjustedMaxSize =
    if (minOneMessage) math.max(maxSize, startOffsetAndSize.size)
    else maxSize

  // return a log segment but with zero size in the case below
  if (adjustedMaxSize == 0)
    return FetchDataInfo(offsetMetadata, MemoryRecords.EMPTY)

  // calculate the length of the message set to read based on whether or not they gave us a maxOffset
  val fetchSize: Int = min((maxPosition - startPosition).toInt, adjustedMaxSize)

  FetchDataInfo(offsetMetadata, log.slice(startPosition, fetchSize),
    firstEntryIncomplete = adjustedMaxSize < startOffsetAndSize.size)
}
Through the skip list we quickly locate the log segment whose baseOffset is 251. 268 - 251 = 17 gives the relative offset. We then binary-search the segment's sparse .index file for the last entry whose relative offset is not greater than 17, which turns out to be the entry for relative offset 14. That entry points into the .log file; because the index is sparse there is no entry for relative offset 17 itself, so Kafka scans forward from that position, batch by batch, until it reaches the record batch whose offset range covers 17 (the batch's firstOffset marks where that RecordBatch starts), and the position then pinpoints the exact message. Relative offsets are stored instead of absolute offsets to save space, since the absolute offset can always be recovered by adding the segment's baseOffset.
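A minimal sketch of that lookup, with made-up index entries and physical positions, might look like this:

object SparseIndexLookupSketch extends App {
  // the segment that floorEntry picked above
  val baseOffset = 251L

  // toy model of that segment's .index file: (relativeOffset, physicalPosition)
  // pairs sorted by relativeOffset (the positions here are invented)
  val index: Array[(Int, Int)] = Array((0, 0), (6, 1150), (14, 2730), (22, 4310))

  val target = 268L
  val relative = (target - baseOffset).toInt // 268 - 251 = 17

  // find the last entry with relativeOffset <= 17; Kafka does this with a
  // binary search, a linear scan is enough for this toy -> entry (14, 2730)
  val slot = index.lastIndexWhere { case (rel, _) => rel <= relative }
  val (rel, pos) = index(slot)

  // from physical position 2730 the .log file is scanned forward, batch by batch,
  // until the record batch whose offset range covers absolute offset 268 is found
  println(s"start scanning the .log file at position $pos (index entry relativeOffset=$rel)")
}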
Finding a message by its offset: I am still working through this part of the source code; to be updated.
2. The concepts of LEO and HW
HW (High Watermark): consumers can only fetch messages before the HW.
LEO (Log End Offset): the offset of the next message to be written to the log; the smallest LEO among the replicas in the ISR set is the partition's HW.
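As a toy illustration of that relationship, with made-up LEO values:

object HighWatermarkSketch extends App {
  // invented LEOs for the replicas currently in the ISR
  val isrLeos = Map("leader" -> 20L, "follower-1" -> 18L, "follower-2" -> 15L)

  // the partition HW is the smallest LEO in the ISR; consumers may only
  // fetch offsets strictly below it
  val hw = isrLeos.values.min
  println(s"HW = $hw, consumers can fetch offsets [0, $hw)") // HW = 15
}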
3. Kafka's improved binary search: the index entries are split into a "cold" section and a "warm" section (the tail of the index); a lookup first binary-searches the warm section and only falls back to binary-searching the cold section if the target is older. Since new entries are appended at the tail and most lookups target recent offsets, this keeps the common case inside memory-mapped pages that are likely already in the page cache.
private def indexSlotRangeFor(idx: ByteBuffer, target: Long, searchEntity: IndexSearchType): (Int, Int) = {
  // check if the index is empty
  if (_entries == 0)
    return (-1, -1)

  def binarySearch(begin: Int, end: Int): (Int, Int) = {
    // binary search for the entry
    var lo = begin
    var hi = end
    while (lo < hi) {
      val mid = (lo + hi + 1) >>> 1
      val found = parseEntry(idx, mid)
      val compareResult = compareIndexEntry(found, target, searchEntity)
      if (compareResult > 0)
        hi = mid - 1
      else if (compareResult < 0)
        lo = mid
      else
        return (mid, mid)
    }
    (lo, if (lo == _entries - 1) -1 else lo + 1)
  }

  val firstHotEntry = Math.max(0, _entries - 1 - _warmEntries)
  // check if the target offset is in the warm section of the index
  if (compareIndexEntry(parseEntry(idx, firstHotEntry), target, searchEntity) < 0) {
    return binarySearch(firstHotEntry, _entries - 1)
  }

  // check if the target offset is smaller than the least offset
  if (compareIndexEntry(parseEntry(idx, 0), target, searchEntity) > 0)
    return (-1, 0)

  binarySearch(0, firstHotEntry)
}
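For intuition, here is a stripped-down, self-contained sketch of the same warm/cold idea over a plain sorted array (the array contents and the warm-section size are made up; the real index derives the warm-section size from a fixed byte budget at the tail of the index file):

object WarmColdSearchSketch extends App {
  // pretend index: 100 entries, strictly increasing offsets (made-up values)
  val offsets: Array[Long] = Array.tabulate(100)(i => i * 10L)
  // pretend warm-section size
  val warmEntries = 8

  // largest slot in [begin, end] with offsets(slot) <= target
  // (assumes offsets(begin) <= target; the real code handles "below the first
  // entry" separately by returning (-1, 0))
  def largestLowerSlot(target: Long, begin: Int, end: Int): Int = {
    var lo = begin
    var hi = end
    while (lo < hi) {
      val mid = (lo + hi + 1) >>> 1
      if (offsets(mid) > target) hi = mid - 1 else lo = mid
    }
    lo
  }

  def lookup(target: Long): Int = {
    val firstHotEntry = math.max(0, offsets.length - 1 - warmEntries)
    if (offsets(firstHotEntry) <= target)
      largestLowerSlot(target, firstHotEntry, offsets.length - 1) // warm (tail) section only
    else
      largestLowerSlot(target, 0, firstHotEntry)                  // fall back to the cold section
  }

  println(lookup(987L)) // recent offset: the search never leaves the warm tail -> 98
  println(lookup(123L)) // old offset: searched in the cold section             -> 12
}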