首先是读程序的数据结构
SequentialFile* const file_;  // File object the log is read from (via file_->Read).
Reporter* const reporter_;  // Error reporter; presumably the sink behind ReportCorruption/ReportDrop (not shown here).
bool const checksum_;  // Whether ReadPhysicalRecord verifies each record's CRC.
char* const backing_store_;  // Scratch memory handed to file_->Read to hold bytes read from the log file.
Slice buffer_;  // Bytes read from the file that have not been consumed yet.
bool eof_;  // Set once a read fails or returns fewer than kBlockSize bytes.
// Offset of the last record returned by ReadRecord.
uint64_t last_record_offset_;  // Start offset of the most recently returned logical record.
// Offset of the first location past the end of buffer_.
uint64_t end_of_buffer_offset_;  // Running total of bytes read from the log file so far.
// Offset at which to start looking for the first record to return
uint64_t const initial_offset_;  // Physical records starting before this offset are skipped.
接下来是读文件的方法。
在读取文件的过程中,ReadRecord 会反复调用 ReadPhysicalRecord 读取物理记录,并把属于同一条逻辑记录的各个分片拼接起来后返回给调用者:
// Reads the next complete logical record.
//
// A logical record is either one kFullType physical record or a
// kFirstType, kMiddleType..., kLastType sequence whose payloads are
// concatenated in *scratch.
//
// record:  receives the result. For a kFullType record it is a shallow copy of
//          the fragment produced by ReadPhysicalRecord; for a fragmented
//          record it is a view over *scratch.
// scratch: caller-supplied string used to assemble fragmented records.
//
// Returns true when a record was produced, false once ReadPhysicalRecord
// reports kEof.
bool Reader::ReadRecord(Slice* record, std::string* scratch) {
  // Reset both caller-supplied outputs before reading anything.
  scratch->clear();
  record->clear();

  // True after a kFirstType fragment until the logical record is finished or
  // abandoned.
  bool mid_record = false;
  // Start offset of the logical record being assembled. The initial 0 is a
  // dummy value that only keeps compilers quiet.
  uint64_t logical_start = 0;
  // Payload of the most recent physical record, with the header stripped.
  Slice piece;

  for (;;) {
    const unsigned int kind = ReadPhysicalRecord(&piece);

    // ReadPhysicalRecord may have had only an empty trailer left in its
    // buffer, so the start offset of the physical record is computed only
    // after it returns: end of buffer, minus what is still unread, minus the
    // header and payload that were just consumed. For kBadRecord the fragment
    // is expected to be empty, so the bad record is effectively skipped.
    const uint64_t piece_start =
        end_of_buffer_offset_ - buffer_.size() - kHeaderSize - piece.size();

    switch (kind) {
      case kFullType:
        // A full record must not appear inside a fragmented one. Earlier
        // log::Writer versions could emit an empty kFirstType record at the
        // tail of a block followed by a kFullType or kFirstType record at the
        // start of the next block, so only non-empty leftovers are reported.
        if (mid_record && !scratch->empty()) {
          ReportCorruption(scratch->size(), "partial record without end(1)");
        }
        scratch->clear();
        // The fragment is already the whole record: hand it out as is.
        *record = piece;
        last_record_offset_ = piece_start;
        return true;

      case kFirstType:
        // Same tolerance for the old writer bug as in the kFullType case.
        if (mid_record && !scratch->empty()) {
          ReportCorruption(scratch->size(), "partial record without end(2)");
        }
        // Remember where the logical record begins and start assembling it.
        logical_start = piece_start;
        scratch->assign(piece.data(), piece.size());
        mid_record = true;
        break;

      case kMiddleType:
        if (mid_record) {
          scratch->append(piece.data(), piece.size());
        } else {
          ReportCorruption(piece.size(),
                           "missing start of fragmented record(1)");
        }
        break;

      case kLastType:
        if (mid_record) {
          // Final fragment: the assembled record lives in *scratch.
          scratch->append(piece.data(), piece.size());
          *record = Slice(*scratch);
          last_record_offset_ = logical_start;
          return true;
        }
        ReportCorruption(piece.size(),
                         "missing start of fragmented record(2)");
        break;

      case kEof:
        if (mid_record) {
          // This can be caused by the writer dying immediately after writing
          // a physical record but before completing the next one; it is not
          // treated as corruption, the partial logical record is just dropped.
          scratch->clear();
        }
        return false;

      case kBadRecord:
        // A bad physical record does not stop the loop; reading continues.
        if (mid_record) {
          ReportCorruption(scratch->size(), "error in middle of record");
          mid_record = false;
          scratch->clear();
        }
        break;

      default: {
        char message[40];
        snprintf(message, sizeof(message), "unknown record type %u", kind);
        // Count this fragment plus anything already assembled as dropped.
        const auto dropped =
            piece.size() + (mid_record ? scratch->size() : 0);
        ReportCorruption(dropped, message);
        mid_record = false;
        scratch->clear();
        break;
      }
    }
  }
  return false;
}
读取文件的函数
// Reads the next physical record from the log.
//
// result: on success, set to the record payload (the bytes that follow the
//         header inside the current buffer).
//
// Returns the record type stored in the header, or one of the special values:
//   kEof       - the file is exhausted or a read failed;
//   kBadRecord - the record was dropped (bad length, checksum mismatch,
//                zero-type/zero-length record, or it starts before
//                initial_offset_).
unsigned int Reader::ReadPhysicalRecord(Slice* result) {
  while (true) {
    // Fewer bytes than one header are left in the buffer: read another block.
    if (buffer_.size() < kHeaderSize) {
      if (!eof_) {
        // Whatever is left is too short to hold a header; discard it.
        buffer_.clear();
        // Read up to one block from the file into backing_store_.
        Status status = file_->Read(kBlockSize, &buffer_, backing_store_);
        // Advance the file offset by the number of bytes actually read.
        end_of_buffer_offset_ += buffer_.size();
        if (!status.ok()) {
          buffer_.clear();
          ReportDrop(kBlockSize, status);
          eof_ = true;
          return kEof;
        } else if (buffer_.size() < kBlockSize) {
          // A short read marks the end of the file.
          eof_ = true;
        }
        continue;
      } else {
        // End of file was already reached; any leftover bytes are too short
        // to be a header and are discarded without reporting an error.
        buffer_.clear();
        return kEof;
      }
    }

    // Parse the header: bytes 0-3 hold the masked CRC, bytes 4-5 the payload
    // length (low byte first), byte 6 the record type.
    const char* header = buffer_.data();
    const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff;
    const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff;
    const unsigned int type = header[6];
    const uint32_t length = a | (b << 8);
    if (kHeaderSize + length > buffer_.size()) {
      // The header claims more payload than the buffer holds.
      size_t drop_size = buffer_.size();
      buffer_.clear();
      if (!eof_) {
        ReportCorruption(drop_size, "bad record length");
        return kBadRecord;
      }
      // At end of file a truncated record is reported as EOF rather than as
      // corruption (presumably the writer died mid-record).
      return kEof;
    }

    if (type == kZeroType && length == 0) {
      // Zero-type, zero-length record: drop the rest of the buffer without
      // reporting corruption.
      buffer_.clear();
      return kBadRecord;
    }

    // Verify the checksum, which covers the type byte and the payload.
    if (checksum_) {
      uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header));
      uint32_t actual_crc = crc32c::Value(header + 6, 1 + length);
      if (actual_crc != expected_crc) {
        // The length field itself may be corrupt, so the whole remaining
        // buffer is dropped instead of just this record.
        size_t drop_size = buffer_.size();
        buffer_.clear();
        ReportCorruption(drop_size, "checksum mismatch");
        return kBadRecord;
      }
    }

    // Consume the validated record from the buffer.
    buffer_.remove_prefix(kHeaderSize + length);

    // Skip physical record that started before initial_offset_
    if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length <
        initial_offset_) {
      result->clear();
      return kBadRecord;
    }

    // Hand the validated payload back to the caller.
    *result = Slice(header + kHeaderSize, length);
    // Return the record type.
    return type;
  }
}
读取文件最终调用的是 PosixSequentialFile 类中的 Read 方法,它内部通过 POSIX 的 ::read 系统调用读取数据:
// Reads up to n bytes from fd_ into scratch with ::read.
//
// n:       maximum number of bytes to read.
// result:  on success, set to a Slice over the bytes placed in scratch; its
//          size is whatever ::read returned. It is not modified on failure.
// scratch: caller-provided buffer that receives the data.
//
// Returns a default-constructed Status on success, or the Status built by
// PosixError(filename_, errno) when ::read fails with anything but EINTR.
Status Read(size_t n, Slice* result, char* scratch) override {
  for (;;) {
    const ::ssize_t bytes = ::read(fd_, scratch, n);
    if (bytes >= 0) {
      // Expose the bytes just read through the caller's Slice.
      *result = Slice(scratch, bytes);
      return Status();
    }
    if (errno != EINTR) {
      // Genuine read error.
      return PosixError(filename_, errno);
    }
    // Interrupted by a signal before any data was read: try again.
  }
}