Compaction Implementation Details
void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
Env::Priority bg_thread_pri) {
bool made_progress = false;
JobContext job_context(next_job_id_.fetch_add(1), true);
TEST_SYNC_POINT("BackgroundCallCompaction:0");
LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
immutable_db_options_.info_log.get());
{
// Acquire the DB mutex.
// The lock is held until BackgroundCompaction has set up the compaction_job
// (picking the files to compact and so on); compaction_job.Run() unlocks it
// and re-locks once the compaction has finished.
InstrumentedMutexLock l(&mutex_);
// This call will unlock/lock the mutex to wait for current running
// IngestExternalFile() calls to finish.
WaitForIngestFile();
num_running_compactions_++;
std::unique_ptr<std::list<uint64_t>::iterator>
pending_outputs_inserted_elem(new std::list<uint64_t>::iterator(
CaptureCurrentFileNumberInPendingOutputs()));
assert((bg_thread_pri == Env::Priority::BOTTOM &&
bg_bottom_compaction_scheduled_) ||
(bg_thread_pri == Env::Priority::LOW && bg_compaction_scheduled_));
// Invoke the compaction
Status s = BackgroundCompaction(&made_progress, &job_context, &log_buffer,
prepicked_compaction, bg_thread_pri);
//...
// handling after the compaction finishes
}
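The locking pattern described in the comments is worth isolating: the DB mutex guards compaction picking and result installation, but is dropped for the long-running work itself. Below is a minimal standalone sketch of that pattern; db_mutex, picked_files and BackgroundCompactionSketch are made-up illustrative names, not part of RocksDB.
#include <mutex>
#include <vector>

// Minimal sketch of the "unlock around the heavy work" pattern described
// above. db_mutex, picked_files and BackgroundCompactionSketch are
// illustrative names, not RocksDB's.
std::mutex db_mutex;            // stands in for DBImpl::mutex_
std::vector<int> picked_files;  // stands in for the picked compaction inputs

void BackgroundCompactionSketch() {
  std::unique_lock<std::mutex> lock(db_mutex);  // pick inputs under the lock
  picked_files = {1, 2, 3};

  lock.unlock();  // like mutex_.Unlock() before compaction_job.Run()
  // ... long-running merge and writing of output files, no DB mutex held ...
  lock.lock();    // like mutex_.Lock() before Install()

  picked_files.clear();  // install results while holding the lock again
}

int main() { BackgroundCompactionSketch(); }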
Compaction Flow
Status DBImpl::BackgroundCompaction(bool* made_progress,
JobContext* job_context,
LogBuffer* log_buffer,
PrepickedCompaction* prepicked_compaction,
Env::Priority thread_pri) {
... // Check whether this is a manual compaction and whether there is enough space for the compaction
auto cfd = PickCompactionFromQueue(&task_token, log_buffer);
if (cfd == nullptr) {
// Can't find any executable task from the compaction queue.
// All tasks have been throttled by compaction thread limiter.
++unscheduled_compactions_;
return Status::Busy();
}
auto* mutable_cf_options = cfd->GetLatestMutableCFOptions();
if (!mutable_cf_options->disable_auto_compactions && !cfd->IsDropped()) {
// NOTE: try to avoid unnecessary copy of MutableCFOptions if
// compaction is not necessary. Need to make sure mutex is held
// until we make a copy in the following code
// Pick the files that need compaction (analyzed later)
c.reset(cfd->PickCompaction(*mutable_cf_options, log_buffer));
// ...
// additional handling
}
if (!c) {
// Nothing to do
ROCKS_LOG_BUFFER(log_buffer, "Compaction nothing to do");
} else if (c->deletion_compaction()) {
// A compaction that can be done by simply deleting the input files
// ->If true, then the compaction can be done by simply deleting input files.
// TODO(icanadi) Do we want to honor snapshots here? i.e. not delete old
// file if there is alive snapshot pointing to it
...
} else if (!trivial_move_disallowed && c->IsTrivialMove()) {
// No merging or splitting; the file just needs to be moved to the next level
//Is this a trivial compaction that can be implemented by just
// moving a single input file to the next level (no merging or splitting)
...
} else if (!is_prepicked && c->output_level() > 0 &&
c->output_level() ==
c->column_family_data()
->current()
->storage_info()
->MaxOutputLevel(
immutable_db_options_.allow_ingest_behind) &&
env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) {
// Forward compactions involving last level to the bottom pool if it exists,
// such that compactions unlikely to contribute to write stalls can be
// delayed or deprioritized.
// Forward the compaction to the bottom-priority thread pool
...
} else {
// Perform the compaction
...
// Construct the CompactionJob
CompactionJob compaction_job(
job_context->job_id, c.get(), immutable_db_options_,
env_options_for_compaction_, versions_.get(), &shutting_down_,
preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
GetDataDir(c->column_family_data(), c->output_path_id()), stats_,
&mutex_, &error_handler_, snapshot_seqs,
earliest_write_conflict_snapshot, snapshot_checker, table_cache_,
&event_logger_, c->mutable_cf_options()->paranoid_file_checks,
c->mutable_cf_options()->report_bg_io_stats, dbname_,
&compaction_job_stats, thread_pri,
is_manual ? &manual_compaction_paused_ : nullptr);
// Compute the subcompactions
compaction_job.Prepare();
NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
compaction_job_stats, job_context->job_id);
// Unlock so that compactions can run concurrently
mutex_.Unlock();
TEST_SYNC_POINT_CALLBACK(
"DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", nullptr);
compaction_job.Run();
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun");
mutex_.Lock();
status = compaction_job.Install(*c->mutable_cf_options());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(c->column_family_data(),
&job_context->superversion_contexts[0],
*c->mutable_cf_options());
}
*made_progress = true;
TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
c->column_family_data());
}
Prepare decides whether to split the compaction into subcompactions
void CompactionJob::Prepare() {
AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_COMPACTION_PREPARE);
// Generate file_levels_ for compaction before making Iterator
auto* c = compact_->compaction;
assert(c->column_family_data() != nullptr);
assert(c->column_family_data()->current()->storage_info()->NumLevelFiles(
compact_->compaction->level()) > 0);
write_hint_ =
c->column_family_data()->CalculateSSTWriteHint(c->output_level());
bottommost_level_ = c->bottommost_level();
// Decide here whether to form subcompactions.
// For level compaction the condition is start level == 0 and output level > 0
// (or it is a manual compaction with output level > 0)
if (c->ShouldFormSubcompactions()) {
{
StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME);
// GenSubcompactionBoundaries works out the subcompactions and their boundaries
GenSubcompactionBoundaries();
}
assert(sizes_.size() == boundaries_.size() + 1);
for (size_t i = 0; i <= boundaries_.size(); i++) {
Slice* start = i == 0 ? nullptr : &boundaries_[i - 1];
Slice* end = i == boundaries_.size() ? nullptr : &boundaries_[i];
compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i]);
}
RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED,
compact_->sub_compact_states.size());
} else {
compact_->sub_compact_states.emplace_back(c, nullptr, nullptr);
}
}
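The loop above turns the N boundary keys returned by GenSubcompactionBoundaries into N + 1 key ranges: the first range is unbounded on the left, the last unbounded on the right. A small self-contained sketch of that partitioning follows; the Range struct and the example boundary keys are illustrative, not RocksDB types.
#include <iostream>
#include <string>
#include <vector>

// Sketch of how N subcompaction boundaries yield N + 1 key ranges, mirroring
// the loop in CompactionJob::Prepare(). The Range struct and boundary values
// are illustrative.
struct Range {
  const std::string* start;  // nullptr means unbounded on the left
  const std::string* end;    // nullptr means unbounded on the right
};

int main() {
  std::vector<std::string> boundaries = {"b", "m", "t"};
  std::vector<Range> sub_compactions;
  for (size_t i = 0; i <= boundaries.size(); i++) {
    const std::string* start = (i == 0) ? nullptr : &boundaries[i - 1];
    const std::string* end = (i == boundaries.size()) ? nullptr : &boundaries[i];
    sub_compactions.push_back({start, end});
  }
  for (const Range& r : sub_compactions) {
    std::cout << (r.start ? *r.start : "-inf") << " .. "
              << (r.end ? *r.end : "+inf") << "\n";
  }
  return 0;
}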
Executing the Compaction
Status CompactionJob::Run() {
AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_COMPACTION_RUN);
TEST_SYNC_POINT("CompactionJob::Run():Start");
log_buffer_->FlushBufferToLog();
LogCompaction();
const size_t num_threads = compact_->sub_compact_states.size();
assert(num_threads > 0);
const uint64_t start_micros = env_->NowMicros();
// Launch a thread per subcompaction (with no subcompactions, or for the first one, the work runs in the current thread)
// Launch a thread for each of subcompactions 1...num_threads-1
std::vector<port::Thread> thread_pool;
thread_pool.reserve(num_threads - 1);
for (size_t i = 1; i < compact_->sub_compact_states.size(); i++) {
thread_pool.emplace_back(&CompactionJob::ProcessKeyValueCompaction, this,
&compact_->sub_compact_states[i]);
}
// Always schedule the first subcompaction (whether or not there are also
// others) in the current thread to be efficient with resources
ProcessKeyValueCompaction(&compact_->sub_compact_states[0]);
// Wait for all other threads (if there are any) to finish execution
for (auto& thread : thread_pool) {
thread.join();
}
ProcessKeyValueCompaction
As with flushing a memtable, the per-file iterators are combined into a merge iterator, and keys are read out one by one in sorted order to build the data blocks and the index, as sketched below.
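A minimal sketch of such a k-way merge, using std::priority_queue as a simplified stand-in for the merge iterator; the inputs and the point where the table builder would consume each key are illustrative.
#include <iostream>
#include <queue>
#include <string>
#include <utility>
#include <vector>

// Simplified k-way merge over sorted inputs, in the spirit of the merge
// iterator used during compaction: each "file" is already sorted, and the
// globally smallest key is popped each step, which is where the table
// builder would consume keys to build data blocks and the index.
int main() {
  std::vector<std::vector<std::string>> files = {
      {"a", "d", "g"}, {"b", "e"}, {"c", "f", "h"}};

  using Entry = std::pair<std::string, size_t>;  // (key, file index)
  auto cmp = [](const Entry& a, const Entry& b) { return a.first > b.first; };
  std::priority_queue<Entry, std::vector<Entry>, decltype(cmp)> heap(cmp);

  std::vector<size_t> pos(files.size(), 0);
  for (size_t i = 0; i < files.size(); i++) {
    if (!files[i].empty()) heap.push({files[i][0], i});
  }
  while (!heap.empty()) {
    auto [key, idx] = heap.top();
    heap.pop();
    std::cout << key << "\n";  // hand the key to the block/index builder here
    if (++pos[idx] < files[idx].size()) heap.push({files[idx][pos[idx]], idx});
  }
  return 0;
}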