见 CMU 15-445 Project #0 - C++ Primer 中的准备工作。
有关实现的部分说明:
b_plus_tree_page
不难发现,每一个节点本质上都是从缓冲池中通过 Fetch
操作获得的一个页面(准确来说是缓冲池页面的数据部分,这个数据部分通过 reinterpret_cast
强制转换后就是B+树节点的全部内容),因此B+树节点中的 page_id
与缓冲池和磁盘上的页面的 page_id
是一致的。对于分支节点(即内部节点),它保存的是 (BUSTUB_PAGE_SIZE - INTERNAL_PAGE_HEADER_SIZE) / sizeof(MappingType)
个 GenericKey
与 page_id_t
组成的键值对。而对于叶子结点,它保存的则是 (BUSTUB_PAGE_SIZE - LEAF_PAGE_HEADER_SIZE) / sizeof(MappingType)
个 GenericKey
与 RID
组成的键值对。源码中对于 RID
的定义是一个记录标识符,可见叶子结点中保存的不是实际数据,而是指向实际数据的记录标识符,因此这里实现的B+树索引是一个非聚簇索引。此外,本实现给整棵B+树加了一把 std::shared_mutex
的大锁,这里加锁的目的并不是为了并发,而是为了让我们的代码能在 CHECKPOINT #2
上运行,从而通过更完善的测试用例检查我们单线程实现中的漏洞。此外,为了能够让我们的代码能在 CHECKPOINT #2
上运行,还需要简单实现一下迭代器,我会在后文给出。注意事项:
在内部页面中查找时应从第二个 key
开始,因为首个 key
是无效的。通过 FetchPage()
或 NewPage()
获取的页面都需要在使用后通过 UnpinPage()
解除引用,不然这个页面会一直驻留在缓冲池(内存)中无法回写到磁盘,最后导致缓冲池溢出。INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::GetValue(const KeyType &key, std::vector<ValueType> *result, Transaction *transaction) -> bool {
std::shared_lock<std::shared_mutex> locker(shared_mutex_);
/* B+树为空 */
if (root_page_id_ == INVALID_PAGE_ID) {
return false;
}
LeafPage *target_leaf_page = FindLeafPage(key);
for (int i = 0; i < target_leaf_page->GetSize(); i++) {
if (comparator_(key, target_leaf_page->KeyAt(i)) == 0) {
/* 查找成功 */
result->emplace_back(target_leaf_page->ValueAt(i));
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), false);
return true;
}
}
/* 查找失败 */
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), false);
return false;
}
/**
 * Descends from the root to the leaf page whose key range covers `key`.
 *
 * Every internal page visited on the way down is unpinned before returning;
 * the returned leaf is still pinned and must be unpinned by the caller.
 *
 * @param key  key that determines which child to follow at each level
 * @return the pinned leaf page, or nullptr when the tree is empty
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::FindLeafPage(const KeyType &key) -> LeafPage * {
  if (root_page_id_ == INVALID_PAGE_ID) {
    return nullptr;
  }

  auto node = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(root_page_id_)->GetData());
  while (!node->IsLeafPage()) {
    auto inner = static_cast<InternalPage *>(node);

    // Slot 0 carries no valid key, so the scan starts at slot 1. The child to
    // follow is the last slot whose key is <= the search key (slot 0 if none).
    int child_slot = 0;
    for (int k = 1; k < inner->GetSize(); ++k) {
      if (comparator_(key, inner->KeyAt(k)) < 0) {
        break;
      }
      child_slot = k;
    }

    // Pin the child first, then release the parent.
    node = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(inner->ValueAt(child_slot))->GetData());
    buffer_pool_manager_->UnpinPage(inner->GetPageId(), false);
  }
  return static_cast<LeafPage *>(node);
}
注意事项:
分裂叶子页面时需要通过 SetNextPageId()
修改叶子页面指向的下一个页面,B+树的一个特征就是叶子页面串连在一起。这里的实现不支持重复的 key
,叶子页面中发现当前 key
已经存在时应当直接返回。叶子页面分裂时的数据移动由 MoveHalfDataTo()
函数实现。而内部页面需要在移动的同时完成插入,不然在分裂后还要判断数据应该插到哪个页面,由 MoveHalfDataAndInsertTo()
函数实现,我这里的思路就是把旧页面中除去第一个键值对(因为分裂肯定是向右分裂)的数据和插入数据整理到一起,然后平均分配到两个内部页面中。INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::Insert(const KeyType &key, const ValueType &value, Transaction *transaction) -> bool {
std::unique_lock<std::shared_mutex> locker(shared_mutex_);
/* B+树为空 */
if (root_page_id_ == INVALID_PAGE_ID) {
auto new_root_page = reinterpret_cast<LeafPage *>(buffer_pool_manager_->NewPage(&root_page_id_)->GetData());
/* 初始化新的根页面 */
new_root_page->Init(root_page_id_, INVALID_PAGE_ID, leaf_max_size_);
new_root_page->InsertByKey(key, value, comparator_);
new_root_page->SetNextPageId(INVALID_PAGE_ID);
UpdateRootPageId(true);
buffer_pool_manager_->UnpinPage(new_root_page->GetPageId(), true);
return true;
}
LeafPage *target_leaf_page = FindLeafPage(key);
/* key重复 */
if (!target_leaf_page->InsertByKey(key, value, comparator_)) {
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), false);
return false;
}
/* 叶子页面上溢 */
if (target_leaf_page->GetSize() == target_leaf_page->GetMaxSize()) {
HandleLeafOverflow(target_leaf_page);
}
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), true);
return true;
}
/**
 * Splits a full leaf page and links the new right sibling into the tree.
 *
 * The upper part of `target_page` moves to a freshly allocated leaf. If
 * `target_page` was the root, a new internal root is created above both
 * leaves; otherwise the sibling's first key is pushed into the parent, which
 * may itself overflow. `target_page` stays pinned; every page pinned here is
 * unpinned before returning.
 *
 * @param target_page  the pinned leaf that has reached its max size
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleLeafOverflow(LeafPage *target_page) {
  // Both cases begin by allocating the right-hand sibling.
  page_id_t sibling_id = INVALID_PAGE_ID;
  auto sibling = reinterpret_cast<LeafPage *>(buffer_pool_manager_->NewPage(&sibling_id)->GetData());

  if (!target_page->IsRootPage()) {
    auto parent = reinterpret_cast<InternalPage *>(
        buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());
    sibling->Init(sibling_id, parent->GetPageId(), leaf_max_size_);
    target_page->MoveHalfDataTo(sibling);

    // A full parent must split while absorbing the new separator.
    const bool parent_full = parent->GetSize() == parent->GetMaxSize();
    if (parent_full) {
      HandleInternalOverflow(parent, sibling->KeyAt(0), sibling->GetPageId());
    } else {
      parent->InsertByKey(sibling->KeyAt(0), sibling->GetPageId(), comparator_, buffer_pool_manager_);
    }
    buffer_pool_manager_->UnpinPage(sibling->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(parent->GetPageId(), true);
    return;
  }

  // The root leaf is splitting: grow the tree by one level.
  auto fresh_root = reinterpret_cast<InternalPage *>(buffer_pool_manager_->NewPage(&root_page_id_)->GetData());
  sibling->Init(sibling_id, root_page_id_, leaf_max_size_);
  target_page->MoveHalfDataTo(sibling);

  fresh_root->Init(root_page_id_, INVALID_PAGE_ID, internal_max_size_);
  fresh_root->SetKeyAt(0, sibling->KeyAt(0));  // placeholder; the key in slot 0 carries no meaning
  fresh_root->SetValueAt(0, target_page->GetPageId());
  fresh_root->SetKeyAt(1, sibling->KeyAt(0));
  fresh_root->SetValueAt(1, sibling->GetPageId());
  fresh_root->IncreaseSize(1);
  target_page->SetParentPageId(root_page_id_);  // the old root now hangs under the new root
  UpdateRootPageId(false);

  buffer_pool_manager_->UnpinPage(sibling->GetPageId(), true);
  buffer_pool_manager_->UnpinPage(fresh_root->GetPageId(), true);
}
/**
 * Splits a full internal page while inserting one more (key, child) entry.
 *
 * The existing entries plus the new one are redistributed between
 * `target_page` and a freshly allocated right sibling. The sibling's slot-0
 * key is temporarily valid after the split and is used as the separator that
 * goes into the parent (or into a new root when `target_page` was the root).
 * `target_page` stays pinned; every page pinned here is unpinned before
 * returning.
 *
 * @param target_page  the pinned internal page that is already at max size
 * @param key          separator key to insert
 * @param value        page id of the child that belongs to `key`
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleInternalOverflow(InternalPage *target_page, const KeyType &key, const page_id_t &value) {
  // Both cases begin by allocating the right-hand sibling.
  page_id_t sibling_id = INVALID_PAGE_ID;
  auto sibling = reinterpret_cast<InternalPage *>(buffer_pool_manager_->NewPage(&sibling_id)->GetData());

  if (!target_page->IsRootPage()) {
    auto parent = reinterpret_cast<InternalPage *>(
        buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());
    sibling->Init(sibling_id, target_page->GetParentPageId(), internal_max_size_);
    // After this call the sibling's slot-0 key is temporarily valid.
    target_page->MoveHalfDataAndInsertTo(sibling, key, value, comparator_, buffer_pool_manager_);

    // Propagate the split upwards if the parent has no room left.
    const bool parent_full = parent->GetSize() == parent->GetMaxSize();
    if (parent_full) {
      HandleInternalOverflow(parent, sibling->KeyAt(0), sibling->GetPageId());
    } else {
      parent->InsertByKey(sibling->KeyAt(0), sibling->GetPageId(), comparator_, buffer_pool_manager_);
    }
    buffer_pool_manager_->UnpinPage(sibling->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(parent->GetPageId(), true);
    return;
  }

  // The root is splitting: grow the tree by one level.
  auto fresh_root = reinterpret_cast<InternalPage *>(buffer_pool_manager_->NewPage(&root_page_id_)->GetData());
  sibling->Init(sibling_id, root_page_id_, internal_max_size_);
  // After this call the sibling's slot-0 key is temporarily valid.
  target_page->MoveHalfDataAndInsertTo(sibling, key, value, comparator_, buffer_pool_manager_);

  fresh_root->Init(root_page_id_, INVALID_PAGE_ID, internal_max_size_);
  fresh_root->SetKeyAt(0, sibling->KeyAt(0));  // placeholder; the key in slot 0 carries no meaning
  fresh_root->SetValueAt(0, target_page->GetPageId());
  fresh_root->SetKeyAt(1, sibling->KeyAt(0));
  fresh_root->SetValueAt(1, sibling->GetPageId());
  fresh_root->IncreaseSize(1);
  target_page->SetParentPageId(root_page_id_);  // the old root now hangs under the new root
  UpdateRootPageId(false);

  buffer_pool_manager_->UnpinPage(sibling->GetPageId(), true);
  buffer_pool_manager_->UnpinPage(fresh_root->GetPageId(), true);
}
/**
 * Inserts (key, value) into this leaf, keeping the entries sorted by key.
 *
 * @param key         key to insert
 * @param value       value to store with the key
 * @param comparator  three-way key comparator (<0, 0, >0)
 * @return false if the key is already present (nothing is modified), true otherwise
 */
INDEX_TEMPLATE_ARGUMENTS
auto B_PLUS_TREE_LEAF_PAGE_TYPE::InsertByKey(const KeyType &key, const ValueType &value,
                                             const KeyComparator &comparator) -> bool {
  const int old_size = GetSize();

  // Find the first slot whose key is not smaller than the new key.
  int slot = 0;
  for (; slot < old_size; ++slot) {
    const int order = comparator(key, array_[slot].first);
    if (order == 0) {
      return false;  // duplicate keys are not allowed
    }
    if (order < 0) {
      break;
    }
  }

  // Open a gap at `slot` by shifting the tail one position to the right.
  IncreaseSize(1);
  int cursor = old_size;
  while (cursor > slot) {
    array_[cursor] = array_[cursor - 1];
    --cursor;
  }

  array_[slot].first = key;
  array_[slot].second = value;
  return true;
}
/**
 * Moves every entry from index GetMinSize() onwards into `des_page`
 * (written starting at its slot 0) and splices `des_page` into the leaf
 * chain directly after this page.
 *
 * @param des_page  destination leaf; entries are written from its slot 0
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveHalfDataTo(B_PLUS_TREE_LEAF_PAGE_TYPE *des_page) {
  const int total = GetSize();  // size before anything is moved out
  int from = GetMinSize();
  int to = 0;
  while (from < total) {
    des_page->array_[to++] = array_[from++];
    des_page->IncreaseSize(1);
    this->DecreaseSize(1);
  }

  // this -> des_page -> (old successor of this)
  des_page->SetNextPageId(this->GetNextPageId());
  this->SetNextPageId(des_page->GetPageId());
}
/**
 * Inserts (key, child page id) at its sorted position in this internal page.
 *
 * Slot 0 holds no valid key, so the search starts at slot 1 and the new entry
 * is always placed after slot 0. Duplicate keys are a programming error
 * (checked by assert). The insertion itself is delegated to InsertByIndex().
 *
 * @param key                  separator key to insert
 * @param value                child page id stored with the key
 * @param comparator           three-way key comparator
 * @param buffer_pool_manager  forwarded to InsertByIndex()
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::InsertByKey(const KeyType &key, const ValueType &value,
                                                 const KeyComparator &comparator,
                                                 BufferPoolManager *buffer_pool_manager) {
  int slot = 1;
  for (; slot < GetSize(); ++slot) {
    const int order = comparator(key, array_[slot].first);
    assert(order != 0);  // keys must be unique
    if (order <= 0) {
      break;
    }
  }
  assert(slot > 0);  // the entry must land after the invalid key in array_[0]
  InsertByIndex(slot, key, value, comparator, buffer_pool_manager);
}
/**
 * Splits this (full) internal page into itself and `des_page` while inserting
 * one additional (key, child) entry.
 *
 * The entries in slots 1..GetMaxSize()-1 are merged with the new entry into a
 * sorted scratch buffer. The first GetMinSize()-1 merged entries go back into
 * this page after slot 0 (which is left untouched); the rest fill `des_page`
 * from its slot 0, so `des_page`'s first key is a real key after the call.
 * Every redistributed child has its parent page id rewritten.
 *
 * @param des_page             destination page receiving the upper part
 * @param key                  separator key to insert
 * @param value                child page id that belongs to `key`
 * @param comparator           three-way key comparator
 * @param buffer_pool_manager  used to fetch and unpin the child pages
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveHalfDataAndInsertTo(B_PLUS_TREE_INTERNAL_PAGE_TYPE *des_page,
                                                             const KeyType &key, const page_id_t &value,
                                                             const KeyComparator &comparator,
                                                             BufferPoolManager *buffer_pool_manager) {
  const int capacity = GetMaxSize();

  // Merge the existing entries (slot 0 excluded) with the new entry, in key order.
  // e.g. [invalid, 1, 3] & 2 -> [1, 2, 3]
  std::vector<MappingType> merged(capacity);
  int src = 1;
  int out = 0;
  for (; src < capacity && comparator(array_[src].first, key) < 0; ++src, ++out) {
    merged.at(out) = array_[src];
  }
  merged.at(out) = std::make_pair(key, value);
  ++out;
  for (; src < capacity; ++src, ++out) {
    merged.at(out) = array_[src];
  }

  // Points the child referenced by `entry` at its new parent page.
  auto adopt = [buffer_pool_manager](const MappingType &entry, B_PLUS_TREE_INTERNAL_PAGE_TYPE *new_parent) {
    auto child = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager->FetchPage(entry.second)->GetData());
    child->SetParentPageId(new_parent->GetPageId());
    buffer_pool_manager->UnpinPage(child->GetPageId(), true);
  };

  // Hand the merged entries back out: lower part stays here, the rest goes right.
  this->SetSize(1);
  des_page->SetSize(0);
  out = 0;
  const int keep = GetMinSize();
  for (int slot = 1; slot < keep; ++slot, ++out) {
    array_[slot] = merged.at(out);
    this->IncreaseSize(1);
    adopt(merged.at(out), this);
  }
  for (int slot = 0; out < capacity; ++slot, ++out) {
    des_page->array_[slot] = merged.at(out);  // slot 0 receives a real key as well
    des_page->IncreaseSize(1);
    adopt(merged.at(out), des_page);
  }
}
注意事项:
合并或借取内部页面的数据时需要同步更新被移动孩子页面的 parent_page_id_
。合并两个内部页面前,需要先设置被合并页面的首个 key
为有效值,然后再合并。这个有效值本质就是这个内部页面所在子树的最左侧叶子页面的第一个 key
,我这里通过 FindFistKey()
函数进行查找。被合并掉的页面在通过 UnpinPage()
解除固定后还需要通过 DeletePage()
从内存和外存中删除。INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::Remove(const KeyType &key, Transaction *transaction) {
std::unique_lock<std::shared_mutex> locker(shared_mutex_);
/* B+树为空 */
if (root_page_id_ == INVALID_PAGE_ID) {
return;
}
LeafPage *target_leaf_page = FindLeafPage(key);
/* key不存在 */
if (!target_leaf_page->RemoveByKey(key, comparator_)) {
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), false);
return;
}
if ((target_leaf_page->GetSize() < target_leaf_page->GetMinSize())) {
if (!target_leaf_page->IsRootPage()) {
/* 非根叶子页面下溢 */
HandleLeafUnderflow(target_leaf_page);
} else if (target_leaf_page->GetSize() == 0) {
/* 根节点为空 */
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), true);
buffer_pool_manager_->DeletePage(target_leaf_page->GetPageId());
root_page_id_ = INVALID_PAGE_ID;
UpdateRootPageId(false);
} else {
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), true);
}
} else {
/* 没有下溢发生 */
buffer_pool_manager_->UnpinPage(target_leaf_page->GetPageId(), true);
}
}
/**
 * Rebalances a non-root leaf that has dropped below its min size.
 *
 * If the chosen sibling has entries to spare, one entry is borrowed and the
 * separator in the parent is updated. Otherwise the right page of the pair is
 * merged into the left one and deleted, and its slot is removed from the
 * parent, which may in turn underflow or (if it is the root and has a single
 * child left) be replaced by the merged leaf.
 *
 * Takes over the pin on `target_page`: every page involved, including
 * `target_page`, is unpinned before returning.
 *
 * @param target_page  the pinned, underflowing, non-root leaf
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleLeafUnderflow(LeafPage *target_page) {
  int tar_index = -1;
  int bro_index = -1;
  auto parent_page =
      reinterpret_cast<InternalPage *>(buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());
  auto bro_page = static_cast<LeafPage *>(GetBrotherPage(parent_page, target_page, tar_index, bro_index));

  /* Borrow from the sibling when it can spare an entry. */
  if (bro_page->GetSize() > bro_page->GetMinSize()) {
    if (bro_index < tar_index) {
      /* Take the last entry of the left sibling. */
      KeyType bro_last_key = bro_page->KeyAt(bro_page->GetSize() - 1);
      ValueType bro_last_value = bro_page->ValueAt(bro_page->GetSize() - 1);
      bro_page->RemoveByKey(bro_last_key, comparator_);
      target_page->InsertByKey(bro_last_key, bro_last_value, comparator_);
      parent_page->SetKeyAt(tar_index, bro_last_key);
    } else {
      /* Take the first entry of the right sibling. */
      KeyType bro_first_key = bro_page->KeyAt(0);
      ValueType bro_first_value = bro_page->ValueAt(0);
      bro_page->RemoveByKey(bro_first_key, comparator_);
      target_page->InsertByKey(bro_first_key, bro_first_value, comparator_);
      parent_page->SetKeyAt(bro_index, bro_page->KeyAt(0));
    }
    buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(bro_page->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(target_page->GetPageId(), true);
    return;
  }

  /* Merge: always move the right page of the pair into the left one. */
  LeafPage *src_page;
  LeafPage *des_page;
  int src_index;
  if (bro_index < tar_index) {
    /* left_bro <- target */
    src_page = target_page;
    des_page = bro_page;
    src_index = tar_index;
  } else {
    /* target <- right_bro */
    src_page = bro_page;
    des_page = target_page;
    src_index = bro_index;
  }
  src_page->MoveAllDataTo(des_page);
  parent_page->RemoveByIndex(src_index);

  // Capture ids before unpinning; an unpinned frame must not be read again.
  const page_id_t src_page_id = src_page->GetPageId();
  buffer_pool_manager_->UnpinPage(src_page_id, true);
  buffer_pool_manager_->DeletePage(src_page_id);

  const page_id_t parent_page_id = parent_page->GetPageId();
  if (parent_page->IsRootPage()) {
    // The root has no minimum fill, but a root with a single child is
    // redundant. This check must not depend on GetMinSize(): with an internal
    // max size of 2 the min size is 1, and the root would never collapse.
    if (parent_page->GetSize() == 1) {
      root_page_id_ = des_page->GetPageId();
      des_page->SetParentPageId(INVALID_PAGE_ID);
      UpdateRootPageId(false);
      buffer_pool_manager_->UnpinPage(parent_page_id, true);
      buffer_pool_manager_->DeletePage(parent_page_id);
    } else {
      buffer_pool_manager_->UnpinPage(parent_page_id, true);
    }
  } else if (parent_page->GetSize() < parent_page->GetMinSize()) {
    /* Non-root internal underflow; the handler unpins parent_page. */
    HandleInternalUnderflow(parent_page);
  } else {
    buffer_pool_manager_->UnpinPage(parent_page_id, true);
  }
  buffer_pool_manager_->UnpinPage(des_page->GetPageId(), true);
}
/**
 * Rebalances a non-root internal page that has dropped below its min size.
 *
 * Borrowing rotates one child through the parent: the parent's separator
 * moves down into `target_page` and the sibling's boundary key moves up.
 * Merging moves the right page of the pair into the left one; because slot 0
 * of an internal page has no valid key, the source page's slot-0 key is first
 * filled with the smallest key of its subtree. The parent then loses one slot
 * and may underflow itself, or be replaced by the merged page when it is the
 * root and only one child is left.
 *
 * Takes over the pin on `target_page`: every page involved, including
 * `target_page`, is unpinned before returning.
 *
 * @param target_page  the pinned, underflowing, non-root internal page
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleInternalUnderflow(InternalPage *target_page) {
  /* Fetch the parent and a sibling, together with their slots in the parent. */
  int tar_index = -1;
  int bro_index = -1;
  auto parent_page =
      reinterpret_cast<InternalPage *>(buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());
  auto bro_page = static_cast<InternalPage *>(GetBrotherPage(parent_page, target_page, tar_index, bro_index));

  /* Borrow from the sibling when it can spare an entry. */
  if (bro_page->GetSize() > bro_page->GetMinSize()) {
    if (bro_index < tar_index) {
      /* Take the last child of the left sibling. */
      KeyType bro_last_key = bro_page->KeyAt(bro_page->GetSize() - 1);
      page_id_t bro_last_value = bro_page->ValueAt(bro_page->GetSize() - 1);
      bro_page->RemoveByValue(bro_last_value);
      // Give slot 0 a real key (the old separator) before pushing a new entry in front of it.
      target_page->SetKeyAt(0, parent_page->KeyAt(tar_index));
      target_page->InsertByIndex(0, bro_last_key, bro_last_value, comparator_, buffer_pool_manager_);
      parent_page->SetKeyAt(tar_index, bro_last_key);
    } else {
      /* Take the first child of the right sibling; its key is the parent's separator. */
      KeyType bro_first_key = parent_page->KeyAt(bro_index);
      page_id_t bro_first_value = bro_page->ValueAt(0);
      bro_page->RemoveByValue(bro_first_value);
      target_page->InsertByIndex(target_page->GetSize(), bro_first_key, bro_first_value, comparator_,
                                 buffer_pool_manager_);
      parent_page->SetKeyAt(bro_index, bro_page->KeyAt(0));
    }
    buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(bro_page->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(target_page->GetPageId(), true);
    return;
  }

  /* Merge: always move the right page of the pair into the left one. */
  InternalPage *src_page;
  InternalPage *des_page;
  int src_index;
  if (bro_index < tar_index) {
    /* left_bro <- target */
    src_page = target_page;
    des_page = bro_page;
    src_index = tar_index;
  } else {
    /* target <- right_bro */
    src_page = bro_page;
    des_page = target_page;
    src_index = bro_index;
  }
  src_page->SetKeyAt(0, FindFistKey(src_page));  // temporarily give slot 0 a real key
  src_page->MoveAllDataTo(des_page, comparator_, buffer_pool_manager_);
  parent_page->RemoveByIndex(src_index);

  // Capture ids before unpinning; an unpinned frame must not be read again.
  const page_id_t src_page_id = src_page->GetPageId();
  buffer_pool_manager_->UnpinPage(src_page_id, true);
  buffer_pool_manager_->DeletePage(src_page_id);

  const page_id_t parent_page_id = parent_page->GetPageId();
  if (parent_page->IsRootPage()) {
    // The root has no minimum fill, but a root with a single child is
    // redundant. This check must not depend on GetMinSize(): with an internal
    // max size of 2 the min size is 1, and the root would never collapse.
    if (parent_page->GetSize() == 1) {
      root_page_id_ = des_page->GetPageId();
      des_page->SetParentPageId(INVALID_PAGE_ID);
      UpdateRootPageId(false);
      buffer_pool_manager_->UnpinPage(parent_page_id, true);
      buffer_pool_manager_->DeletePage(parent_page_id);
    } else {
      buffer_pool_manager_->UnpinPage(parent_page_id, true);
    }
  } else if (parent_page->GetSize() < parent_page->GetMinSize()) {
    /* Non-root internal underflow; the recursive call unpins parent_page. */
    HandleInternalUnderflow(parent_page);
  } else {
    buffer_pool_manager_->UnpinPage(parent_page_id, true);
  }
  buffer_pool_manager_->UnpinPage(des_page->GetPageId(), true);
}
/**
 * Picks the sibling of `child_page` to borrow from or merge with.
 *
 * The left sibling is preferred. The right sibling is chosen only when the
 * child has no left sibling, or when the right sibling can lend an entry and
 * the left one cannot.
 *
 * @param parent_page        parent of `child_page` (already pinned by the caller)
 * @param child_page         page that needs a sibling
 * @param[out] target_index  slot of `child_page` inside `parent_page`
 * @param[out] bro_index     slot of the returned sibling inside `parent_page`
 * @return the chosen sibling, pinned; the caller must unpin it
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::GetBrotherPage(InternalPage *parent_page, BPlusTreePage *child_page, int &target_index,
                                    int &bro_index) -> BPlusTreePage * {
  target_index = parent_page->GetIndexByValue(child_page->GetPageId());
  // The child must be listed in its parent, and it needs at least one sibling;
  // otherwise the ValueAt() calls below would index outside the page.
  assert(target_index >= 0);
  assert(parent_page->GetSize() > 1);

  /* Last slot: only a left sibling exists. */
  if (target_index == parent_page->GetSize() - 1) {
    auto bro_page = reinterpret_cast<BPlusTreePage *>(
        buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index - 1))->GetData());
    bro_index = target_index - 1;
    return bro_page;
  }
  /* First slot: only a right sibling exists. */
  if (target_index == 0) {
    auto bro_page = reinterpret_cast<BPlusTreePage *>(
        buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index + 1))->GetData());
    bro_index = target_index + 1;
    return bro_page;
  }

  /* Both siblings exist. */
  auto lbro_page = reinterpret_cast<BPlusTreePage *>(
      buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index - 1))->GetData());
  auto rbro_page = reinterpret_cast<BPlusTreePage *>(
      buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index + 1))->GetData());

  // Left sibling first, unless only the right one has an entry to spare. A
  // sibling sitting exactly at its min size cannot lend, hence `<=`.
  const bool right_can_lend = rbro_page->GetSize() > rbro_page->GetMinSize();
  const bool left_cannot_lend = lbro_page->GetSize() <= lbro_page->GetMinSize();
  if (right_can_lend && left_cannot_lend) {
    buffer_pool_manager_->UnpinPage(lbro_page->GetPageId(), false);
    bro_index = target_index + 1;
    return rbro_page;
  }
  buffer_pool_manager_->UnpinPage(rbro_page->GetPageId(), false);
  bro_index = target_index - 1;
  return lbro_page;
}
/**
 * Returns the smallest key stored in the subtree rooted at `target_page`,
 * i.e. the first key of its left-most leaf.
 *
 * All pages fetched during the descent are unpinned before returning;
 * `target_page` itself is neither pinned nor unpinned here.
 *
 * @param target_page  internal page whose subtree is searched
 * @return a copy of the first key of the left-most leaf below `target_page`
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::FindFistKey(InternalPage *target_page) -> KeyType {
  assert(root_page_id_ != INVALID_PAGE_ID);
  auto cur_page =
      reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(target_page->ValueAt(0))->GetData());
  while (!cur_page->IsLeafPage()) {
    auto internal_page = static_cast<InternalPage *>(cur_page);
    cur_page =
        reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(internal_page->ValueAt(0))->GetData());
    buffer_pool_manager_->UnpinPage(internal_page->GetPageId(), false);
  }
  // Copy the key out while the leaf is still pinned; once unpinned, the frame
  // may be reused for another page.
  const KeyType first_key = static_cast<LeafPage *>(cur_page)->KeyAt(0);
  buffer_pool_manager_->UnpinPage(cur_page->GetPageId(), false);
  return first_key;
}
auto GetMinSize() const -> int {
int min_size;
if (IsLeafPage()) {
min_size = max_size_ / 2; // 向下取整
} else {
min_size = (max_size_ + 1) / 2; // 向上取整
}
return min_size;
}
/**
 * Removes the entry at `index` by shifting every later entry one slot to the
 * left and shrinking the page by one.
 *
 * @param index  slot to remove
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_LEAF_PAGE_TYPE::RemoveByIndex(int index) {
  const int last = GetSize() - 1;
  int slot = index;
  while (slot < last) {
    array_[slot] = array_[slot + 1];
    ++slot;
  }
  DecreaseSize(1);
}
/**
 * Removes the entry whose key equals `key`, if there is one.
 *
 * @param key         key to remove
 * @param comparator  three-way key comparator
 * @return true if an entry was removed, false if the key was not found
 */
INDEX_TEMPLATE_ARGUMENTS
auto B_PLUS_TREE_LEAF_PAGE_TYPE::RemoveByKey(const KeyType &key, const KeyComparator &comparator) -> bool {
  const int count = GetSize();
  int slot = 0;
  while (slot < count && comparator(array_[slot].first, key) != 0) {
    ++slot;
  }
  if (slot == count) {
    return false;
  }
  RemoveByIndex(slot);
  return true;
}
/**
 * Appends every entry of this leaf to the end of `des_page`, empties this
 * leaf, and makes `des_page` point at this leaf's successor.
 *
 * @param des_page  leaf that receives the entries
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveAllDataTo(B_PLUS_TREE_LEAF_PAGE_TYPE *des_page) {
  const int count = GetSize();
  int dst = des_page->GetSize();
  for (int src = 0; src < count; ++src) {
    des_page->array_[dst++] = array_[src];
    des_page->IncreaseSize(1);
  }
  this->SetSize(0);
  // Bypass this page in the leaf chain.
  des_page->SetNextPageId(this->GetNextPageId());
}
/**
 * Removes the entry at `index` by shifting every later entry one slot to the
 * left and shrinking the page by one.
 *
 * @param index  slot to remove
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::RemoveByIndex(int index) {
  const int last = GetSize() - 1;
  int slot = index;
  while (slot < last) {
    array_[slot] = array_[slot + 1];
    ++slot;
  }
  DecreaseSize(1);
}
/**
 * Removes the entry whose child page id equals `value`; does nothing when no
 * such entry exists.
 *
 * Only the first match is removed. The lookup is delegated to
 * GetIndexByValue() so the scan stops before the array is shifted.
 *
 * @param value  child page id to remove
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::RemoveByValue(const page_id_t &value) {
  const int index = GetIndexByValue(value);
  if (index >= 0) {
    RemoveByIndex(index);
  }
}
/**
 * Overwrites the entry at `index` and makes the referenced child point back
 * at this page as its parent.
 *
 * @param index                slot to overwrite
 * @param key                  new key for the slot
 * @param value                new child page id for the slot
 * @param comparator           unused
 * @param buffer_pool_manager  used to fetch and unpin the child page
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::UpdateByIndex(int index, const KeyType &key, const ValueType &value,
                                                   const KeyComparator &comparator,
                                                   BufferPoolManager *buffer_pool_manager) {
  auto &entry = array_[index];
  entry.first = key;
  entry.second = value;

  // Re-parent the child; it is modified, so it is unpinned as dirty.
  auto *frame = buffer_pool_manager->FetchPage(value);
  auto child = reinterpret_cast<BPlusTreePage *>(frame->GetData());
  child->SetParentPageId(GetPageId());
  buffer_pool_manager->UnpinPage(value, true);
}
/**
 * Linear search for the slot that stores `value`.
 *
 * @param value  child page id to look for
 * @return the index of the first matching slot, or -1 if there is none
 */
INDEX_TEMPLATE_ARGUMENTS
auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::GetIndexByValue(const ValueType &value) -> int {
  int slot = 0;
  while (slot < GetSize()) {
    if (array_[slot].second == value) {
      return slot;
    }
    ++slot;
  }
  return -1;
}
/**
 * Appends every entry of this page (slot 0 included) to the end of
 * `des_page`, re-parents each moved child to `des_page`, and empties this
 * page.
 *
 * @param des_page             page that receives the entries
 * @param comparator           unused
 * @param buffer_pool_manager  used to fetch and unpin the child pages
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveAllDataTo(B_PLUS_TREE_INTERNAL_PAGE_TYPE *des_page,
                                                   const KeyComparator &comparator,
                                                   BufferPoolManager *buffer_pool_manager) {
  int dst = des_page->GetSize();
  int src = 0;
  while (src < GetSize()) {
    des_page->array_[dst] = array_[src];
    des_page->IncreaseSize(1);

    // The moved child now belongs to des_page.
    auto moved_child =
        reinterpret_cast<BPlusTreePage *>(buffer_pool_manager->FetchPage(array_[src].second)->GetData());
    moved_child->SetParentPageId(des_page->GetPageId());
    buffer_pool_manager->UnpinPage(moved_child->GetPageId(), true);

    ++src;
    ++dst;
  }
  this->SetSize(0);
}
这里迭代器的实现主要是为了能够通过 CHECKPOINT #2
,注意自加操作时的跳页处理即可。
/**
 * @return an iterator positioned at the first entry of the left-most leaf,
 *         or a default-constructed iterator when the tree is empty
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::Begin() -> INDEXITERATOR_TYPE {
  if (root_page_id_ == INVALID_PAGE_ID) {
    return INDEXITERATOR_TYPE();  // empty tree
  }

  /* Follow the first child pointer at every level until a leaf is reached. */
  auto cur_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(root_page_id_)->GetData());
  while (!cur_page->IsLeafPage()) {
    const page_id_t next_page_id = static_cast<InternalPage *>(cur_page)->ValueAt(0);
    auto next_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(next_page_id)->GetData());
    buffer_pool_manager_->UnpinPage(cur_page->GetPageId(), false);
    cur_page = next_page;
  }

  // Read the id while the leaf is still pinned; an unpinned frame must not be
  // read again. The iterator is handed the page id, not this pointer.
  const page_id_t leaf_page_id = cur_page->GetPageId();
  buffer_pool_manager_->UnpinPage(leaf_page_id, false);
  return INDEXITERATOR_TYPE(leaf_page_id, 0, buffer_pool_manager_);
}
/**
 * @param key  lower bound for the scan
 * @return an iterator positioned at the first entry whose key is >= `key`;
 *         a default-constructed iterator when the tree is empty or no such
 *         entry exists
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::Begin(const KeyType &key) -> INDEXITERATOR_TYPE {
  if (root_page_id_ == INVALID_PAGE_ID) {
    return INDEXITERATOR_TYPE();  // empty tree
  }

  LeafPage *target_leaf_page = FindLeafPage(key);
  int index = 0;
  while (index < target_leaf_page->GetSize() && comparator_(target_leaf_page->KeyAt(index), key) < 0) {
    index++;
  }

  // Everything needed from the leaf is read before it is unpinned.
  const page_id_t leaf_page_id = target_leaf_page->GetPageId();
  page_id_t start_page_id = leaf_page_id;
  if (index == target_leaf_page->GetSize()) {
    // Every key in this leaf is smaller than `key`: the scan has to start at
    // the first entry of the next leaf instead of one past the end of this one.
    start_page_id = target_leaf_page->GetNextPageId();
    index = 0;
  }
  buffer_pool_manager_->UnpinPage(leaf_page_id, false);

  if (start_page_id == INVALID_PAGE_ID) {
    return INDEXITERATOR_TYPE();  // no entry with key >= `key`
  }
  return INDEXITERATOR_TYPE(start_page_id, index, buffer_pool_manager_);
}
/**
 * @return the past-the-end iterator, represented by a default-constructed iterator
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::End() -> INDEXITERATOR_TYPE {
  return INDEXITERATOR_TYPE();
}
/**
 * Advances to the next entry, hopping to the next leaf when the current one
 * is exhausted. Incrementing an end iterator is a no-op.
 *
 * @return *this
 */
auto operator++() -> INDEXITERATOR_TYPE & {
  // Only when the increment runs off the current leaf is a page switch needed.
  if (!IsEnd() && ++index_ == page_->GetSize()) {
    page_id_ = page_->GetNextPageId();
    // The leaf being left is unpinned in either case.
    buffer_pool_manager_->UnpinPage(page_->GetPageId(), false);

    const bool has_next = page_id_ != INVALID_PAGE_ID;
    page_ = has_next ? reinterpret_cast<LeafPage *>(buffer_pool_manager_->FetchPage(page_id_)->GetData()) : nullptr;
    index_ = has_next ? 0 : -1;  // no next leaf: the iterator keeps a null page and index -1
  }
  return *this;
}
我这里基于 std::random_device
实现了一个自定义的测试函数,通过指定 USE_RANDOM_DATA
的值选择随机或固定数据。
其中 GetUnpinCount()
函数会统计当前被固定的页面数量,因为 header_page
会始终被固定,因此正常情况下每个操作执行完被固定的页面数量都应该是 1
,如果不是的话说明有页面未被释放或重复释放。GetUnpinCount()
的实现就是给缓冲池添加一个 test_unpin_count_
成员变量,初始为 0
,取页面和新建页面会使其加一,取消页面固定会使其减一。
// 1: random page sizes and random keys; 0: the fixed data set below.
#define USE_RANDOM_DATA 1

/**
 * Inserts a batch of keys, looks every key up, then removes them in a
 * different order. After each operation the number of pinned pages must be
 * exactly 1 (the header page pinned by this test), which catches missing or
 * duplicated UnpinPage() calls. The tree must be empty at the end.
 */
TEST(BPlusTreeTests, RandomTest) {
  auto key_schema = ParseCreateStatement("a bigint");
  GenericComparator<8> comparator(key_schema.get());
  // Keep a handle on the disk manager so it can be released at the end.
  auto *disk_manager = new DiskManager("test.db");
  BufferPoolManager *bpm = new BufferPoolManagerInstance(50, disk_manager);
  std::random_device random;
#if USE_RANDOM_DATA
  int internal_page_max_size = random() % 5 + 2;
  int leaf_page_max_size = random() % 5 + internal_page_max_size;
#else
  int leaf_page_max_size = 8;
  int internal_page_max_size = 5;
#endif
  BPlusTree<GenericKey<8>, RID, GenericComparator<8>> tree("test", bpm, comparator, internal_page_max_size,
                                                           leaf_page_max_size);
  GenericKey<8> index_key;
  RID rid;
  auto transaction = new Transaction(0);
  page_id_t header_page_id;
  [[maybe_unused]] auto header_page = bpm->NewPage(&header_page_id);
  ASSERT_EQ(header_page_id, HEADER_PAGE_ID);
#if USE_RANDOM_DATA
  int len = random() % 1000;
  std::vector<int64_t> keys(len);
  for (int i = 0; i < len; i++) {
    keys.at(i) = random() % 500;  // duplicates are possible; Insert() rejects them
  }
#else
  std::vector<int64_t> keys = {5, 20, 1, 44, 22, 42, 5, 49, 41, 12, 29, 3, 43, 33, 26, 44, 44, 29, 2, 46};
  std::vector<int64_t> del_keys = {41, 42, 3, 5, 26, 1, 49, 29, 20, 5, 44, 33, 29, 22, 12, 44, 44, 2, 46, 43};
#endif
  // Print the parameters and the insertion order so a failing run can be replayed.
  std::cout << leaf_page_max_size << " " << internal_page_max_size << std::endl;
  for (size_t i = 0; i < keys.size(); i++) {
    std::cout << keys.at(i) << ((i == keys.size() - 1) ? "\n" : ", ");
  }

  /* Insert */
  for (auto key : keys) {
    rid.Set(static_cast<int32_t>(key >> 32), key);
    index_key.SetFromInteger(key);
    tree.Insert(index_key, rid, transaction);
    tree.Draw(bpm, "/Users/liaohan/CLionProjects/bustub/cmake-build-debug/test/pic");
    EXPECT_EQ(bpm->GetUnpinCount(), 1);
  }

  /* Lookup */
  std::vector<RID> rids;
  for (auto key : keys) {
    rids.clear();
    index_key.SetFromInteger(key);
    tree.GetValue(index_key, &rids);
    // Fatal on mismatch: rids[0] below must not be read from an empty vector.
    ASSERT_EQ(rids.size(), 1U);
    EXPECT_EQ(rids[0].GetSlotNum(), key);
    EXPECT_EQ(bpm->GetUnpinCount(), 1);
  }

  /* Remove, in a different order than the insertion */
#if USE_RANDOM_DATA
  std::shuffle(keys.begin(), keys.end(), random);
#else
  keys = del_keys;
#endif
  for (size_t i = 0; i < keys.size(); i++) {
    std::cout << keys.at(i) << ((i == keys.size() - 1) ? "\n" : ", ");
  }
  for (auto key : keys) {
    index_key.SetFromInteger(key);
    tree.Remove(index_key, transaction);
    if (tree.GetRootPageId() != INVALID_PAGE_ID) {
      tree.Draw(bpm, "dot文件路径");
    }
    EXPECT_EQ(bpm->GetUnpinCount(), 1);
  }

  bpm->UnpinPage(HEADER_PAGE_ID, true);
  EXPECT_EQ(tree.GetRootPageId(), INVALID_PAGE_ID);
  EXPECT_EQ(bpm->GetUnpinCount(), 0);
  delete transaction;
  delete bpm;
  delete disk_manager;
  remove("test.db");
  remove("test.log");
}
参考:
https://xiaolincoding.com/mysql/index/page.html
https://blog.csdn.net/Altair_alpha/article/details/129071063