CMU 15-445 Project #2 - B+Tree(CHECKPOINT #1)

CHECKPOINT #1

  • 一、题目链接
  • 二、准备工作
  • 三、部分实现
    • 1.查找操作
    • 2.插入操作
      • B+树中的插入函数及上溢处理函数
      • 叶子页面中的数据操作函数
      • 内部页面中的数据操作函数
    • 3.删除操作
      • B+树中的删除函数及下溢处理函数
      • 页面基类中的最小值计算函数
      • 叶子页面中的数据操作函数
      • 内部页面中的数据操作函数
    • 4.迭代器
      • B+树中的迭代器获取函数
      • 迭代器自加操作
  • 四、自定义测试用例
  • 五、评测结果

在这里插入图片描述

一、题目链接


二、准备工作

见 CMU 15-445 Project #0 - C++ Primer 中的准备工作。


三、部分实现

有关实现的部分说明:

  • 对于B+树的节点定义,通过节点类的命名 b_plus_tree_page 不难发现,每一个节点本质上都是从缓冲池中通过 Fetch 操作获得的一个页面(准确来说是缓冲池页面的数据部分,这个数据部分通过 reinterpret_cast 强制转换后就是B+树节点的全部内容),因此B+树节点中的 page_id 与缓冲池和磁盘上的页面的 page_id 是一致的。对于分支节点(即内部节点),它最多保存 (BUSTUB_PAGE_SIZE - INTERNAL_PAGE_HEADER_SIZE) / sizeof(MappingType) 个由 GenericKey 和 page_id_t 组成的键值对。而对于叶子结点,它最多保存 (BUSTUB_PAGE_SIZE - LEAF_PAGE_HEADER_SIZE) / sizeof(MappingType) 个由 GenericKey 和 RID 组成的键值对。源码中对于 RID 的定义是一个记录标识符,可见叶子结点中保存的不是实际数据,而是指向实际记录的标识符,因此这里实现的B+树索引是一个非聚簇索引。
  • 实现时所有可能存在并发问题的函数都添加了一个基于 std::shared_mutex 的大锁,这里加锁的目的并不是为了并发,而是为了让我们的代码能在 CHECKPOINT #2 上运行,从而通过更完善的测试用例检查我们单线程实现中的漏洞。此外,为了能够让我们的代码能在 CHECKPOINT #2 上运行,还需要简单实现一下迭代器,我会在后文给出。

1.查找操作

注意事项:

  1. 查询过程中内部页面的比较要从第二个 key 开始,因为首个 key 是无效的。
  2. 每一个通过 FetchPage() 或 NewPage() 获取的页面都需要在使用后通过 UnpinPage() 解除固定,不然这个页面会一直驻留在缓冲池(内存)中无法被换出,最后导致缓冲池中没有可用的帧、无法再取到新的页面。
/**
 * Point lookup. Appends the value stored under `key` to `result`.
 *
 * @param key          key to search for
 * @param result       output vector; the matching value is appended to it
 * @param transaction  not used by this implementation
 * @return true if the key was found, false otherwise (including an empty tree)
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::GetValue(const KeyType &key, std::vector<ValueType> *result, Transaction *transaction) -> bool {
    std::shared_lock<std::shared_mutex> locker(shared_mutex_);

    if (root_page_id_ == INVALID_PAGE_ID) {
        return false;  // empty tree
    }

    // FindLeafPage() hands the leaf back still pinned; it is released below.
    LeafPage *leaf = FindLeafPage(key);

    bool found = false;
    const int entry_count = leaf->GetSize();
    for (int slot = 0; slot < entry_count && !found; ++slot) {
        if (comparator_(key, leaf->KeyAt(slot)) == 0) {
            result->emplace_back(leaf->ValueAt(slot));
            found = true;
        }
    }

    // The lookup does not modify the page, so it is unpinned as clean.
    buffer_pool_manager_->UnpinPage(leaf->GetPageId(), false);
    return found;
}

/**
 * Walks from the root down to the leaf page that `key` belongs to.
 *
 * Every internal page visited on the way is unpinned here. The returned leaf
 * is NOT unpinned; the caller has to release it.
 *
 * @return the leaf page for `key`, or nullptr when the tree is empty
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::FindLeafPage(const KeyType &key) -> LeafPage * {
    if (root_page_id_ == INVALID_PAGE_ID) {
        return nullptr;  // empty tree
    }

    auto node = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(root_page_id_)->GetData());

    while (!node->IsLeafPage()) {
        auto inner = static_cast<InternalPage *>(node);

        // Probing starts at slot 1 (slot 0 is skipped). `child_slot` ends up as
        // the last consecutive slot whose key is <= `key`, or 0 if there is none.
        int child_slot = 0;
        for (int probe = 1; probe < inner->GetSize(); ++probe) {
            if (comparator_(key, inner->KeyAt(probe)) < 0) {
                break;
            }
            child_slot = probe;
        }

        // Fetch the child first, then release the page we are leaving.
        node = reinterpret_cast<BPlusTreePage *>(
                buffer_pool_manager_->FetchPage(inner->ValueAt(child_slot))->GetData());
        buffer_pool_manager_->UnpinPage(inner->GetPageId(), false);
    }

    return static_cast<LeafPage *>(node);
}

2.插入操作

注意事项:

  1. 叶子页面分裂之后需要通过 SetNextPageId() 修改叶子页面指向的下一个页面,B+树的一个特征就是叶子页面串连在一起。
  2. B+树不能存储重复的 key,叶子页面中发现当前 key 已经存在时应当直接返回。
  3. 叶子页面的上溢判断在插入后,内部页面的上溢判断在插入前。换句话说,叶子页面在插入后的长度只要达到了上限就要分裂,但是内部页面插入后的长度可以达到上限,它会在下一次插入前分裂。
  4. 因为上溢判断的时机不同,叶子页面分裂时,只需要在插入后将旧页面的一半数据移动到新页面即可,由 MoveHalfDataTo() 函数实现。而内部页面需要在移动的同时完成插入,不然在分裂后还要判断数据应该插到哪个页面,由 MoveHalfDataAndInsertTo() 函数实现,我这里的思路就是把旧页面中除去第一个键值对(因为分裂肯定是向右分裂)的数据和插入数据整理到一起,然后平均分配到两个内部页面中。

B+树中的插入函数及上溢处理函数

/**
 * Inserts a (key, value) pair into the tree.
 *
 * @param key          key to insert; must not already be present
 * @param value        value stored under the key
 * @param transaction  not used by this implementation
 * @return true on success, false if the key already exists
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::Insert(const KeyType &key, const ValueType &value, Transaction *transaction) -> bool {
    std::unique_lock<std::shared_mutex> locker(shared_mutex_);

    if (root_page_id_ != INVALID_PAGE_ID) {
        LeafPage *leaf = FindLeafPage(key);

        // InsertByKey() reports false for a duplicate key and leaves the page untouched.
        const bool inserted = leaf->InsertByKey(key, value, comparator_);

        // A leaf is split as soon as its size reaches the max size after the insert.
        if (inserted && leaf->GetSize() == leaf->GetMaxSize()) {
            HandleLeafOverflow(leaf);
        }

        // Dirty only when something was actually written.
        buffer_pool_manager_->UnpinPage(leaf->GetPageId(), inserted);
        return inserted;
    }

    // Empty tree: the first entry goes into a brand-new leaf that becomes the root.
    auto root_leaf = reinterpret_cast<LeafPage *>(buffer_pool_manager_->NewPage(&root_page_id_)->GetData());
    root_leaf->Init(root_page_id_, INVALID_PAGE_ID, leaf_max_size_);
    root_leaf->InsertByKey(key, value, comparator_);
    root_leaf->SetNextPageId(INVALID_PAGE_ID);

    UpdateRootPageId(true);

    buffer_pool_manager_->UnpinPage(root_leaf->GetPageId(), true);
    return true;
}

/**
 * Splits a leaf page and registers the new right-hand page one level up.
 * Insert() calls this when the leaf's size has reached its max size.
 *
 * - If target_page is the root, a new internal root is allocated whose two
 *   children are target_page (slot 0) and the split page (slot 1).
 * - Otherwise the split page's first key and page id are inserted into the
 *   existing parent, or passed to HandleInternalOverflow() when the parent's
 *   size already equals its max size.
 *
 * The split page and the parent (or new root) are unpinned here as dirty.
 * target_page itself is not unpinned by this function.
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleLeafOverflow(LeafPage *target_page) {
    if (target_page->IsRootPage()) {
        page_id_t split_page_id;
        auto split_page = reinterpret_cast<LeafPage *>(buffer_pool_manager_->NewPage(&split_page_id)->GetData());
        auto new_root_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->NewPage(&root_page_id_)->GetData());

        /* Initialize the split page (root_page_id_ already holds the new root's id) */
        split_page->Init(split_page_id, root_page_id_, leaf_max_size_);
        target_page->MoveHalfDataTo(split_page);

        /* Initialize the new root page */
        new_root_page->Init(root_page_id_, INVALID_PAGE_ID, internal_max_size_);
        new_root_page->SetKeyAt(0, split_page->KeyAt(0));  // filler value with no real meaning
        new_root_page->SetValueAt(0, target_page->GetPageId());
        new_root_page->SetKeyAt(1, split_page->KeyAt(0));
        new_root_page->SetValueAt(1, split_page->GetPageId());
        new_root_page->IncreaseSize(1);
        target_page->SetParentPageId(root_page_id_);  // the new root becomes the old root's parent

        UpdateRootPageId(false);

        buffer_pool_manager_->UnpinPage(split_page->GetPageId(), true);
        buffer_pool_manager_->UnpinPage(new_root_page->GetPageId(), true);
        return;
    }

    page_id_t split_page_id;
    auto split_page = reinterpret_cast<LeafPage *>(buffer_pool_manager_->NewPage(&split_page_id)->GetData());
    auto parent_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());

    /* Initialize the split page */
    split_page->Init(split_page_id, parent_page->GetPageId(), leaf_max_size_);
    target_page->MoveHalfDataTo(split_page);

    /* Check whether the parent overflows (internal pages are checked BEFORE inserting) */
    if (parent_page->GetSize() == parent_page->GetMaxSize()) {
        HandleInternalOverflow(parent_page, split_page->KeyAt(0), split_page->GetPageId());
    } else {
        parent_page->InsertByKey(split_page->KeyAt(0), split_page->GetPageId(), comparator_, buffer_pool_manager_);
    }

    buffer_pool_manager_->UnpinPage(split_page->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
}

/**
 * Splits an internal page while inserting a new (key, child page id) entry.
 * Callers in this file invoke it when target_page's size equals its max size.
 *
 * The existing entries and the new one are redistributed between target_page
 * and a freshly allocated split page by MoveHalfDataAndInsertTo(). The split
 * page's slot-0 key is then pushed up:
 * - if target_page is the root, into a newly allocated root with two children;
 * - otherwise into the parent, recursing when the parent's size already equals
 *   its max size.
 *
 * The split page and the parent (or new root) are unpinned here as dirty.
 * target_page itself is not unpinned by this function.
 *
 * @param target_page  internal page to split
 * @param key          key of the entry being inserted
 * @param value        child page id of the entry being inserted
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleInternalOverflow(InternalPage *target_page, const KeyType &key, const page_id_t &value) {
    if (target_page->IsRootPage()) {
        page_id_t split_page_id;
        auto split_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->NewPage(&split_page_id)->GetData());
        auto new_root_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->NewPage(&root_page_id_)->GetData());

        /* Initialize the split page (root_page_id_ already holds the new root's id) */
        split_page->Init(split_page_id, root_page_id_, internal_max_size_);
        target_page->MoveHalfDataAndInsertTo(split_page, key, value, comparator_, buffer_pool_manager_);  // split_page's first key is temporarily valid

        /* Initialize the new root page */
        new_root_page->Init(root_page_id_, INVALID_PAGE_ID, internal_max_size_);
        new_root_page->SetKeyAt(0, split_page->KeyAt(0));  // filler value with no real meaning
        new_root_page->SetValueAt(0, target_page->GetPageId());
        new_root_page->SetKeyAt(1, split_page->KeyAt(0));
        new_root_page->SetValueAt(1, split_page->GetPageId());
        new_root_page->IncreaseSize(1);
        target_page->SetParentPageId(root_page_id_);  // the new root becomes the old root's parent

        UpdateRootPageId(false);

        buffer_pool_manager_->UnpinPage(split_page->GetPageId(), true);
        buffer_pool_manager_->UnpinPage(new_root_page->GetPageId(), true);
        return;
    }

    page_id_t split_page_id;
    auto split_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->NewPage(&split_page_id)->GetData());
    auto parent_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());

    /* Initialize the split page */
    split_page->Init(split_page_id, target_page->GetParentPageId(), internal_max_size_);
    target_page->MoveHalfDataAndInsertTo(split_page, key, value, comparator_, buffer_pool_manager_);  // split_page's first key is temporarily valid

    /* Check whether the parent overflows (checked BEFORE inserting into it) */
    if (parent_page->GetSize() == parent_page->GetMaxSize()) {
        HandleInternalOverflow(parent_page, split_page->KeyAt(0), split_page->GetPageId());
    } else {
        parent_page->InsertByKey(split_page->KeyAt(0), split_page->GetPageId(), comparator_, buffer_pool_manager_);
    }

    buffer_pool_manager_->UnpinPage(split_page->GetPageId(), true);
    buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
}

叶子页面中的数据操作函数

/**
 * Inserts (key, value) into this leaf so that the keys stay in ascending order.
 *
 * @param key         key to insert
 * @param value       value stored under the key
 * @param comparator  three-way key comparator (negative / zero / positive)
 * @return false if the key is already present (page unchanged), true otherwise
 */
INDEX_TEMPLATE_ARGUMENTS
auto B_PLUS_TREE_LEAF_PAGE_TYPE::InsertByKey(const KeyType &key, const ValueType &value,
                                             const KeyComparator &comparator) -> bool {
    const int old_size = GetSize();

    // Find the first slot whose key is not smaller than `key`.
    int slot = 0;
    for (; slot < old_size; ++slot) {
        const auto order = comparator(key, array_[slot].first);
        if (order == 0) {
            return false;  // duplicate keys are rejected
        }
        if (order < 0) {
            break;
        }
    }

    // Open a gap at `slot` by moving the tail one position to the right.
    IncreaseSize(1);
    int tail = old_size;
    while (tail > slot) {
        array_[tail] = array_[tail - 1];
        --tail;
    }

    array_[slot].first = key;
    array_[slot].second = value;
    return true;
}

/**
 * Moves the entries from slot GetMinSize() to the end of this leaf into
 * `des_page`, writing them from des_page's slot 0 onwards, and links des_page
 * in as this page's successor in the leaf chain.
 *
 * @param des_page  destination leaf; written starting at slot 0
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveHalfDataTo(B_PLUS_TREE_LEAF_PAGE_TYPE *des_page) {
    const int total = GetSize();          // size before anything is moved
    const int first_moved = GetMinSize();  // entries below this slot stay here

    for (int offset = 0; first_moved + offset < total; ++offset) {
        des_page->array_[offset] = array_[first_moved + offset];
        des_page->IncreaseSize(1);
        DecreaseSize(1);
    }

    // this -> des_page -> (old successor of this)
    des_page->SetNextPageId(GetNextPageId());
    SetNextPageId(des_page->GetPageId());
}

内部页面中的数据操作函数

/**
 * Inserts (key, child page id) into this internal page at the position that
 * keeps the keys from slot 1 onwards in ascending order. The actual insertion
 * is delegated to InsertByIndex().
 *
 * @param key                  separator key to insert; asserted not to be present
 * @param value                child page id stored with the key
 * @param comparator           three-way key comparator
 * @param buffer_pool_manager  forwarded to InsertByIndex()
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::InsertByKey(const KeyType &key, const ValueType &value,
                                                 const KeyComparator &comparator,
                                                 BufferPoolManager *buffer_pool_manager) {
    // Slot 0's key is invalid, so the search starts at slot 1.
    int slot = 1;
    for (; slot < GetSize(); ++slot) {
        const auto order = comparator(key, array_[slot].first);
        assert(order != 0);  // duplicate keys are not allowed
        if (order <= 0) {
            break;
        }
    }

    assert(slot > 0);  // a keyed insert must always land after array_[0]

    InsertByIndex(slot, key, value, comparator, buffer_pool_manager);
}

/**
 * Splits this internal page into itself and `des_page` while inserting one new
 * (key, child page id) entry.
 *
 * Step 1 merges slots 1..GetMaxSize()-1 of this page with the new entry, in key
 * order, into a temporary buffer of GetMaxSize() elements. Slot 0 stays where
 * it is. NOTE(review): the loops read up to GetMaxSize(), so this presumably
 * expects the page to be full when called — confirm against callers.
 *
 * Step 2 refills slots 1..GetMinSize()-1 of this page from the front of the
 * buffer and writes the remaining entries to des_page starting at slot 0, so
 * des_page's slot-0 key holds a real key after this call. Every redistributed
 * child page is fetched, has its parent page id rewritten, and is unpinned as
 * dirty.
 *
 * @param des_page             destination page receiving the upper part
 * @param key                  key of the entry being inserted
 * @param value                child page id of the entry being inserted
 * @param comparator           three-way key comparator
 * @param buffer_pool_manager  used to fetch/unpin the child pages
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveHalfDataAndInsertTo(B_PLUS_TREE_INTERNAL_PAGE_TYPE *des_page,
                                                             const KeyType &key, const page_id_t &value,
                                                             const KeyComparator &comparator,
                                                             BufferPoolManager *buffer_pool_manager) {
    /* Merge the source page's entries with the entry being inserted */
    std::vector<MappingType > tmp_array(GetMaxSize());
    int i = 1;  // cursor into array_
    int j = 0;  // cursor into tmp_array
    // [invalid key, 1, 3] & 2 -> [1 2 3]
    while (i < GetMaxSize() && comparator(array_[i].first, key) < 0) {
        tmp_array.at(j) = array_[i];
        i++;
        j++;
    }
    tmp_array.at(j++) = std::make_pair(key, value);
    while (i < GetMaxSize()) {
        tmp_array.at(j) = array_[i];
        i++;
        j++;
    }

    /* Distribute the merged entries across the two pages */
    this->SetSize(1);
    des_page->SetSize(0);
    j = 0;
    for (i = 1; i < GetMinSize(); i++, j++) {
        array_[i] = tmp_array.at(j);
        this->IncreaseSize(1);

        auto child_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager->FetchPage(tmp_array.at(j).second)->GetData());
        child_page->SetParentPageId(this->GetPageId());
        buffer_pool_manager->UnpinPage(child_page->GetPageId(), true);
    }
    for (i = 0; j < GetMaxSize(); i++, j++) {
        des_page->array_[i] = tmp_array.at(j);  // slot 0 also receives a valid key
        des_page->IncreaseSize(1);

        auto child_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager->FetchPage(tmp_array.at(j).second)->GetData());
        child_page->SetParentPageId(des_page->GetPageId());
        buffer_pool_manager->UnpinPage(child_page->GetPageId(), true);
    }
}

3.删除操作

注意事项:

  1. 叶子页面和内部页面最小长度的计算方式不一样,一个向下取整,一个向上取整,具体见下面的下界判断。
  2. 如果根页面发生下溢,要通过页面类型和大小更新根页面,或者清空B+树。
  3. 叶子页面合并时,推荐向左合并,不然很难处理叶子页面之间的串连索引。
  4. 内部页面合并时,除了修改内部页面本身,还要从缓冲池取出每一个子页面修改它们的 parent_page_id_。
  5. 内部页面合并时,要先填充右边的页面的首个 key 为有效值,然后再合并。这个有效值本质就是这个内部页面所在子树的最左侧叶子页面的第一个 key,我这里通过 FindFistKey() 函数进行查找。
  6. 虽然兄弟页面的获取没有固定规则,但只有一个兄弟页面时肯定选择这个兄弟页面,而且也肯定优先选够借的兄弟,不能一味左优先或右优先。
  7. 待删除的页面在通过 UnpinPage() 解除固定后还需要通过 DeletePage() 从内存和外存中删除。

B+树中的删除函数及下溢处理函数

/**
 * Deletes the entry stored under `key`. Does nothing if the tree is empty or
 * the key is not present.
 *
 * @param key          key to delete
 * @param transaction  not used by this implementation
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::Remove(const KeyType &key, Transaction *transaction) {
    std::unique_lock<std::shared_mutex> locker(shared_mutex_);

    if (root_page_id_ == INVALID_PAGE_ID) {
        return;  // empty tree
    }

    LeafPage *leaf = FindLeafPage(key);
    const page_id_t leaf_id = leaf->GetPageId();

    if (!leaf->RemoveByKey(key, comparator_)) {
        // Key not present: nothing was modified.
        buffer_pool_manager_->UnpinPage(leaf_id, false);
        return;
    }

    const bool underflow = leaf->GetSize() < leaf->GetMinSize();

    if (underflow && !leaf->IsRootPage()) {
        // Non-root leaf underflow; the handler unpins the leaf itself.
        HandleLeafUnderflow(leaf);
        return;
    }

    if (underflow && leaf->GetSize() == 0) {
        // The root leaf lost its last entry: the tree becomes empty.
        buffer_pool_manager_->UnpinPage(leaf_id, true);
        buffer_pool_manager_->DeletePage(leaf_id);
        root_page_id_ = INVALID_PAGE_ID;
        UpdateRootPageId(false);
        return;
    }

    // No underflow, or a root leaf that still holds entries.
    buffer_pool_manager_->UnpinPage(leaf_id, true);
}

/**
 * Repairs a non-root leaf that dropped below its min size after a delete.
 *
 * A sibling is chosen via GetBrotherPage().
 * - Borrow: if the sibling holds more than its min size, one entry is moved
 *   over (the last entry of a left sibling, or the first entry of a right
 *   sibling) and the separator key in the parent is updated.
 * - Merge: otherwise the right-hand page of the pair is merged into the
 *   left-hand one, its slot is removed from the parent, and the emptied page
 *   is unpinned and deleted. If the parent then falls below its min size it is
 *   handled too: a non-root parent goes to HandleInternalUnderflow(); a root
 *   parent left with size 1 is replaced by the merged leaf as the new root.
 *
 * target_page, the sibling and the parent are all unpinned on every path. When
 * HandleInternalUnderflow() is called, the parent is unpinned inside that call.
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleLeafUnderflow(LeafPage *target_page) {
    int tar_index;
    int bro_index;
    auto parent_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());
    auto bro_page = static_cast<LeafPage *>(GetBrotherPage(parent_page, target_page, tar_index, bro_index));

    /* Borrow from the sibling */
    if (bro_page->GetSize() > bro_page->GetMinSize()) {
        if (bro_index < tar_index) {
            /* Take the last entry of the left sibling */
            KeyType bro_last_key = bro_page->KeyAt(bro_page->GetSize() - 1);
            ValueType bro_last_value = bro_page->ValueAt(bro_page->GetSize() - 1);
            bro_page->RemoveByKey(bro_last_key, comparator_);
            target_page->InsertByKey(bro_last_key, bro_last_value, comparator_);
            parent_page->SetKeyAt(tar_index, bro_last_key);
        } else {
            /* Take the first entry of the right sibling */
            KeyType bro_first_key = bro_page->KeyAt(0);
            ValueType bro_first_value = bro_page->ValueAt(0);
            bro_page->RemoveByKey(bro_first_key, comparator_);
            target_page->InsertByKey(bro_first_key, bro_first_value, comparator_);
            parent_page->SetKeyAt(bro_index, bro_page->KeyAt(0));
        }

        buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
        buffer_pool_manager_->UnpinPage(bro_page->GetPageId(), true);
        buffer_pool_manager_->UnpinPage(target_page->GetPageId(), true);
        return;
    }

    /* Merge towards the left */
    LeafPage *src_page;
    LeafPage *des_page;
    int src_index;
    if (bro_index < tar_index) {
        /* left_bro <- target */
        src_page = target_page;
        des_page = bro_page;
        src_index = tar_index;
    } else {
        /* target <- right_bro */
        src_page = bro_page;
        des_page = target_page;
        src_index = bro_index;
    }

    src_page->MoveAllDataTo(des_page);
    parent_page->RemoveByIndex(src_index);
    buffer_pool_manager_->UnpinPage(src_page->GetPageId(), true);
    buffer_pool_manager_->DeletePage(src_page->GetPageId());

    if (parent_page->GetSize() < parent_page->GetMinSize()) {
        if (!parent_page->IsRootPage()) {
            /* Non-root internal page underflow */
            HandleInternalUnderflow(parent_page);
        } else if (parent_page->GetSize() == 1) {
            /* parent_page is the root and des_page is its only child */
            root_page_id_ = des_page->GetPageId();
            des_page->SetParentPageId(INVALID_PAGE_ID);
            UpdateRootPageId(false);
            buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
            buffer_pool_manager_->DeletePage(parent_page->GetPageId());
        } else {
            buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
        }
    } else {
        buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
    }
    buffer_pool_manager_->UnpinPage(des_page->GetPageId(), true);
}

/**
 * Repairs a non-root internal page that dropped below its min size.
 *
 * A sibling is chosen via GetBrotherPage().
 * - Borrow: if the sibling holds more than its min size, one child pointer is
 *   moved over. The separator key in the parent moves down into target_page
 *   and a key from the sibling takes its place in the parent.
 * - Merge: otherwise the right-hand page of the pair gets its slot-0 key
 *   filled in via FindFistKey(), is merged into the left-hand page, removed
 *   from the parent, unpinned and deleted. If the parent then falls below its
 *   min size, this function recurses for a non-root parent; a root parent left
 *   with size 1 is replaced by the merged page as the new root.
 *
 * target_page, the sibling and the parent are all unpinned on every path. When
 * the function recurses, the parent is unpinned inside the recursive call.
 */
INDEX_TEMPLATE_ARGUMENTS
void BPLUSTREE_TYPE::HandleInternalUnderflow(InternalPage *target_page) {
    /* Fetch the sibling page and the related slot indices from the buffer pool */
    int tar_index;
    int bro_index;
    auto parent_page = reinterpret_cast<InternalPage *>(buffer_pool_manager_->FetchPage(target_page->GetParentPageId())->GetData());
    auto bro_page = static_cast<InternalPage *>(GetBrotherPage(parent_page, target_page, tar_index, bro_index));

    /* Borrow from the sibling */
    if (bro_page->GetSize() > bro_page->GetMinSize()) {
        if (bro_index < tar_index) {
            /* Take the last entry of the left sibling */
            KeyType bro_last_key = bro_page->KeyAt(bro_page->GetSize() - 1);
            page_id_t bro_last_value = bro_page->ValueAt(bro_page->GetSize() - 1);
            bro_page->RemoveByValue(bro_last_value);
            target_page->SetKeyAt(0, parent_page->KeyAt(tar_index));  // temporarily fill the first key
            target_page->InsertByIndex(0, bro_last_key, bro_last_value, comparator_, buffer_pool_manager_);
            parent_page->SetKeyAt(tar_index, bro_last_key);
        } else {
            /* Take the first entry of the right sibling */
            KeyType bro_first_key = parent_page->KeyAt(bro_index);
            page_id_t bro_first_value = bro_page->ValueAt(0);
            bro_page->RemoveByValue(bro_first_value);
            target_page->InsertByIndex(target_page->GetSize(), bro_first_key, bro_first_value, comparator_, buffer_pool_manager_);
            parent_page->SetKeyAt(bro_index, bro_page->KeyAt(0));
        }

        buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
        buffer_pool_manager_->UnpinPage(bro_page->GetPageId(), true);
        buffer_pool_manager_->UnpinPage(target_page->GetPageId(), true);
        return;
    }

    /* Merge towards the left */
    InternalPage *src_page;
    InternalPage *des_page;
    int src_index;
    if (bro_index < tar_index) {
        /* left_bro <- target */
        src_page = target_page;
        des_page = bro_page;
        src_index = tar_index;
    } else {
        /* target <- right_bro */
        src_page = bro_page;
        des_page = target_page;
        src_index = bro_index;
    }

    src_page->SetKeyAt(0, FindFistKey(src_page));  // temporarily fill the first key
    src_page->MoveAllDataTo(des_page, comparator_, buffer_pool_manager_);
    parent_page->RemoveByIndex(src_index);
    buffer_pool_manager_->UnpinPage(src_page->GetPageId(), true);
    buffer_pool_manager_->DeletePage(src_page->GetPageId());

    if (parent_page->GetSize() < parent_page->GetMinSize()) {
        if (!parent_page->IsRootPage()) {
            /* Non-root internal page underflow */
            HandleInternalUnderflow(parent_page);
        } else if (parent_page->GetSize() == 1) {
            /* parent_page is the root and des_page is its only child */
            root_page_id_ = des_page->GetPageId();
            des_page->SetParentPageId(INVALID_PAGE_ID);
            UpdateRootPageId(false);
            buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
            buffer_pool_manager_->DeletePage(parent_page->GetPageId());
        } else {
            buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
        }
    } else {
        buffer_pool_manager_->UnpinPage(parent_page->GetPageId(), true);
    }
    buffer_pool_manager_->UnpinPage(des_page->GetPageId(), true);
}

/**
 * Picks the sibling that `child_page` should borrow from or merge with.
 *
 * Selection rule:
 * - a child in the last slot only has a left sibling, a child in slot 0 only
 *   has a right sibling, so that one is returned;
 * - with both siblings available the left one is preferred, unless only the
 *   right sibling has entries to spare (size > min size).
 *
 * The returned sibling is pinned and must be unpinned by the caller. A sibling
 * that was fetched but not chosen is unpinned here.
 *
 * @param parent_page   parent of child_page
 * @param child_page    the underflowing page
 * @param target_index  [out] slot of child_page inside parent_page
 * @param bro_index     [out] slot of the returned sibling inside parent_page
 * @return the chosen sibling page
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::GetBrotherPage(InternalPage *parent_page, BPlusTreePage *child_page, int &target_index, int &bro_index) -> BPlusTreePage * {
    target_index = parent_page->GetIndexByValue(child_page->GetPageId());

    // GetIndexByValue() returns -1 when the child is not listed, and a parent
    // with a single child has no sibling at all; both would index out of range.
    assert(target_index >= 0);
    assert(parent_page->GetSize() > 1);

    /* Only a left sibling exists */
    if (target_index == parent_page->GetSize() - 1) {
        auto bro_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index - 1))->GetData());
        bro_index = target_index - 1;
        return bro_page;
    }

    /* Only a right sibling exists */
    if (target_index == 0) {
        auto bro_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index + 1))->GetData());
        bro_index = target_index + 1;
        return bro_page;
    }

    /* Both siblings exist */
    auto lbro_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index - 1))->GetData());
    auto rbro_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(parent_page->ValueAt(target_index + 1))->GetData());

    // A sibling can lend an entry only while it holds more than its min size.
    // A valid sibling never holds fewer than min size entries, so testing for
    // "size < min size" on the left sibling could never select the right one.
    const bool left_can_lend = lbro_page->GetSize() > lbro_page->GetMinSize();
    const bool right_can_lend = rbro_page->GetSize() > rbro_page->GetMinSize();

    /* Left sibling first, unless only the right one can lend */
    if (right_can_lend && !left_can_lend) {
        buffer_pool_manager_->UnpinPage(lbro_page->GetPageId(), false);
        bro_index = target_index + 1;
        return rbro_page;
    }
    buffer_pool_manager_->UnpinPage(rbro_page->GetPageId(), false);
    bro_index = target_index - 1;
    return lbro_page;
}

/**
 * Returns the first key of the left-most leaf in the subtree rooted at
 * `target_page`, by following child slot 0 at every level.
 *
 * Every page fetched during the descent is unpinned before returning.
 * target_page itself is neither fetched nor unpinned here.
 *
 * @param target_page  root of the subtree to inspect
 * @return key stored in slot 0 of the subtree's left-most leaf
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::FindFistKey(InternalPage *target_page) -> KeyType {
    assert(root_page_id_ != INVALID_PAGE_ID);

    auto cur_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(target_page->ValueAt(0))->GetData());

    while (!cur_page->IsLeafPage()) {
        auto internal_page = static_cast<InternalPage *>(cur_page);
        cur_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(internal_page->ValueAt(0))->GetData());
        buffer_pool_manager_->UnpinPage(internal_page->GetPageId(), false);
    }

    // Copy the key while the leaf is still pinned. Once unpinned, the frame may
    // be reused, so the page must not be read afterwards.
    KeyType first_key = static_cast<LeafPage *>(cur_page)->KeyAt(0);
    buffer_pool_manager_->UnpinPage(cur_page->GetPageId(), false);
    return first_key;
}

页面基类中的最小值计算函数

auto GetMinSize() const -> int {
    int min_size;

    if (IsLeafPage()) {
        min_size = max_size_ / 2;  // 向下取整
    } else {
        min_size = (max_size_ + 1) / 2;  // 向上取整
    }

    return min_size;
}

叶子页面中的数据操作函数

/**
 * Removes the entry in slot `index` by shifting every later entry one slot to
 * the left, then shrinks the page size by one. `index` is not range-checked.
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_LEAF_PAGE_TYPE::RemoveByIndex(int index) {
    const int last_slot = GetSize() - 1;

    int slot = index;
    while (slot < last_slot) {
        array_[slot] = array_[slot + 1];
        ++slot;
    }

    DecreaseSize(1);
}

/**
 * Removes the entry whose key compares equal to `key`.
 *
 * @param key         key to remove
 * @param comparator  three-way key comparator
 * @return true if an entry was removed, false if the key is not in this page
 */
INDEX_TEMPLATE_ARGUMENTS
auto B_PLUS_TREE_LEAF_PAGE_TYPE::RemoveByKey(const KeyType &key, const KeyComparator &comparator) -> bool {
    const int entry_count = GetSize();

    // Linear scan for the first matching slot.
    int slot = 0;
    while (slot < entry_count && comparator(array_[slot].first, key) != 0) {
        ++slot;
    }

    if (slot == entry_count) {
        return false;
    }

    RemoveByIndex(slot);
    return true;
}

/**
 * Appends every entry of this leaf to the end of `des_page`, empties this
 * page, and makes des_page point at this page's successor in the leaf chain.
 *
 * @param des_page  destination leaf; entries are appended after its own
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_LEAF_PAGE_TYPE::MoveAllDataTo(B_PLUS_TREE_LEAF_PAGE_TYPE *des_page) {
    int write_slot = des_page->GetSize();
    int read_slot = 0;

    while (read_slot < GetSize()) {
        des_page->array_[write_slot] = array_[read_slot];
        des_page->IncreaseSize(1);
        ++read_slot;
        ++write_slot;
    }
    SetSize(0);

    // des_page takes over this page's position in the chain.
    des_page->SetNextPageId(GetNextPageId());
}

内部页面中的数据操作函数

/**
 * Removes the entry in slot `index`: each later entry is copied one slot to
 * the left and the page size is reduced by one. `index` is not range-checked.
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::RemoveByIndex(int index) {
    for (int next = index + 1; next < GetSize(); ++next) {
        array_[next - 1] = array_[next];
    }

    DecreaseSize(1);
}

/**
 * Removes the first entry whose child page id equals `value`. Does nothing if
 * no entry matches.
 *
 * @param value  child page id to remove
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::RemoveByValue(const page_id_t &value) {
    for (int i = 0; i < GetSize(); i++) {
        if (array_[i].second == value) {
            RemoveByIndex(i);
            // Stop here. RemoveByIndex() shifts the following entries left, so
            // continuing with i + 1 would skip the entry now sitting in slot i.
            return;
        }
    }
}

/**
 * Overwrites slot `index` with (key, value) and re-parents the child page
 * `value` to this page.
 *
 * @param index                slot to overwrite; not range-checked
 * @param key                  new key for the slot
 * @param value                new child page id for the slot
 * @param comparator           not used by this function
 * @param buffer_pool_manager  used to fetch and unpin the child page
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::UpdateByIndex(int index, const KeyType &key, const ValueType &value, const KeyComparator &comparator, BufferPoolManager *buffer_pool_manager) {
    auto &slot = array_[index];
    slot.first = key;
    slot.second = value;

    // The child now hangs under this page; record that and release it as dirty.
    auto child = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager->FetchPage(value)->GetData());
    child->SetParentPageId(GetPageId());
    buffer_pool_manager->UnpinPage(value, true);
}

/**
 * Finds the slot that holds the child page id `value`.
 *
 * @param value  child page id to look for
 * @return index of the first matching slot, or -1 if there is none
 */
INDEX_TEMPLATE_ARGUMENTS
auto B_PLUS_TREE_INTERNAL_PAGE_TYPE::GetIndexByValue(const ValueType &value) -> int {
    const int entry_count = GetSize();

    int slot = 0;
    while (slot < entry_count && !(array_[slot].second == value)) {
        ++slot;
    }

    return slot < entry_count ? slot : -1;
}

/**
 * Appends every entry of this internal page, slot 0 included, to the end of
 * `des_page` and empties this page. Each moved child page is fetched,
 * re-parented to des_page, and unpinned as dirty.
 *
 * @param des_page             destination page; entries are appended after its own
 * @param comparator           not used by this function
 * @param buffer_pool_manager  used to fetch and unpin the child pages
 */
INDEX_TEMPLATE_ARGUMENTS
void B_PLUS_TREE_INTERNAL_PAGE_TYPE::MoveAllDataTo(B_PLUS_TREE_INTERNAL_PAGE_TYPE *des_page, const KeyComparator &comparator, BufferPoolManager *buffer_pool_manager) {
    int write_slot = des_page->GetSize();

    for (int read_slot = 0; read_slot < GetSize(); ++read_slot, ++write_slot) {
        const auto &entry = array_[read_slot];
        des_page->array_[write_slot] = entry;
        des_page->IncreaseSize(1);

        // The moved child now belongs to des_page.
        auto child = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager->FetchPage(entry.second)->GetData());
        child->SetParentPageId(des_page->GetPageId());
        buffer_pool_manager->UnpinPage(child->GetPageId(), true);
    }

    SetSize(0);
}

4.迭代器

这里迭代器的实现主要是为了能够通过 CHECKPOINT #2,注意自加操作时的跳页处理即可。

B+树中的迭代器获取函数

/**
 * Returns an iterator positioned on the first entry of the left-most leaf,
 * or a default-constructed iterator when the tree is empty.
 *
 * Every page fetched during the descent is unpinned before returning; the
 * iterator receives only the leaf's page id.
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::Begin() -> INDEXITERATOR_TYPE {
    /* Empty tree */
    if (root_page_id_ == INVALID_PAGE_ID) {
        return INDEXITERATOR_TYPE();
    }

    /* Follow child slot 0 until the left-most leaf is reached */
    auto cur_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(root_page_id_)->GetData());
    while (!cur_page->IsLeafPage()) {
        const page_id_t next_page_id = static_cast<InternalPage *>(cur_page)->ValueAt(0);
        auto next_page = reinterpret_cast<BPlusTreePage *>(buffer_pool_manager_->FetchPage(next_page_id)->GetData());
        buffer_pool_manager_->UnpinPage(cur_page->GetPageId(), false);
        cur_page = next_page;
    }

    // Read the page id BEFORE unpinning: once unpinned, the frame may be reused
    // and the page contents must not be touched any more.
    const page_id_t leaf_page_id = cur_page->GetPageId();
    buffer_pool_manager_->UnpinPage(leaf_page_id, false);
    return INDEXITERATOR_TYPE(leaf_page_id, 0, buffer_pool_manager_);
}

/**
 * Returns an iterator positioned on the first entry whose key is >= `key`.
 *
 * If every key in the leaf that covers `key` is smaller, the iterator starts
 * at the first entry of the next leaf. If there is no next leaf, or the tree
 * is empty, a default-constructed iterator is returned.
 *
 * @param key  lower bound for the iteration
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::Begin(const KeyType &key) -> INDEXITERATOR_TYPE {
    /* Empty tree */
    if (root_page_id_ == INVALID_PAGE_ID) {
        return INDEXITERATOR_TYPE();
    }

    LeafPage *target_leaf_page = FindLeafPage(key);

    int index = 0;
    while (index < target_leaf_page->GetSize() && comparator_(target_leaf_page->KeyAt(index), key) < 0) {
        index++;
    }

    // Collect everything needed from the leaf while it is still pinned.
    const page_id_t leaf_page_id = target_leaf_page->GetPageId();
    page_id_t start_page_id = leaf_page_id;
    if (index == target_leaf_page->GetSize()) {
        // No entry in this leaf is >= key. `index` would point one past the last
        // slot, so continue at the first entry of the next leaf instead.
        start_page_id = target_leaf_page->GetNextPageId();
        index = 0;
    }

    buffer_pool_manager_->UnpinPage(leaf_page_id, false);

    if (start_page_id == INVALID_PAGE_ID) {
        return INDEXITERATOR_TYPE();
    }
    return INDEXITERATOR_TYPE(start_page_id, index, buffer_pool_manager_);
}

/**
 * Returns the past-the-end iterator, represented by a default-constructed
 * iterator (the same value Begin() returns for an empty tree).
 */
INDEX_TEMPLATE_ARGUMENTS
auto BPLUSTREE_TYPE::End() -> INDEXITERATOR_TYPE { return INDEXITERATOR_TYPE(); }

迭代器自加操作

/**
 * Advances the iterator by one entry. Does nothing if it is already at the end.
 *
 * When the index moves past the last entry of the current leaf, that leaf is
 * unpinned and the iterator either jumps to slot 0 of the next leaf (which it
 * fetches) or, if there is no next leaf, goes to its end state
 * (page_ == nullptr, index_ == -1).
 */
auto operator++() -> INDEXITERATOR_TYPE & {
    if (IsEnd()) {
        return *this;
    }

    ++index_;
    if (index_ != page_->GetSize()) {
        return *this;  // still inside the current leaf
    }

    // Ran off the current leaf: remember the successor, then release this page.
    page_id_ = page_->GetNextPageId();
    buffer_pool_manager_->UnpinPage(page_->GetPageId(), false);

    if (page_id_ == INVALID_PAGE_ID) {
        page_ = nullptr;
        index_ = -1;
    } else {
        page_ = reinterpret_cast<LeafPage *>(buffer_pool_manager_->FetchPage(page_id_)->GetData());
        index_ = 0;
    }

    return *this;
}

四、自定义测试用例

我这里基于 std::random_device 实现了一个自定义的测试函数,通过指定 USE_RANDOM_DATA 的值选择随机或固定数据。

其中 GetUnpinCount() 函数会统计当前被固定的页面数量,因为 header_page 会始终被固定,因此正常情况下每个操作执行完被固定的页面数量都应该是 1,如果不是的话说明有页面未被释放或重复释放。GetUnpinCount() 的实现就是给缓冲池添加一个 test_unpin_count_ 成员变量,初始为 0,取页面和新建页面会使其加一,取消页面固定会使其减一。

// 1: random page sizes and random keys; 0: the fixed data set below.
#define USE_RANDOM_DATA 1

/**
 * Inserts a batch of keys, looks each one up, then removes them in a different
 * order. After every operation the number of pinned pages reported by the
 * buffer pool must be exactly 1 (only the header page), which catches missing
 * and duplicated unpins. At the end the tree must be empty.
 */
TEST(BPlusTreeTests, RandomTest) {
    auto key_schema = ParseCreateStatement("a bigint");
    GenericComparator<8> comparator(key_schema.get());
    // Keep the disk manager in a named pointer so it can be released at the
    // end; passing `new DiskManager(...)` inline leaked it.
    auto *disk_manager = new DiskManager("test.db");
    BufferPoolManager *bpm = new BufferPoolManagerInstance(50, disk_manager);
    std::random_device random;

#if USE_RANDOM_DATA
    int internal_page_max_size = random() % 5 + 2;
    int leaf_page_max_size = random() % 5 + internal_page_max_size;
#else
    int leaf_page_max_size = 8;
    int internal_page_max_size = 5;
#endif

    BPlusTree<GenericKey<8>, RID, GenericComparator<8>> tree("test", bpm, comparator, internal_page_max_size,
                                                             leaf_page_max_size);
    GenericKey<8> index_key;
    RID rid;

    auto transaction = new Transaction(0);
    page_id_t header_page_id;
    [[maybe_unused]] auto header_page = bpm->NewPage(&header_page_id);

    ASSERT_EQ(header_page_id, HEADER_PAGE_ID);

#if USE_RANDOM_DATA
    int len = random() % 1000;
    std::vector<int64_t> keys(len);
    for (int i = 0; i < len; i++) {
        keys.at(i) = random() % 500;  // duplicates are expected and exercise the duplicate-key path
    }
#else
    std::vector<int64_t> keys = {5, 20, 1, 44, 22, 42, 5, 49, 41, 12, 29, 3, 43, 33, 26, 44, 44, 29, 2, 46};
    std::vector<int64_t> del_keys = {41, 42, 3, 5, 26, 1, 49, 29, 20, 5, 44, 33, 29, 22, 12, 44, 44, 2, 46, 43};
#endif

    // Print the configuration and the insertion order so a failing run can be
    // replayed through the fixed-data branch.
    std::cout << leaf_page_max_size << " " << internal_page_max_size << std::endl;
    for (size_t i = 0; i < keys.size(); i++) {
        std::cout << keys.at(i) << ((i == keys.size() - 1) ? "\n" : ", ");
    }

    /* Insert */
    for (auto key : keys) {
        rid.Set(static_cast<int32_t>(key >> 32), key);
        index_key.SetFromInteger(key);

        tree.Insert(index_key, rid, transaction);
        // NOTE(review): machine-specific output path; adjust to the local checkout.
        tree.Draw(bpm, "/Users/liaohan/CLionProjects/bustub/cmake-build-debug/test/pic");
        EXPECT_EQ(bpm->GetUnpinCount(), 1);
    }

    /* Look up every inserted key */
    std::vector<RID> rids;
    for (auto key : keys) {
        rids.clear();
        index_key.SetFromInteger(key);
        tree.GetValue(index_key, &rids);
        // Fatal check: indexing rids[0] below is only valid when a value was found.
        ASSERT_EQ(rids.size(), 1U);
        EXPECT_EQ(rids[0].GetSlotNum(), key);
        EXPECT_EQ(bpm->GetUnpinCount(), 1);
    }

#if USE_RANDOM_DATA
    std::shuffle(keys.begin(), keys.end(), random);
#else
    keys = del_keys;
#endif

    // Print the deletion order as well.
    for (size_t i = 0; i < keys.size(); i++) {
        std::cout << keys.at(i) << ((i == keys.size() - 1) ? "\n" : ", ");
    }

    /* Remove */
    for (auto key : keys) {
        index_key.SetFromInteger(key);
        tree.Remove(index_key, transaction);
        if (tree.GetRootPageId() != INVALID_PAGE_ID) {
            // NOTE(review): placeholder path ("path of the dot file"); replace before use.
            tree.Draw(bpm, "dot文件路径");
        }
        EXPECT_EQ(bpm->GetUnpinCount(), 1);
    }

    bpm->UnpinPage(HEADER_PAGE_ID, true);
    EXPECT_EQ(tree.GetRootPageId(), INVALID_PAGE_ID);
    EXPECT_EQ(bpm->GetUnpinCount(), 0);
    delete transaction;
    delete bpm;
    delete disk_manager;
    remove("test.db");
    remove("test.log");
}

五、评测结果


参考:

https://xiaolincoding.com/mysql/index/page.html
https://blog.csdn.net/Altair_alpha/article/details/129071063

在这里插入图片描述

你可能感兴趣的:(CMU,15-445(FALL,2022),数据库内核,b+树)