cmu 445 poject 2笔记

2022年的任务 https://15445.courses.cs.cmu.edu/fall2022/project2/
checkpoint 1,实现b+树,读,写,删
checkpoint 2, 实现b+树,迭代器,并发读写删
本文不写代码,只记录遇到的一些思维盲点

checkpoint 1

先理解B+树的几个操作,以及先看一份别的代码,最后再在bushub的代码框架下实现逻辑。

可以参考的一份B+树的代码,带注释。毕竟不看代码,光看图还是有些迷糊的。

// Deletion operation on a B+ Tree in C++

#include 
#include 
#include 
#include 

#define DEFAULT_ORDER 3

typedef struct record {
  int value;
} record;

typedef struct node {
  void **pointers;
  int *keys;
  struct node *parent;
  bool is_leaf;
  int num_keys;
  struct node *next;
} node;

int order = DEFAULT_ORDER;
node *queue = NULL;
bool verbose_output = false;

void enqueue(node *new_node);
node *dequeue(void);
int height(node *const root);
int path_to_root(node *const root, node *child);
void print_leaves(node *const root);
void print_tree(node *const root);
void find_and_print(node *const root, int key, bool verbose);
void find_and_print_range(node *const root, int range1, int range2,
                          bool verbose);
int find_range(node *const root, int key_start, int key_end, bool verbose,
               int returned_keys[], void *returned_pointers[]);

node *find_leaf(node *const root, int key, bool verbose);
record *find(node *root, int key, bool verbose, node **leaf_out);
int cut(int length);

record *make_record(int value);
node *make_node(void);
node *make_leaf(void);
int get_left_index(node *parent, node *left);
node *insert_into_leaf(node *leaf, int key, record *pointer);
node *insert_into_leaf_after_splitting(node *root, node *leaf, int key,
                                       record *pointer);
node *insert_into_node(node *root, node *parent, int left_index, int key,
                       node *right);
node *insert_into_node_after_splitting(node *root, node *parent, int left_index,
                                       int key, node *right);
node *insert_into_parent(node *root, node *left, int key, node *right);
node *insert_into_new_root(node *left, int key, node *right);
node *start_new_tree(int key, record *pointer);
node *insert(node *root, int key, int value);

int get_neighbor_index(node *n);
node *adjust_root(node *root);
node *coalesce_nodes(node *root, node *n, node *neighbor, int neighbor_index,
                     int k_prime);
node *redistribute_nodes(node *root, node *n, node *neighbor,
                         int neighbor_index, int k_prime_index, int k_prime);
node *delete_entry(node *root, node *n, int key, void *pointer);
node *delete_op(node *root, int key);

void enqueue(node *new_node) {
  node *c;
  if (queue == NULL) {
    queue = new_node;
    queue->next = NULL;
  } else {
    c = queue;
    while (c->next != NULL) {
      c = c->next;
    }
    c->next = new_node;
    new_node->next = NULL;
  }
}

node *dequeue(void) {
  node *n = queue;
  queue = queue->next;
  n->next = NULL;
  return n;
}

void print_leaves(node *const root) {
  if (root == NULL) {
    printf("Empty tree.\n");
    return;
  }
  int i;
  node *c = root;
  while (!c->is_leaf) c = c->pointers[0];
  while (true) {
    for (i = 0; i < c->num_keys; i++) {
      if (verbose_output) printf("%p ", c->pointers[i]);
      printf("%d ", c->keys[i]);
    }
    if (verbose_output) printf("%p ", c->pointers[order - 1]);
    if (c->pointers[order - 1] != NULL) {
      printf(" | ");
      c = c->pointers[order - 1];
    } else
      break;
  }
  printf("\n");
}

int height(node *const root) {
  int h = 0;
  node *c = root;
  while (!c->is_leaf) {
    c = c->pointers[0];
    h++;
  }
  return h;
}
int path_to_root(node *const root, node *child) {
  int length = 0;
  node *c = child;
  while (c != root) {
    c = c->parent;
    length++;
  }
  return length;
}

void print_tree(node *const root) {
  node *n = NULL;
  int i = 0;
  int rank = 0;
  int new_rank = 0;

  if (root == NULL) {
    printf("Empty tree.\n");
    return;
  }
  queue = NULL;
  enqueue(root);
  while (queue != NULL) {
    n = dequeue();
    if (n->parent != NULL && n == n->parent->pointers[0]) {
      new_rank = path_to_root(root, n);
      if (new_rank != rank) {
        rank = new_rank;
        printf("\n");
      }
    }
    if (verbose_output) printf("(%p)", n);
    for (i = 0; i < n->num_keys; i++) {
      if (verbose_output) printf("%p ", n->pointers[i]);
      printf("%d ", n->keys[i]);
    }
    if (!n->is_leaf)
      for (i = 0; i <= n->num_keys; i++) enqueue(n->pointers[i]);
    if (verbose_output) {
      if (n->is_leaf)
        printf("%p ", n->pointers[order - 1]);
      else
        printf("%p ", n->pointers[n->num_keys]);
    }
    printf("| ");
  }
  printf("\n");
}

void find_and_print(node *const root, int key, bool verbose) {
  node *leaf = NULL;
  record *r = find(root, key, verbose, NULL);
  if (r == NULL)
    printf("Record not found under key %d.\n", key);
  else
    printf("Record at %p -- key %d, value %d.\n", r, key, r->value);
}

void find_and_print_range(node *const root, int key_start, int key_end,
                          bool verbose) {
  int i;
  int array_size = key_end - key_start + 1;
  int returned_keys[array_size];
  void *returned_pointers[array_size];
  int num_found = find_range(root, key_start, key_end, verbose, returned_keys,
                             returned_pointers);
  if (!num_found)
    printf("None found.\n");
  else {
    for (i = 0; i < num_found; i++)
      printf("Key: %d   Location: %p  Value: %d\n", returned_keys[i],
             returned_pointers[i], ((record *)returned_pointers[i])->value);
  }
}

int find_range(node *const root, int key_start, int key_end, bool verbose,
               int returned_keys[], void *returned_pointers[]) {
  int i, num_found;
  num_found = 0;
  node *n = find_leaf(root, key_start, verbose);
  if (n == NULL) return 0;
  for (i = 0; i < n->num_keys && n->keys[i] < key_start; i++)
    ;
  if (i == n->num_keys) return 0;
  while (n != NULL) {
    for (; i < n->num_keys && n->keys[i] <= key_end; i++) {
      returned_keys[num_found] = n->keys[i];
      returned_pointers[num_found] = n->pointers[i];
      num_found++;
    }
    n = n->pointers[order - 1];
    i = 0;
  }
  return num_found;
}

node *find_leaf(node *const root, int key, bool verbose) {
  if (root == NULL) {
    if (verbose) printf("Empty tree.\n");
    return root;
  }
  int i = 0;
  node *c = root;
  while (!c->is_leaf) {
    if (verbose) {
      printf("[");
      for (i = 0; i < c->num_keys - 1; i++) printf("%d ", c->keys[i]);
      printf("%d] ", c->keys[i]);
    }
    i = 0;
    while (i < c->num_keys) {
      if (key >= c->keys[i])
        i++;
      else
        break;
    }
    if (verbose) printf("%d ->\n", i);
    c = (node *)c->pointers[i];
  }
  if (verbose) {
    printf("Leaf [");
    for (i = 0; i < c->num_keys - 1; i++) printf("%d ", c->keys[i]);
    printf("%d] ->\n", c->keys[i]);
  }
  return c;
}

record *find(node *root, int key, bool verbose, node **leaf_out) {
  if (root == NULL) {
    if (leaf_out != NULL) {
      *leaf_out = NULL;
    }
    return NULL;
  }

  int i = 0;
  node *leaf = NULL;

  leaf = find_leaf(root, key, verbose);

  for (i = 0; i < leaf->num_keys; i++)
    if (leaf->keys[i] == key) break;
  if (leaf_out != NULL) {
    *leaf_out = leaf;
  }
  if (i == leaf->num_keys)
    return NULL;
  else
    return (record *)leaf->pointers[i];
}

int cut(int length) {
  if (length % 2 == 0)
    return length / 2;
  else
    return length / 2 + 1;
}

record *make_record(int value) {
  record *new_record = (record *)malloc(sizeof(record));
  if (new_record == NULL) {
    perror("Record creation.");
    exit(EXIT_FAILURE);
  } else {
    new_record->value = value;
  }
  return new_record;
}

node *make_node(void) {
  node *new_node;
  new_node = malloc(sizeof(node));
  if (new_node == NULL) {
    perror("Node creation.");
    exit(EXIT_FAILURE);
  }
  new_node->keys = malloc((order - 1) * sizeof(int));
  if (new_node->keys == NULL) {
    perror("New node keys array.");
    exit(EXIT_FAILURE);
  }
  new_node->pointers = malloc(order * sizeof(void *));
  if (new_node->pointers == NULL) {
    perror("New node pointers array.");
    exit(EXIT_FAILURE);
  }
  new_node->is_leaf = false;
  new_node->num_keys = 0;
  new_node->parent = NULL;
  new_node->next = NULL;
  return new_node;
}

node *make_leaf(void) {
  node *leaf = make_node();
  leaf->is_leaf = true;
  return leaf;
}

int get_left_index(node *parent, node *left) {
  int left_index = 0;
  while (left_index <= parent->num_keys && parent->pointers[left_index] != left)
    left_index++;
  return left_index;
}

node *insert_into_leaf(node *leaf, int key, record *pointer) {
  int i, insertion_point;

  insertion_point = 0;
  // 找到对应的位置,即插入排序
  while (insertion_point < leaf->num_keys && leaf->keys[insertion_point] < key)
    insertion_point++;

  // 往后移动元素,腾挪位置给新的记录
  for (i = leaf->num_keys; i > insertion_point; i--) {
    leaf->keys[i] = leaf->keys[i - 1];
    leaf->pointers[i] = leaf->pointers[i - 1];
  }
  leaf->keys[insertion_point] = key;
  leaf->pointers[insertion_point] = pointer;
  leaf->num_keys++;
  return leaf;
}

node *insert_into_leaf_after_splitting(node *root, node *leaf, int key,
                                       record *pointer) {
  node *new_leaf;
  int *temp_keys;
  void **temp_pointers;
  int insertion_index, split, new_key, i, j;

  // 创建一个新的叶子节点
  new_leaf = make_leaf();

  temp_keys = malloc(order * sizeof(int));
  if (temp_keys == NULL) {
    perror("Temporary keys array.");
    exit(EXIT_FAILURE);
  }

  temp_pointers = malloc(order * sizeof(void *));
  if (temp_pointers == NULL) {
    perror("Temporary pointers array.");
    exit(EXIT_FAILURE);
  }

  insertion_index = 0;
  // 先找到新记录在叶子节点中该插入的位置,需要注意的是叶子节点是order -
  // 1个key,和order - 1个指针
  while (insertion_index < order - 1 && leaf->keys[insertion_index] < key)
    insertion_index++;

  // 把叶子节点的记录拷贝到tmp数组下,其中给新插入的记录预留一个位置
  for (i = 0, j = 0; i < leaf->num_keys; i++, j++) {
    if (j == insertion_index) j++;
    temp_keys[j] = leaf->keys[i];
    temp_pointers[j] = leaf->pointers[i];
  }

  // 将新记录插入到tmp数组中
  temp_keys[insertion_index] = key;
  temp_pointers[insertion_index] = pointer;

  leaf->num_keys = 0;

  // 拆分叶子节点
  split = cut(order - 1);

  // 将tmp数组的前一半拷贝回当前叶子节点
  for (i = 0; i < split; i++) {
    leaf->pointers[i] = temp_pointers[i];
    leaf->keys[i] = temp_keys[i];
    leaf->num_keys++;
  }

  // 将tmp数组的前一半拷贝回新叶子节点
  for (i = split, j = 0; i < order; i++, j++) {
    new_leaf->pointers[j] = temp_pointers[i];
    new_leaf->keys[j] = temp_keys[i];
    new_leaf->num_keys++;
  }

  free(temp_pointers);
  free(temp_keys);

  // 末尾指针指向右兄弟节点
  new_leaf->pointers[order - 1] = leaf->pointers[order - 1];
  leaf->pointers[order - 1] = new_leaf;

  for (i = leaf->num_keys; i < order - 1; i++) leaf->pointers[i] = NULL;
  for (i = new_leaf->num_keys; i < order - 1; i++) new_leaf->pointers[i] = NULL;

  new_leaf->parent = leaf->parent;
  new_key = new_leaf->keys[0];

  // 父节点要插入一个记录,记录新的叶子节点
  return insert_into_parent(root, leaf, new_key, new_leaf);
}

node *insert_into_node(node *root, node *n, int left_index, int key,
                       node *right) {
  int i;

  for (i = n->num_keys; i > left_index; i--) {
    n->pointers[i + 1] = n->pointers[i];
    n->keys[i] = n->keys[i - 1];
  }
  n->pointers[left_index + 1] = right;
  n->keys[left_index] = key;
  n->num_keys++;
  return root;
}

node *insert_into_node_after_splitting(node *root, node *old_node,
                                       int left_index, int key, node *right) {
  int i, j, split, k_prime;
  node *new_node, *child;
  int *temp_keys;
  node **temp_pointers;

  temp_pointers = malloc((order + 1) * sizeof(node *));
  if (temp_pointers == NULL) {
    perror("Temporary pointers array for splitting nodes.");
    exit(EXIT_FAILURE);
  }
  temp_keys = malloc(order * sizeof(int));
  if (temp_keys == NULL) {
    perror("Temporary keys array for splitting nodes.");
    exit(EXIT_FAILURE);
  }

  // 将指针拷贝到tmp数组
  for (i = 0, j = 0; i < old_node->num_keys + 1; i++, j++) {
    if (j == left_index + 1) j++;
    temp_pointers[j] = old_node->pointers[i];
  }

  // 将key拷贝到tmp数组
  for (i = 0, j = 0; i < old_node->num_keys; i++, j++) {
    if (j == left_index) j++;
    temp_keys[j] = old_node->keys[i];
  }

  // 将指针和key插入到tmp数组中,需要注意的是非叶子节点是order个指针,order -
  // 1个key
  temp_pointers[left_index + 1] = right;
  temp_keys[left_index] = key;

  split = cut(order);  // 按point个数来拆非叶子节点
  new_node = make_node();
  old_node->num_keys = 0;
  for (i = 0; i < split - 1; i++) {
    old_node->pointers[i] = temp_pointers[i];
    old_node->keys[i] = temp_keys[i];
    old_node->num_keys++;
  }
  old_node->pointers[i] = temp_pointers[i];
  k_prime = temp_keys[split - 1];  // 会移到父节点上,所以子节点本来是order个key拆两部分,拆开后就变成两个合起来只有order
                                   // - 1 个key了。
  for (++i, j = 0; i < order; i++, j++) {
    new_node->pointers[j] = temp_pointers[i];
    new_node->keys[j] = temp_keys[i];
    new_node->num_keys++;
  }
  new_node->pointers[j] = temp_pointers[i];
  free(temp_pointers);
  free(temp_keys);
  new_node->parent = old_node->parent;
  for (i = 0; i <= new_node->num_keys; i++) {
    child = new_node->pointers[i];
    child->parent = new_node;
  }

  // 将中间的key插入到父节点中,然后父节点也添加指针指向新的右节点
  return insert_into_parent(root, old_node, k_prime, new_node);
}

node *insert_into_parent(node *root, node *left, int key, node *right) {
  int left_index;
  node *parent;

  parent = left->parent;

  if (parent == NULL)  // 根节点需要调高
    return insert_into_new_root(left, key, right);

  // 找到父节点中,当前叶子节点所在的索引位置
  left_index = get_left_index(parent, left);

  if (parent->num_keys <
      order - 1)  // 如果父节点未满,就将新叶子节点right加入父节点中
    return insert_into_node(root, parent, left_index, key, right);

  // 父节点已经满了,需要拆分
  return insert_into_node_after_splitting(root, parent, left_index, key, right);
}

node *insert_into_new_root(node *left, int key, node *right) {
  node *root = make_node();
  root->keys[0] = key;
  root->pointers[0] = left;
  root->pointers[1] = right;
  root->num_keys++;
  root->parent = NULL;
  left->parent = root;
  right->parent = root;
  return root;
}

node *start_new_tree(int key, record *pointer) {
  node *root = make_leaf();
  root->keys[0] = key;
  root->pointers[0] = pointer;
  root->pointers[order - 1] = NULL;
  root->parent = NULL;
  root->num_keys++;
  return root;
}

node *insert(node *root, int key, int value) {
  record *record_pointer = NULL;
  node *leaf = NULL;

  // 先找到叶子节点
  record_pointer = find(root, key, false, NULL);
  if (record_pointer != NULL) {  // 如果找到了,就更新value
    record_pointer->value = value;
    return root;
  }

  // 新建一条记录
  record_pointer = make_record(value);

  if (root == NULL)  // 如果是第一个记录,就创建树,root为叶子节点
    return start_new_tree(key, record_pointer);

  // 找到应该插入的叶子节点
  leaf = find_leaf(root, key, false);

  if (leaf->num_keys < order - 1) {
    // 如果叶子节点未满,就插入叶子节点中
    leaf = insert_into_leaf(leaf, key, record_pointer);
    return root;
  }
  // 叶子节点满了,此时要进行叶子节点拆分
  return insert_into_leaf_after_splitting(root, leaf, key, record_pointer);
}

int get_neighbor_index(node *n) {
  int i;
  for (i = 0; i <= n->parent->num_keys; i++)
    if (n->parent->pointers[i] == n) return i - 1;

  printf("Search for nonexistent pointer to node in parent.\n");
  printf("Node:  %#lx\n", (unsigned long)n);
  exit(EXIT_FAILURE);
}

node *remove_entry_from_node(node *n, int key, node *pointer) {
  int i, num_pointers;
  i = 0;
  while (n->keys[i] != key) i++;
  for (++i; i < n->num_keys; i++) n->keys[i - 1] = n->keys[i];

  num_pointers = n->is_leaf ? n->num_keys : n->num_keys + 1;
  i = 0;
  while (n->pointers[i] != pointer) i++;
  for (++i; i < num_pointers; i++) n->pointers[i - 1] = n->pointers[i];

  n->num_keys--;

  if (n->is_leaf)
    for (i = n->num_keys; i < order - 1; i++) n->pointers[i] = NULL;
  else
    for (i = n->num_keys + 1; i < order; i++) n->pointers[i] = NULL;

  return n;
}

node *adjust_root(node *root) {
  node *new_root;

  if (root->num_keys > 0) return root;

  // root高度下降,根往下移,删除只是删除叶子节点,为啥会动root?
  // 应该不会到这个分支
  if (!root->is_leaf) {
    new_root = root->pointers[0];
    new_root->parent = NULL;
  }
  // 最后一个元素,直接清空树根
  else
    new_root = NULL;

  free(root->keys);
  free(root->pointers);
  free(root);

  return new_root;
}

node *coalesce_nodes(node *root, node *n, node *neighbor, int neighbor_index,
                     int k_prime) {
  int i, j, neighbor_insertion_index, n_end;
  node *tmp;

  if (neighbor_index ==
      -1) {  // 如果是和右兄弟节点合并,先交换下顺序,保证neighbor指向左边节点,而n指向右边节点
    tmp = n;
    n = neighbor;
    neighbor = tmp;
  }

  neighbor_insertion_index = neighbor->num_keys;

  if (!n->is_leaf) {  // 非叶子节点
    neighbor->keys[neighbor_insertion_index] =
        k_prime;  // 左节点先把父节点的key存下来
    neighbor->num_keys++;

    n_end = n->num_keys;

    for (i = neighbor_insertion_index + 1, j = 0; j < n_end;
         i++, j++) {  // 左节点把右节点的key,pointer都拷贝过来
      neighbor->keys[i] = n->keys[j];
      neighbor->pointers[i] = n->pointers[j];
      neighbor->num_keys++;
      n->num_keys--;
    }

    neighbor->pointers[i] = n->pointers[j];

    for (i = 0; i < neighbor->num_keys + 1;
         i++) {  // 右节点的子节点的父指针都指向左节点
      tmp = (node *)neighbor->pointers[i];
      tmp->parent = neighbor;
    }
  }
  else {  // 叶子节点
    for (i = neighbor_insertion_index, j = 0; j < n->num_keys;
         i++, j++) {  // 拷贝右节点的key和指针即可
      neighbor->keys[i] = n->keys[j];
      neighbor->pointers[i] = n->pointers[j];
      neighbor->num_keys++;
    }
    neighbor->pointers[order - 1] =
        n->pointers[order - 1];  // 左节点的最末尾指针指向下一个节点
  }

  // 删除父节点对应的key,因为这key下放到下一层节点,或者删除了
  root = delete_entry(root, n->parent, k_prime, n);
  free(n->keys);
  free(n->pointers);
  free(n);
  return root;
}

node *redistribute_nodes(node *root, node *n, node *neighbor,
                         int neighbor_index, int k_prime_index, int k_prime) {
  int i;
  node *tmp;

  if (neighbor_index != -1) {  // 通常从左节点借
    if (!n->is_leaf)           // 非叶子节点,最右指针往右挪一下
      n->pointers[n->num_keys + 1] = n->pointers[n->num_keys];
    for (i = n->num_keys; i > 0; i--) {  // 先把key和point往右挪一位,给要借的的key腾出空间
      n->keys[i] = n->keys[i - 1];
      n->pointers[i] = n->pointers[i - 1];
    }
    if (!n->is_leaf) {  // 非叶子节点,把左兄弟节点最末尾的指针拷贝过来
      n->pointers[0] = neighbor->pointers[neighbor->num_keys];
      tmp = (node *)n->pointers[0];
      tmp->parent = n;
      neighbor->pointers[neighbor->num_keys] = NULL;
      n->keys[0] = k_prime;  // 左兄弟节点最大值移到本节点最左侧
      n->parent->keys[k_prime_index] =
          neighbor->keys[neighbor->num_keys -
                         1];  // 父节点之前存放左兄弟节点的最大值,改成次大值
    } else {  // 叶子节点,只用调整key和父节点的key就行
      n->pointers[0] = neighbor->pointers[neighbor->num_keys - 1];
      neighbor->pointers[neighbor->num_keys - 1] = NULL;
      n->keys[0] = neighbor->keys[neighbor->num_keys - 1];
      n->parent->keys[k_prime_index] = n->keys[0];
    }
  }

  else {  // 右节点作为兄弟节点来借数据,一般是当前节点是最左端的那个节点才会这样
    if (n->is_leaf) {  // 叶子节点,从右兄弟节点挪过来一个key就行
      n->keys[n->num_keys] = neighbor->keys[0];
      n->pointers[n->num_keys] = neighbor->pointers[0];
      n->parent->keys[k_prime_index] =
          neighbor->keys
              [1];  // 父节点原来存放右兄弟节点的第一个key,现在改成第二个key
    } else {        // 非叶子节点
      n->keys[n->num_keys] = k_prime;  // 父节点的第一个key给本节点
      n->pointers[n->num_keys + 1] = neighbor->pointers[0];
      tmp = (node *)n->pointers[n->num_keys + 1];
      tmp->parent = n;
      n->parent->keys[k_prime_index] =
          neighbor->keys[0];  // 父节点的第一个key改为右兄弟节点的第一个key
    }
    for (i = 0; i < neighbor->num_keys - 1;
         i++) {  // 调整右兄弟节点的key和pointer,都左移一位
      neighbor->keys[i] = neighbor->keys[i + 1];
      neighbor->pointers[i] = neighbor->pointers[i + 1];
    }
    if (!n->is_leaf)  // 如果是非叶子节点,还需要再挪一下右兄弟节点的最右边的指针
      neighbor->pointers[i] = neighbor->pointers[i + 1];
  }

  n->num_keys++;
  neighbor->num_keys--;

  return root;
}

node *delete_entry(node *root, node *n, int key, void *pointer) {
  int min_keys;
  node *neighbor;
  int neighbor_index;
  int k_prime_index, k_prime;
  int capacity;

  // 先从叶子节点删除本key
  n = remove_entry_from_node(n, key, pointer);

  if (n == root) return adjust_root(root);

  min_keys = n->is_leaf ? cut(order - 1) : cut(order) - 1;
  // 叶子节点最小的key格式为order - 1/2, 非叶子节点是order/2 - 1

  if (n->num_keys >= min_keys)  // 如果节点够一半,删除结束
    return root;

  // 否则找左兄弟节点,只会是共一个父节点的左兄弟节点
  neighbor_index = get_neighbor_index(n);
  k_prime_index = neighbor_index == -1 ? 0 : neighbor_index;
  k_prime = n->parent->keys[k_prime_index];
  // 左兄弟节点,或者右兄弟节点
  neighbor = neighbor_index == -1 ? n->parent->pointers[1]
                                  : n->parent->pointers[neighbor_index];

  capacity = n->is_leaf ? order : order - 1;

  if (neighbor->num_keys + n->num_keys <
      capacity)  // 如果两个节点的总key个数都不够order,那就合并
    return coalesce_nodes(root, n, neighbor, neighbor_index, k_prime);
  else  // 如果够order,那就够两个节点分,重新均分下两个接的key
    return redistribute_nodes(root, n, neighbor, neighbor_index, k_prime_index,
                              k_prime);
}

node *delete_op(node *root, int key) {
  node *key_leaf = NULL;
  record *key_record = NULL;

  // 先找叶子节点的记录
  key_record = find(root, key, false, &key_leaf);

  if (key_record != NULL && key_leaf != NULL) {
    // 找到了,开始删除
    root = delete_entry(root, key_leaf, key, key_record);
    free(key_record);
  }
  // 没找到,就直接结束
  return root;
}

void destroy_tree_nodes(node *root) {
  int i;
  if (root->is_leaf)
    for (i = 0; i < root->num_keys; i++) free(root->pointers[i]);
  else
    for (i = 0; i < root->num_keys + 1; i++)
      destroy_tree_nodes(root->pointers[i]);
  free(root->pointers);
  free(root->keys);
  free(root);
}

node *destroy_tree(node *root) {
  destroy_tree_nodes(root);
  return NULL;
}

int main() {
  node *root;
  char instruction;

  root = NULL;

  root = insert(root, 5, 33);
  print_tree(root);
  root = insert(root, 15, 21);
  print_tree(root);
  root = insert(root, 25, 31);
  print_tree(root);
  root = insert(root, 35, 41);
  print_tree(root);
  root = insert(root, 45, 10);

  print_tree(root);

  printf("before delete\n");

  root = delete_op(root, 5);

  print_tree(root);
}

比较简单,就是自上而下的查找树,可以做的一个优化,就是节点内的元素是顺序的,可以用二分进行优化下。

  1. 自上而下找到叶子节点
    1. 如果叶子节点还有空间,即元素个数小于MaxSize,就插入,直接返回成功
    2. 如果叶子节点满了,此时需要将节点拆分成两个节点,新节点是右兄弟节点,然后再将新的节点的最小key值插入到父节点里
      1. 需要注意的是,这里拆分时,需要申请一个临时的数组(可容纳MaxSize+1个元素),然后将当前所有元素拷贝出去,进行正常插入后,最后切分
      2. 我在这踩过坑,直接复用了当前节点的array_,当MaxSize设置为最大值256时,出现数组越界问题。其实报错不是越界,而是修改数据时,发现有别的数组的数据被改乱了。原因就是我在拆分时,越界访问了不属于本节点的page。
    3. 然后递归处理一直到根节点

删除

比较复杂。可以参考链接3的说明和实现,主要是两类操作:合并和重新分布。

  1. 自上而下找到叶子节点
    1. 如果叶子节点还有很多元素,即元素个数大于MinSize,就删除,直接返回成功
    2. 如果叶子节点小于等于MinSize,此时需要看看该节点兄弟给不给力了
      1. 如果比较给力,有很多元素,即兄弟节点元素个数 + 本节点元素个数 > MaxSize()
        1. 重新分布,从兄弟节点挪一个元素过来就行,因为删除只会删一个,因此本节点最少有MinSize -1个元素,只用挪一个就够活
      2. 如果不给力,即兄弟节点元素个数 + 本节点元素个数 < MaxSize()
        1. 合并,删除右边节点,即如果本节点在最左边,那就删除右兄弟节点,其他情况,就是删除本节点, 然后更新父节点的指针,并且需要递归往上
        2. 还需要删除父节点中本节点或者兄弟节点的指针,并递归处理父节点
      3. 如果父节点是根节点,并且只剩一个指针了,那需要删掉父节点,将父节点指针指向的节点设置为根节点

checkpoint 2

迭代器

比较简单,找到最左侧节点,然后顺着指针往后遍历即可

并发

  1. 自上而下的获取锁,当本节点是安全的时候,就可以释放上一层的锁,参考下面链接2的介绍
    1. 安全的判断
      1. 对于写入,就是再写一个,也不会分裂
      2. 对于删除,就是删一个,也不会小于MinSize

其他的注意事项:

  1. MaxSize是指定的
  2. MinSize需要计算
    1. (对应节点类型的MaxSize + 1) / 2
    2. root节点的minSize恒为2
  3. 可以使用自带的Draw函数来将B+数转换为graphivz文件,然后用下面的链接1查看B+树的样子
  4. 提供两个并发自测case考虑
    1. leaf max size和 internal max size都为254时,多插入一些数据看看是否有问题
    2. 还有一个case,叶子节点最大3个元素,中间节点最大5个元素,插入一些数据,1-10000,然后只删1,3,5,7,9等奇数,让它产生连续的合并和重新分布,看看是否有什么问题。
  5. 再提供一个debug的思路
    1. 加锁解锁都打日志,然后加上线程id,项目提供的log_info不带thread id,可以自己修改下加上
    2. 实现锁的跟踪,弄一个全局map,记录线程和加锁的page id, 然后在所有加锁解锁的地方加上记录,当发现二次加锁,或者某次操作完成,检查这个map,发现有为解锁的,说明有问题

最后,重点参考下面链接2,来实现代码

参考:

  1. https://dreampuf.github.io/GraphvizOnline/# 可视化graphviz的网址
  2. https://www.cnblogs.com/JayL-zxl/p/14324297.html
  3. https://oi-wiki.org/ds/bplus-tree/#%E6%9F%A5%E6%89%BE

附录

提供下自测的场景代码供参考:

TEST(BPlusTreeConcurrentTest, SequentialMixTest) {
  // create KeyComparator and index schema
  auto key_schema = ParseCreateStatement("a bigint");
  GenericComparator<8> comparator(key_schema.get());

  auto *disk_manager = new DiskManager("test.db");
  BufferPoolManager *bpm = new BufferPoolManagerInstance(50, disk_manager);
  // create b+ tree
  BPlusTree, RID, GenericComparator<8>> tree("foo_pk", bpm, comparator, 3, 5);
  // GenericKey<8> index_key;

  // create and fetch header_page
  page_id_t page_id;
  auto header_page = bpm->NewPage(&page_id);
  (void)header_page;
  // first, populate index
  std::vector keys = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9};
  int64_t scale_factor = 1000;
  for (int64_t key = 1; key < scale_factor; key++) {
    for (int i = 0; i < 10; i++) {
      keys.push_back(key * 10 + keys[i]);
    }
  }
  InsertHelper((&tree), keys);

  std::vector remove_keys;
  for (int i = 1; i < 10000; i += 2) {
    remove_keys.push_back(i);
  }

  DeleteHelper(&tree, remove_keys);

  std::string out = "a.dot";
  std::cout << out << std::endl;
  tree.Draw(bpm, out);

  bpm->UnpinPage(HEADER_PAGE_ID, true);
  delete disk_manager;
  delete bpm;
  remove("test.db");
  remove("test.log");
}

TEST(BPlusTreeConcurrentTest, MixTest) {
  // create KeyComparator and index schema
  auto key_schema = ParseCreateStatement("a bigint");
  GenericComparator<8> comparator(key_schema.get());

  auto *disk_manager = new DiskManager("test.db");
  BufferPoolManager *bpm = new BufferPoolManagerInstance(50, disk_manager);
  // create b+ tree
  BPlusTree, RID, GenericComparator<8>> tree("foo_pk", bpm, comparator, 254, 254);
  GenericKey<8> index_key;

  // create and fetch header_page
  page_id_t page_id;
  auto header_page = bpm->NewPage(&page_id);
  (void)header_page;
  // first, populate index
  std::vector keys;
  int64_t scale_factor = 10000;
  for (int64_t key = 1; key < scale_factor; key++) {
    keys.push_back(key);
  }
  InsertHelper((&tree), keys);

  std::string out = "a.html";
  std::cout << out << std::endl;
  tree.Draw(bpm, out);

  // concurrent insert
  keys.clear();
  for (int i = 10001; i < 20239; i++) {
    keys.push_back(i);
  }

  std::vector remove_keys;
  for (int i = 1; i < 19100; ++i) {
    remove_keys.push_back(i);
  }

  std::vector remove_keys_2;
  for (int i = 1; i < 20; ++i) {
    remove_keys_2.push_back(i);
  }

  std::vector thread_group;

  thread_group.emplace_back(std::thread(&InsertHelper, (&tree), std::cref(keys), 1));
  // thread_group.emplace_back(std::thread(&InsertHelper, (&tree), std::cref(keys), 4));
  thread_group.emplace_back(std::thread(&DeleteHelper, (&tree), std::cref(remove_keys), 2));
  // thread_group.emplace_back(std::thread(&DeleteHelper, (&tree), std::cref(remove_keys_2), 3));

  // Join the threads with the main thread
  for (uint64_t thread_itr = 0; thread_itr < thread_group.size(); ++thread_itr) {
    thread_group[thread_itr].join();
  }
  int64_t start_key = 19101;
  int64_t size = 0;
  index_key.SetFromInteger(start_key);
  for (auto iterator = tree.Begin(index_key); iterator != tree.End(); ++iterator) {
    size = size + 1;
  }

  EXPECT_EQ(size, 1138);

  bpm->UnpinPage(HEADER_PAGE_ID, true);
  delete disk_manager;
  delete bpm;
  remove("test.db");
  remove("test.log");
}

你可能感兴趣的:(c++,数据结构,算法,数据库)