简化下题意:有10w个键值对,键值分别代表了10w个点在x轴上的坐标和这个点的权值。
现在这10w个点在随机的移动,在某时刻给出一个范围 [L, R] ,求出此时坐标在这个范围内的所有点。
使用C++ STL库中自带的multimap可以非常方便地实现。现要求自行设计数据结构并实现,使得维护各点移动并完成范围查找的总复杂度比multimap低。
首先,我们如何设计数据结构?
我们先了解下multimap的底层:红黑树。众所周知,二叉搜索树在查询方面表现优秀,但严格平衡的二叉搜索树(如AVL树)为了维护子树高度、使其保持平衡,所带来的消耗巨大。红黑树正是为了解决这个问题而诞生的:它对节点染色,并按颜色规则来辅助维护节点高度,大大减少了插入、删除操作在维护平衡上的消耗。
当然,红黑树的原型还是二叉树,虽然它能相对高效的保持树的平衡,但是在存储大量数据的情况下,免不了整棵树的高度越来越高,高度变高带来的缺点就是查询效率变低,因为每经过一次父子节点就意味多一次查询。
那么在大量数据存储的需求下,B树和B+树就诞生了,具体的演变和特点这里不再赘述,只需要知道B+树的插入和查找效率毫不逊色于红黑树,且本身是多叉树。在磁盘存储的环境下,广泛应用于数据库存储引擎的B+树只需高度为3就大概可以存放1170×1170×16=21902400行数据。
那么,考虑到题目中的键和值都可能是double型,我采取的方式是将double型数据乘以100转化为int型(即保留两位小数)后存储在我实现的B+树中,取出时再除以100还原即可。B+树节点的数据结构设计如下:
分别代表:叶节点标记,键,值,数据长度,指向子节点的指针/指向下一叶节点的指针。
// A single B+ tree node. All storage is private; BPTree is declared a friend
// so that it can manipulate the node directly.
class Node {
    bool IS_LEAF;  // leaf-node marker
    int *key;      // keys stored in this node
    int *val;      // values, parallel to key
    int size;      // data length: number of entries currently in use
    Node **ptr;    // child pointers / pointer to the next leaf
    friend class BPTree;

public:
    Node();
};
那么,为了方便进行复杂度比较,我针对题目对测试代码作了调整:
数据插入使用for循环保证覆盖,移动则使用以当前时间为种子的rand()伪随机数进行。multimap实现方式的代码如下:
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <map>
#include <windows.h>  // GetTickCount
using namespace std;
// Baseline benchmark built on std::multimap.
//
// Inserts 100000 (key, value) pairs, then runs 100 range queries over a
// sliding window [lval, rval]; after every 9th query one randomly chosen key
// is moved to a randomly chosen new key. Elapsed tick counts are printed for
// the insertion phase and for the query/move phase.
int main() {
    srand((unsigned int)time(NULL));  // seed rand() with the current time
    auto j = 0.1;
    std::multimap<double, double> points;
    auto t1 = GetTickCount();
    for (auto i = 0; i < 100000; i++) {
        j += 0.1;
        points.insert(std::make_pair(j, j));
    }
    // The tick difference is unsigned, so print it with an unsigned specifier
    // instead of %d.
    printf("插入耗时 : %lu\n", static_cast<unsigned long>(GetTickCount() - t1));
    auto findCnt = 0;
    t1 = GetTickCount();
    auto lval = 134.2, rval = 145.2;
    for (int i = 1; i <= 100; i++) {
        // ----- measured range query -----
        const auto low = points.lower_bound(lval);
        const auto up = points.upper_bound(rval);
        for (auto iter = low; iter != up; ++iter) {
            printf("%lf ", iter->first);
            findCnt++;
        }
        // --------------------------------
        printf("第%d次查找, 共找到%d个元素.\n", i, findCnt);
        findCnt = 0;
        lval += i;
        rval += i;
        if (i % 9 == 0) {
            printf("第%d次随机移动\n", i / 9);
            const int oldIndex = rand();  // original position
            const int newIndex = rand();  // new position
            const auto findRes = points.find(oldIndex);
            if (findRes != points.end()) {
                // Keep the payload as double: storing it in an int would
                // silently drop its fractional part.
                const double oldVal = findRes->second;
                // Erase through the iterator we already hold rather than
                // searching for the key a second time.
                points.erase(findRes);
                points.insert(std::make_pair(static_cast<double>(newIndex), oldVal));
            }
        }
    }
    printf("带随机移动的查找耗时 : %lu\n", static_cast<unsigned long>(GetTickCount() - t1));
    return 0;
}
使用B+树实现的代码如下:
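#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <utility>
#include <windows.h>  // GetTickCount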
using namespace std;
// Maximum number of keys a node can hold. It is const so that `MAX + 1` is a
// constant expression: the scratch arrays sized with it are then ordinary
// arrays rather than variable-length arrays, which standard C++ does not have.
const int MAX = 3;
class BPTree;  // forward declaration; the full class is defined after Node
// A single B+ tree node. All storage is private; BPTree is declared a friend
// so that it can manipulate the node directly.
class Node {
    bool IS_LEAF;  // true for leaf nodes, false for internal nodes
    int *key;      // keys stored in this node
    int *val;      // values, parallel to key
    int size;      // number of entries currently in use
    Node **ptr;    // child pointers; a leaf uses ptr[size] as the link to the next leaf
    friend class BPTree;

public:
    Node();
};
// B+ tree mapping int keys to int values.
class BPTree {
    Node *root;  // NULL while the tree is empty

    // Inserts key x / value y and the pointer `child` into internal node `cursor`.
    void insertInternal(int x, int y, Node *cursor, Node *child);
    // Removes key x and the pointer `child` from internal node `cursor`.
    void removeInternal(int x, Node *cursor, Node *child);
    // Searches the subtree under `cursor` for the node that points at `child`.
    Node *findParent(Node *cursor, Node *child);

public:
    BPTree();
    // Point lookup for key x; `first` reports success, `second` carries a value.
    std::pair<bool, int> findOne(int x);
    // Range query over [lval, rval]; *cnt is incremented once per hit.
    void search(int lval, int rval, int *cnt);
    // Adds the pair (x, y).
    void insert(int x, int y);
    // Deletes key x.
    void remove(int x);
    // Prints the keys of every leaf below `cursor`.
    void display(Node *cursor);
    // Returns the current root node.
    Node *getRoot();
};
// Allocates storage for MAX keys/values and MAX + 1 pointers and puts the node
// into a well-defined empty state. Every pointer slot starts as a null pointer
// so that an unset link (for example the next-leaf link of the last leaf) is
// never an indeterminate value.
Node::Node() {
    IS_LEAF = false;
    size = 0;
    key = new int[MAX]();          // trailing () value-initialises to 0
    val = new int[MAX]();
    ptr = new Node *[MAX + 1]();   // all slots start as null pointers
}
// Creates an empty tree: there is no root node until the first insert.
BPTree::BPTree() : root(NULL) {}
void BPTree::insert(int x, int y) {
if (root == NULL) {
root = new Node;
root->key[0] = x;
root->val[0] = y; // 根节点赋值
root->IS_LEAF = true;
root->size = 1;
} else {
Node *cursor = root;
Node *parent;
while (cursor->IS_LEAF == false) {
parent = cursor;
for (int i = 0; i < cursor->size; i++) {
if (x < cursor->key[i]) {
cursor = cursor->ptr[i];
break;
}
if (i == cursor->size - 1) {
cursor = cursor->ptr[i + 1];
break;
}
}
}
if (cursor->size < MAX) {
int i = 0;
while (x > cursor->key[i] && i < cursor->size) i++;
for (int j = cursor->size; j > i; j--) {
cursor->key[j] = cursor->key[j - 1];
cursor->val[j] = cursor->val[j - 1]; // 向后移动
}
cursor->key[i] = x;
cursor->val[i] = y; // 插入值
cursor->size++;
cursor->ptr[cursor->size] = cursor->ptr[cursor->size - 1];
cursor->ptr[cursor->size - 1] = NULL;
} else {
Node *newLeaf = new Node;
int virtualNode[MAX + 1];
int virtualValue[MAX + 1]; // 定义数组暂存值
for (int i = 0; i < MAX; i++) {
virtualNode[i] = cursor->key[i];
virtualValue[i] = cursor->val[i]; // 复制值
}
int i = 0, j;
while (x > virtualNode[i] && i < MAX) i++;
for (int j = MAX + 1; j > i; j--) {
virtualNode[j] = virtualNode[j - 1];
virtualValue[j] = virtualValue[j - 1]; // 向后移动
}
virtualNode[i] = x;
virtualValue[i] = y; // 插入值
newLeaf->IS_LEAF = true;
cursor->size = (MAX + 1) / 2;
newLeaf->size = MAX + 1 - (MAX + 1) / 2;
cursor->ptr[cursor->size] = newLeaf;
newLeaf->ptr[newLeaf->size] = cursor->ptr[MAX];
cursor->ptr[MAX] = NULL;
for (i = 0; i < cursor->size; i++) {
cursor->key[i] = virtualNode[i];
cursor->val[i] = virtualValue[i]; // 复制值
}
for (i = 0, j = cursor->size; i < newLeaf->size; i++, j++) {
newLeaf->key[i] = virtualNode[j];
newLeaf->val[i] = virtualValue[j]; // 复制到新节点
}
if (cursor == root) {
Node *newRoot = new Node;
newRoot->key[0] = newLeaf->key[0];
newRoot->val[0] = newLeaf->val[0]; // 新根
newRoot->ptr[0] = cursor;
newRoot->ptr[1] = newLeaf;
newRoot->IS_LEAF = false;
newRoot->size = 1;
root = newRoot;
} else {
insertInternal(newLeaf->key[0], newLeaf->val[0], parent, newLeaf);
}
}
}
}
// Inserts separator key x (with value y) and the pointer `child`, which
// becomes the subtree to the right of x, into internal node `cursor`.
//
// If `cursor` is full it is split: the middle key moves up into the parent
// (recursively, via findParent), or into a freshly created root when `cursor`
// is the root.
void BPTree::insertInternal(int x, int y, Node *cursor, Node *child) {
    if (cursor->size < MAX) {
        int pos = 0;
        // Bound check first so key[pos] is never read past the used entries.
        while (pos < cursor->size && x > cursor->key[pos]) pos++;
        for (int j = cursor->size; j > pos; j--) {
            cursor->key[j] = cursor->key[j - 1];
            cursor->val[j] = cursor->val[j - 1];
        }
        for (int j = cursor->size + 1; j > pos + 1; j--) {
            cursor->ptr[j] = cursor->ptr[j - 1];
        }
        cursor->key[pos] = x;
        cursor->val[pos] = y;
        cursor->size++;
        cursor->ptr[pos + 1] = child;
        return;
    }

    // Node is full: merge its entries with the new one in scratch arrays.
    int virtualKey[MAX + 1];
    int virtualValue[MAX + 1];
    Node *virtualPtr[MAX + 2];
    for (int i = 0; i < MAX; i++) {
        virtualKey[i] = cursor->key[i];
        virtualValue[i] = cursor->val[i];
    }
    for (int i = 0; i < MAX + 1; i++) {
        virtualPtr[i] = cursor->ptr[i];
    }
    int pos = 0;
    while (pos < MAX && x > virtualKey[pos]) pos++;
    // Highest valid indices are MAX (keys/values) and MAX + 1 (pointers);
    // starting the shifts one higher would write past the scratch arrays.
    for (int j = MAX; j > pos; j--) {
        virtualKey[j] = virtualKey[j - 1];
        virtualValue[j] = virtualValue[j - 1];
    }
    virtualKey[pos] = x;
    virtualValue[pos] = y;
    for (int j = MAX + 1; j > pos + 1; j--) {
        virtualPtr[j] = virtualPtr[j - 1];
    }
    virtualPtr[pos + 1] = child;

    Node *newInternal = new Node;
    newInternal->IS_LEAF = false;
    cursor->size = (MAX + 1) / 2;
    newInternal->size = MAX - (MAX + 1) / 2;

    // Write the left half back into cursor. Without this, a new key that
    // sorts into the left half (or becomes the middle key) would be lost,
    // because cursor would still hold its pre-insert contents.
    for (int i = 0; i < cursor->size; i++) {
        cursor->key[i] = virtualKey[i];
        cursor->val[i] = virtualValue[i];
    }
    for (int i = 0; i < cursor->size + 1; i++) {
        cursor->ptr[i] = virtualPtr[i];
    }

    // The middle entry moves up; it is kept in neither half.
    const int upKey = virtualKey[cursor->size];
    const int upVal = virtualValue[cursor->size];

    for (int i = 0, j = cursor->size + 1; i < newInternal->size; i++, j++) {
        newInternal->key[i] = virtualKey[j];
        newInternal->val[i] = virtualValue[j];
    }
    for (int i = 0, j = cursor->size + 1; i < newInternal->size + 1; i++, j++) {
        newInternal->ptr[i] = virtualPtr[j];
    }

    if (cursor == root) {
        Node *newRoot = new Node;
        newRoot->key[0] = upKey;
        newRoot->val[0] = upVal;
        newRoot->ptr[0] = cursor;
        newRoot->ptr[1] = newInternal;
        newRoot->IS_LEAF = false;
        newRoot->size = 1;
        root = newRoot;
    } else {
        insertInternal(upKey, upVal, findParent(root, cursor), newInternal);
    }
}
// Depth-first search below `cursor` for the node that has `child` among its
// pointers. Returns that node, or NULL when none is found. Leaves, and nodes
// whose first child is a leaf, are never reported as a parent.
Node *BPTree::findParent(Node *cursor, Node *child) {
    const bool atBottom = cursor->IS_LEAF || cursor->ptr[0]->IS_LEAF;
    if (atBottom) {
        return NULL;
    }
    for (int slot = 0; slot <= cursor->size; ++slot) {
        Node *candidate = cursor->ptr[slot];
        if (candidate == child) {
            return cursor;
        }
        Node *found = findParent(candidate, child);
        if (found != NULL) {
            return found;
        }
    }
    return NULL;
}
// Removes key x (and its value) from the tree.
//
// Prints "Tree empty" when there is no root and "Not found" when the target
// leaf does not contain x. If the leaf drops below (MAX + 1) / 2 entries it
// first tries to borrow one entry from the left, then the right sibling;
// failing that, it is merged with a sibling and the separator is removed from
// the parent via removeInternal(). A leaf keeps the link to the next leaf in
// ptr[size].
void BPTree::remove(int x) {
    if (root == NULL) {
        std::cout << "Tree empty\n";
        return;
    }

    // Descend to the leaf for x, tracking the parent and the sibling slots.
    Node *cursor = root;
    Node *parent = NULL;
    int leftSibling = -1;
    int rightSibling = -1;
    while (cursor->IS_LEAF == false) {
        for (int i = 0; i < cursor->size; i++) {
            parent = cursor;
            leftSibling = i - 1;
            rightSibling = i + 1;
            if (x < cursor->key[i]) {
                cursor = cursor->ptr[i];
                break;
            }
            if (i == cursor->size - 1) {
                leftSibling = i;
                rightSibling = i + 2;
                cursor = cursor->ptr[i + 1];
                break;
            }
        }
    }

    bool found = false;
    int pos;
    for (pos = 0; pos < cursor->size; pos++) {
        if (cursor->key[pos] == x) {
            found = true;
            break;
        }
    }
    if (!found) {
        std::cout << "Not found\n";
        return;
    }

    // Close the gap. Values move together with their keys, and the loop stops
    // at size - 1 so that key[size] (out of bounds for a full leaf) is never read.
    for (int i = pos; i < cursor->size - 1; i++) {
        cursor->key[i] = cursor->key[i + 1];
        cursor->val[i] = cursor->val[i + 1];
    }
    cursor->size--;

    if (cursor == root) {
        // The root is the only leaf: it has no successor and no minimum fill.
        for (int i = 0; i < MAX + 1; i++) {
            cursor->ptr[i] = NULL;
        }
        if (cursor->size == 0) {
            std::cout << "Tree died\n";
            delete[] cursor->key;
            delete[] cursor->val;
            delete[] cursor->ptr;
            delete cursor;
            root = NULL;
        }
        return;
    }

    // Move the next-leaf link down to its new slot, ptr[size].
    cursor->ptr[cursor->size] = cursor->ptr[cursor->size + 1];
    cursor->ptr[cursor->size + 1] = NULL;
    if (cursor->size >= (MAX + 1) / 2) {
        return;
    }

    // Underflow: try to borrow the last entry of the left sibling.
    if (leftSibling >= 0) {
        Node *leftNode = parent->ptr[leftSibling];
        if (leftNode->size >= (MAX + 1) / 2 + 1) {
            for (int i = cursor->size; i > 0; i--) {
                cursor->key[i] = cursor->key[i - 1];
                cursor->val[i] = cursor->val[i - 1];
            }
            cursor->size++;
            cursor->ptr[cursor->size] = cursor->ptr[cursor->size - 1];
            cursor->ptr[cursor->size - 1] = NULL;
            cursor->key[0] = leftNode->key[leftNode->size - 1];
            cursor->val[0] = leftNode->val[leftNode->size - 1];
            leftNode->size--;
            leftNode->ptr[leftNode->size] = cursor;
            leftNode->ptr[leftNode->size + 1] = NULL;
            parent->key[leftSibling] = cursor->key[0];
            return;
        }
    }

    // Otherwise try to borrow the first entry of the right sibling.
    if (rightSibling <= parent->size) {
        Node *rightNode = parent->ptr[rightSibling];
        if (rightNode->size >= (MAX + 1) / 2 + 1) {
            cursor->size++;
            cursor->ptr[cursor->size] = cursor->ptr[cursor->size - 1];
            cursor->ptr[cursor->size - 1] = NULL;
            cursor->key[cursor->size - 1] = rightNode->key[0];
            cursor->val[cursor->size - 1] = rightNode->val[0];
            rightNode->size--;
            rightNode->ptr[rightNode->size] = rightNode->ptr[rightNode->size + 1];
            rightNode->ptr[rightNode->size + 1] = NULL;
            for (int i = 0; i < rightNode->size; i++) {
                rightNode->key[i] = rightNode->key[i + 1];
                rightNode->val[i] = rightNode->val[i + 1];
            }
            parent->key[rightSibling - 1] = rightNode->key[0];
            return;
        }
    }

    // No sibling can lend an entry: merge. removeInternal() frees the removed
    // child itself when it collapses a one-key root, so the node must only be
    // freed here when that is not about to happen.
    const bool collapsesRoot = (parent == root && parent->size == 1);
    if (leftSibling >= 0) {
        Node *leftNode = parent->ptr[leftSibling];
        for (int i = leftNode->size, j = 0; j < cursor->size; i++, j++) {
            leftNode->key[i] = cursor->key[j];
            leftNode->val[i] = cursor->val[j];
        }
        leftNode->ptr[leftNode->size] = NULL;
        leftNode->size += cursor->size;
        leftNode->ptr[leftNode->size] = cursor->ptr[cursor->size];
        removeInternal(parent->key[leftSibling], parent, cursor);
        if (!collapsesRoot) {
            delete[] cursor->key;
            delete[] cursor->val;
            delete[] cursor->ptr;
            delete cursor;
        }
    } else if (rightSibling <= parent->size) {
        Node *rightNode = parent->ptr[rightSibling];
        for (int i = cursor->size, j = 0; j < rightNode->size; i++, j++) {
            cursor->key[i] = rightNode->key[j];
            cursor->val[i] = rightNode->val[j];
        }
        cursor->ptr[cursor->size] = NULL;
        cursor->size += rightNode->size;
        cursor->ptr[cursor->size] = rightNode->ptr[rightNode->size];
        std::cout << "Merging two leaf nodes\n";
        removeInternal(parent->key[rightSibling - 1], parent, rightNode);
        if (!collapsesRoot) {
            delete[] rightNode->key;
            delete[] rightNode->val;
            delete[] rightNode->ptr;
            delete rightNode;
        }
    }
}
void BPTree::removeInternal(int x, Node *cursor, Node *child) {
if (cursor == root) {
if (cursor->size == 1) {
if (cursor->ptr[1] == child) {
delete[] child->key;
delete[] child->ptr;
delete child;
root = cursor->ptr[0];
delete[] cursor->key;
delete[] cursor->ptr;
delete cursor;
cout << "Changed root node\n";
return;
} else if (cursor->ptr[0] == child) {
delete[] child->key;
delete[] child->ptr;
delete child;
root = cursor->ptr[1];
delete[] cursor->key;
delete[] cursor->ptr;
delete cursor;
cout << "Changed root node\n";
return;
}
}
}
int pos;
for (pos = 0; pos < cursor->size; pos++) {
if (cursor->key[pos] == x) {
break;
}
}
for (int i = pos; i < cursor->size; i++) {
cursor->key[i] = cursor->key[i + 1];
}
for (pos = 0; pos < cursor->size + 1; pos++) {
if (cursor->ptr[pos] == child) {
break;
}
}
for (int i = pos; i < cursor->size + 1; i++) {
cursor->ptr[i] = cursor->ptr[i + 1];
}
cursor->size--;
if (cursor->size >= (MAX + 1) / 2 - 1) {
return;
}
if (cursor == root) return;
Node *parent = findParent(root, cursor);
int leftSibling, rightSibling;
for (pos = 0; pos < parent->size + 1; pos++) {
if (parent->ptr[pos] == cursor) {
leftSibling = pos - 1;
rightSibling = pos + 1;
break;
}
}
if (leftSibling >= 0) {
Node *leftNode = parent->ptr[leftSibling];
if (leftNode->size >= (MAX + 1) / 2) {
for (int i = cursor->size; i > 0; i--) {
cursor->key[i] = cursor->key[i - 1];
}
cursor->key[0] = parent->key[leftSibling];
parent->key[leftSibling] = leftNode->key[leftNode->size - 1];
for (int i = cursor->size + 1; i > 0; i--) {
cursor->ptr[i] = cursor->ptr[i - 1];
}
cursor->ptr[0] = leftNode->ptr[leftNode->size];
cursor->size++;
leftNode->size--;
return;
}
}
if (rightSibling <= parent->size) {
Node *rightNode = parent->ptr[rightSibling];
if (rightNode->size >= (MAX + 1) / 2) {
cursor->key[cursor->size] = parent->key[pos];
parent->key[pos] = rightNode->key[0];
for (int i = 0; i < rightNode->size - 1; i++) {
rightNode->key[i] = rightNode->key[i + 1];
}
cursor->ptr[cursor->size + 1] = rightNode->ptr[0];
for (int i = 0; i < rightNode->size; ++i) {
rightNode->ptr[i] = rightNode->ptr[i + 1];
}
cursor->size++;
rightNode->size--;
return;
}
}
if (leftSibling >= 0) {
Node *leftNode = parent->ptr[leftSibling];
leftNode->key[leftNode->size] = parent->key[leftSibling];
for (int i = leftNode->size + 1, j = 0; j < cursor->size; j++) {
leftNode->key[i] = cursor->key[j];
}
for (int i = leftNode->size + 1, j = 0; j < cursor->size + 1; j++) {
leftNode->ptr[i] = cursor->ptr[j];
cursor->ptr[j] = NULL;
}
leftNode->size += cursor->size + 1;
cursor->size = 0;
removeInternal(parent->key[leftSibling], parent, cursor);
} else if (rightSibling <= parent->size) {
Node *rightNode = parent->ptr[rightSibling];
cursor->key[cursor->size] = parent->key[rightSibling - 1];
for (int i = cursor->size + 1, j = 0; j < rightNode->size; j++) {
cursor->key[i] = rightNode->key[j];
}
for (int i = cursor->size + 1, j = 0; j < rightNode->size + 1; j++) {
cursor->ptr[i] = rightNode->ptr[j];
rightNode->ptr[j] = NULL;
}
cursor->size += rightNode->size + 1;
rightNode->size = 0;
removeInternal(parent->key[rightSibling - 1], parent, rightNode);
}
}
// Prints the keys of every leaf in the subtree rooted at `cursor`, one leaf
// per line with keys separated by spaces. A NULL cursor prints nothing.
void BPTree::display(Node *cursor) {
    if (cursor == NULL) {
        return;
    }
    if (cursor->IS_LEAF) {
        for (int idx = 0; idx < cursor->size; ++idx) {
            std::cout << cursor->key[idx] << " ";
        }
        std::cout << "\n";
        return;
    }
    // Internal node: visit each of its size + 1 children in order.
    for (int idx = 0; idx <= cursor->size; ++idx) {
        display(cursor->ptr[idx]);
    }
}
// Range query: prints every key k with lval <= k <= rval (as k / 100) and
// increments *cnt once per hit.
//
// Descends to the leaf for lval, then follows the next-leaf links (ptr[size])
// until a key greater than rval is seen or the leaf chain ends. An empty tree
// produces no output.
void BPTree::search(int lval, int rval, int *cnt) {
    if (root == NULL) {
        return;
    }
    Node *cursor = root;
    while (!cursor->IS_LEAF) {
        for (int i = 0; i < cursor->size; i++) {
            if (lval < cursor->key[i]) {
                cursor = cursor->ptr[i];
                break;
            }
            if (i == cursor->size - 1) {
                cursor = cursor->ptr[i + 1];
                break;
            }
        }
    }
    // Check for NULL before every dereference: the last leaf has no successor,
    // so a range that extends past the largest key ends the walk here.
    while (cursor != NULL && cursor->IS_LEAF) {
        for (int i = 0; i < cursor->size; i++) {
            const int k = cursor->key[i];
            if (k > rval) {
                return;  // keys are sorted, nothing further can match
            }
            if (k >= lval) {
                printf("%lf ", 1.0 * k / 100);
                if (cnt != NULL) {
                    (*cnt)++;
                }
            }
        }
        cursor = cursor->ptr[cursor->size];
    }
}
pair<bool, int> BPTree::findOne(int x) {
Node *cursor = root;
while (!cursor->IS_LEAF) {
for (int i = 0; i < cursor->size; i++) {
if (x < cursor->key[i]) {
cursor = cursor->ptr[i];
break;
}
if (i == cursor->size - 1) {
cursor = cursor->ptr[i + 1];
break;
}
}
}
if (cursor->IS_LEAF) {
return {true, cursor->val[0]};
} else {
return {false, 0};
}
}
// Accessor for the root node; the result is NULL while the tree is empty.
Node *BPTree::getRoot() {
    return this->root;
}
int main() {
srand((unsigned int)time(NULL));//在main函数里调用srand函数以实现真正随机
auto j = 0.1;
BPTree node;
auto t1 = GetTickCount();
for (auto i = 1; i <= 100000; i++) {
j += 0.1;
node.insert(j * 100, j * 100);
}
printf("插入耗时 : %d\n", GetTickCount() - t1);
auto findCnt = 0;
t1 = GetTickCount();
auto lval = 134.2, rval = 145.2;
for (int i = 1; i <= 100; i++) {
// *****
node.search(lval * 100, rval * 100, &findCnt);
// *****
printf("第%d次查找, 共找到%d个元素.\n", i, findCnt);
findCnt = 0;
lval += i;
rval += i;
if (i % 9 == 0) {
printf("第%d次随机移动\n", i / 9);
int oldIndex = rand(); // 原位置
int newIndex = rand(); // 新位置
pair<bool, int> findRes = node.findOne(oldIndex);
if (findRes.first == true) {
node.remove(oldIndex);
node.insert(newIndex, findRes.second);
}
}
}
printf("带随机移动的查找耗时 : %d\n", GetTickCount() - t1);
return 0;
}
10w个double键值对作基础数据,为了放大差距,进行了100次范围查找,11次随机移动,
本人运行环境为:CPU:AMD Ryzen 7 6800H,语言标准:C++11, IDE:Dev-C++5.11。
两份代码分别连续跑十次取平均值:
multimap实现方式:
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 平均 |
---|---|---|---|---|---|---|---|---|---|---|
4250 | 4437 | 4313 | 4297 | 4281 | 4531 | 4484 | 4328 | 4391 | 4640 | 4395.2 |
B+树实现方式:
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 平均 |
---|---|---|---|---|---|---|---|---|---|---|
4047 | 4204 | 4250 | 3985 | 4032 | 4016 | 4078 | 4141 | 4032 | 4047 | 4083.2 |
效率提升:(4395.2 - 4083.2)/ 4395.2 = 7.1%
总结:全网查阅了大量B+树的相关资料,在插入和删除方面分多种情况考虑的B+树实在是优秀,我拙劣地实现了一下,并自行编写了区间查找和单点查找。虽然在题目要求的维护和查找上效率已经超过了multimap,但也有已知缺点:在10w个数据的插入上,手动实现的B+树非常慢,因为它需要边建树边维护,靠各种单点插入、节点合并、节点拆分来保持结构。