算法挑战(2022-12)

题目

简化下题意:有10w个键值对,键值分别代表了10w个点在x轴上的坐标和这个点的权值。
现在这10w个点在随机的移动,在某时刻给出一个范围 [L, R] ,求出此时坐标在这个范围内的所有点。

使用C++ STL中自带的multimap可以很方便地实现。现要求自行设计数据结构并实现,要求:
维护各点移动并完成范围查找的总复杂度比multimap低。

思路

首先,我们如何设计数据结构?
我们先了解下multimap的底层:红黑树。众所周知,二叉搜索树在查询方面优秀,但是为了维护子树节点的高度,使其保持平衡所带来的消耗巨大,为了解决这个问题才诞生了红黑树,因为它对节点染色,并按颜色规则来辅助维护节点高度的方式,大大减少了插入删除操作对维护上的消耗。

当然,红黑树的原型还是二叉树,虽然它能相对高效的保持树的平衡,但是在存储大量数据的情况下,免不了整棵树的高度越来越高,高度变高带来的缺点就是查询效率变低,因为每经过一次父子节点就意味多一次查询。

那么在大量数据存储的需求下,B树和B+树就诞生了,具体的演变和特点就不多概述,只需要知道B+树的插入和查找效率毫不逊色于红黑树,且本身是多叉树,树高更低。在磁盘存储的环境下,广泛应用于数据库存储引擎的B+树只需高度为3就大概可以存放1170×1170×16=21902400行数据。

那么,考虑到题目中的键和值都可能是double型,我采取的方式是将double型数据乘以100转化为int型(即保留两位小数)存储在我实现的B+树中,取出时再除以100还原即可。B+树节点的数据结构设计如下:
分别代表:叶节点标记,键,值,数据长度,指向子节点的指针/指向下一叶节点的指针。

// One B+ tree node. Everything except the constructor is private; BPTree
// reaches the fields through the friend declaration.
class Node {
	private:
		bool IS_LEAF;   // leaf marker
		int *key;       // keys
		int *val;       // values
		int size;       // data length
		Node **ptr;     // pointers to children / pointer to the next leaf

		friend class BPTree;

	public:
		Node();
};

那么,为了方便进行复杂度比较,我针对题目在测试代码上作了调整:
数据插入使用for循环保证覆盖,移动使用以当前时间为种子的伪随机数进行,multimap实现方式的代码如下:

#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <map>
#include <windows.h>
using namespace std;

int main() {
	srand((unsigned int)time(NULL));//在main函数里调用srand函数以实现真正随机
	
	auto j = 0.1;
	multimap<double, double> map;
	auto t1 = GetTickCount();
	for (auto i = 0; i < 100000; i++) {
		j += 0.1;
		map.insert(make_pair(j, j));
	}
	printf("插入耗时 : %d\n", GetTickCount() - t1);
	
	auto findCnt = 0;
	t1 = GetTickCount();
	
	auto lval = 134.2, rval = 145.2;
	for (int i = 1; i <= 100; i++) {
		// *****
		auto low = map.lower_bound(lval);
		auto up = map.upper_bound(rval);
		for (auto iter = low; iter != up; iter++) {
			printf("%lf ", (*iter).first);
			findCnt ++;
		}
		// *****
		printf("第%d次查找, 共找到%d个元素.\n", i, findCnt);
		findCnt = 0;
		lval += i;
		rval += i;
		
		if (i % 9 == 0) {
			printf("第%d次随机移动\n", i / 9);
			int oldIndex = rand(); // 原位置 
			int newIndex = rand(); // 新位置 
			auto findRes = map.find(oldIndex);
			if (findRes != map.end()) {
				int oldVal = findRes->second; // 记录原位置的值 
				map.erase(map.find(oldIndex)); 
				map.insert(make_pair(newIndex, oldVal));
			}
		}
	}	
	
	printf("带随机移动的查找耗时 : %d\n", GetTickCount() - t1);
	return 0;
}

使用B+树实现的代码如下:

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <utility>
#include <vector>
#include <windows.h>
using namespace std;
// Maximum number of keys a node holds: each Node allocates MAX keys, MAX values
// and MAX + 1 pointers.
int MAX = 3;

// Forward declaration so Node can name BPTree in its friend declaration.
class BPTree;

// Storage unit of the B+ tree. The fields are private and manipulated
// directly by BPTree, which is declared a friend.
class Node {
	private:
		bool IS_LEAF;   // true for a leaf, false for an internal node
		int *key;       // key array
		int *val;       // value array, parallel to key
		int size;       // number of keys currently in use
		Node **ptr;     // child pointers; in a leaf, ptr[size] links to the next leaf

		friend class BPTree;

	public:
		Node();
};

// B+ tree mapping int keys to int values.
class BPTree {
	private:
		Node *root;

		// Inserts separator (x, y) and `child` into internal node `cursor`.
		void insertInternal(int x, int y, Node *cursor, Node *child);
		// Removes separator x and `child` from internal node `cursor`.
		void removeInternal(int x, Node *cursor, Node *child);
		// Searches the subtree under `cursor` for the node that points at `child`.
		Node *findParent(Node *cursor, Node *child);

	public:
		BPTree();
		// Point lookup; yields a (found, value) pair.
		pair<bool, int> findOne(int x);
		// Range query over [lval, rval]; *cnt is incremented once per match.
		void search(int lval, int rval, int *cnt);
		void insert(int x, int y);
		void remove(int x);
		// Prints the keys of every leaf under `cursor`.
		void display(Node *cursor);
		Node *getRoot();
};

// Allocates room for MAX keys, MAX values and MAX + 1 pointers.
// All storage is value-initialised: keys/values start at 0 and every pointer
// starts out null, so a leaf's next-link is a well-defined "end of chain"
// until the tree sets it. `size` and `IS_LEAF` also get defined defaults;
// callers still assign the real values after construction.
Node::Node() {
	IS_LEAF = false;
	size = 0;
	key = new int[MAX]();
	val = new int[MAX]();
	ptr = new Node *[MAX + 1]();
}

// Creates an empty tree (no root node yet).
BPTree::BPTree() : root(NULL) {}

void BPTree::insert(int x, int y) {
	if (root == NULL) {
		root = new Node;
		root->key[0] = x;
		root->val[0] = y; // 根节点赋值 
		root->IS_LEAF = true;
		root->size = 1;
	} else {
		Node *cursor = root;
		Node *parent;
		while (cursor->IS_LEAF == false) {
			parent = cursor;
			for (int i = 0; i < cursor->size; i++) {
				if (x < cursor->key[i]) {
					cursor = cursor->ptr[i];
					break;
				}
				if (i == cursor->size - 1) {
					cursor = cursor->ptr[i + 1];
					break;
				}
			}
		}
		if (cursor->size < MAX) {
			int i = 0;
			while (x > cursor->key[i] && i < cursor->size) i++;
			for (int j = cursor->size; j > i; j--) {
				cursor->key[j] = cursor->key[j - 1];
				cursor->val[j] = cursor->val[j - 1]; // 向后移动 
			}
			cursor->key[i] = x;
			cursor->val[i] = y; // 插入值 
			cursor->size++;
			cursor->ptr[cursor->size] = cursor->ptr[cursor->size - 1];
			cursor->ptr[cursor->size - 1] = NULL;
		} else {
			Node *newLeaf = new Node;
			int virtualNode[MAX + 1];
			int virtualValue[MAX + 1]; // 定义数组暂存值 
			for (int i = 0; i < MAX; i++) {
				virtualNode[i] = cursor->key[i];
				virtualValue[i] = cursor->val[i]; // 复制值 
			}
			int i = 0, j;
			while (x > virtualNode[i] && i < MAX) i++;
			for (int j = MAX + 1; j > i; j--) {
				virtualNode[j] = virtualNode[j - 1];
				virtualValue[j] = virtualValue[j - 1]; // 向后移动 
			}
			virtualNode[i] = x;
			virtualValue[i] = y; // 插入值 
			newLeaf->IS_LEAF = true;
			cursor->size = (MAX + 1) / 2;
			newLeaf->size = MAX + 1 - (MAX + 1) / 2;
			cursor->ptr[cursor->size] = newLeaf;
			newLeaf->ptr[newLeaf->size] = cursor->ptr[MAX];
			cursor->ptr[MAX] = NULL;
			for (i = 0; i < cursor->size; i++) {
				cursor->key[i] = virtualNode[i];
				cursor->val[i] = virtualValue[i]; // 复制值
			}
			for (i = 0, j = cursor->size; i < newLeaf->size; i++, j++) {
				newLeaf->key[i] = virtualNode[j];
				newLeaf->val[i] = virtualValue[j]; // 复制到新节点 
			}
			if (cursor == root) {
				Node *newRoot = new Node;
				newRoot->key[0] = newLeaf->key[0];
				newRoot->val[0] = newLeaf->val[0]; // 新根
				newRoot->ptr[0] = cursor;
				newRoot->ptr[1] = newLeaf;
				newRoot->IS_LEAF = false;
				newRoot->size = 1;
				root = newRoot;
			} else {
				insertInternal(newLeaf->key[0], newLeaf->val[0], parent, newLeaf);
			}
		}
	}
}

void BPTree::insertInternal(int x, int y, Node *cursor, Node *child) {
	if (cursor->size < MAX) {
		int i = 0;
		while (x > cursor->key[i] && i < cursor->size) i++;
		for (int j = cursor->size; j > i; j--) {
			cursor->key[j] = cursor->key[j - 1];
			cursor->val[j] = cursor->val[j - 1]; // 向后移动 
		}
		for (int j = cursor->size + 1; j > i + 1; j--) {
			cursor->ptr[j] = cursor->ptr[j - 1]; 
		}
		cursor->key[i] = x;
		cursor->val[i] = y; // 插值 
		cursor->size++;
		cursor->ptr[i + 1] = child;
	} else {
		Node *newInternal = new Node;
		int virtualKey[MAX + 1];
		int virtualValue[MAX + 1]; // 定义数组暂存值 
		Node *virtualPtr[MAX + 2];
		for (int i = 0; i < MAX; i++) {
			virtualKey[i] = cursor->key[i];
			virtualValue[i] = cursor->val[i]; // 复制到数组 
		}
		for (int i = 0; i < MAX + 1; i++) {
			virtualPtr[i] = cursor->ptr[i];
		}
		int i = 0, j;
		while (x > virtualKey[i] && i < MAX) i++;
		for (int j = MAX + 1; j > i; j--) {
			virtualKey[j] = virtualKey[j - 1];
			virtualValue[j] = virtualValue[j - 1]; // 向前移动 
		}
		virtualKey[i] = x;
		for (int j = MAX + 2; j > i + 1; j--) {
			virtualPtr[j] = virtualPtr[j - 1];
		}
		virtualPtr[i + 1] = child;
		newInternal->IS_LEAF = false;
		cursor->size = (MAX + 1) / 2;
		newInternal->size = MAX - (MAX + 1) / 2;
		for (i = 0, j = cursor->size + 1; i < newInternal->size; i++, j++) {
			newInternal->key[i] = virtualKey[j];
			newInternal->val[i] = virtualValue[j]; // 复制到新节点 
		}
		for (i = 0, j = cursor->size + 1; i < newInternal->size + 1; i++, j++) {
			newInternal->ptr[i] = virtualPtr[j];
		}
		if (cursor == root) {
			Node *newRoot = new Node;
			newRoot->key[0] = cursor->key[cursor->size];
			newRoot->val[0] = cursor->val[cursor->size]; // 修改值 
			newRoot->ptr[0] = cursor;
			newRoot->ptr[1] = newInternal;
			newRoot->IS_LEAF = false;
			newRoot->size = 1;
			root = newRoot;
		} else {
			insertInternal(cursor->key[cursor->size], cursor->val[cursor->size], 
				findParent(root, cursor), newInternal);
		}
	}
}

// Depth-first search, starting at `cursor`, for the node that has `child`
// among its pointers. Returns that node, or NULL when none is found.
// Leaves, and nodes whose first child is a leaf, are never reported.
Node *BPTree::findParent(Node *cursor, Node *child) {
	if (cursor->IS_LEAF || cursor->ptr[0]->IS_LEAF) return NULL;

	const int childCount = cursor->size + 1;
	for (int idx = 0; idx < childCount; ++idx) {
		Node *candidate = cursor->ptr[idx];
		if (candidate == child) return cursor;

		Node *found = findParent(candidate, child);
		if (found != NULL) return found;
	}
	return NULL;
}

void BPTree::remove(int x) {
	if (root == NULL) {
		cout << "Tree empty\n";
	} else {
		Node *cursor = root;
		Node *parent;
		int leftSibling, rightSibling;
		while (cursor->IS_LEAF == false) {
			for (int i = 0; i < cursor->size; i++) {
				parent = cursor;
				leftSibling = i - 1;
				rightSibling = i + 1;
				if (x < cursor->key[i]) {
					cursor = cursor->ptr[i];
					break;
				}
				if (i == cursor->size - 1) {
					leftSibling = i;
					rightSibling = i + 2;
					cursor = cursor->ptr[i + 1];
					break;
				}
			}
		}
		bool found = false;
		int pos;
		for (pos = 0; pos < cursor->size; pos++) {
			if (cursor->key[pos] == x) {
				found = true;
				break;
			}
		}
		if (!found) {
			cout << "Not found\n";
			return;
		}
		for (int i = pos; i < cursor->size; i++) {
			cursor->key[i] = cursor->key[i + 1];
		}
		cursor->size--;
		if (cursor == root) {
			for (int i = 0; i < MAX + 1; i++) {
				cursor->ptr[i] = NULL;
			}
			if (cursor->size == 0) {
				cout << "Tree died\n";
				delete[] cursor->key;
				delete[] cursor->ptr;
				delete cursor;
				root = NULL;
			}
			return;
		}
		cursor->ptr[cursor->size] = cursor->ptr[cursor->size + 1];
		cursor->ptr[cursor->size + 1] = NULL;
		if (cursor->size >= (MAX + 1) / 2) {
			return;
		}
		if (leftSibling >= 0) {
			Node *leftNode = parent->ptr[leftSibling];
			if (leftNode->size >= (MAX + 1) / 2 + 1) {
				for (int i = cursor->size; i > 0; i--) {
					cursor->key[i] = cursor->key[i - 1];
				}
				cursor->size++;
				cursor->ptr[cursor->size] = cursor->ptr[cursor->size - 1];
				cursor->ptr[cursor->size - 1] = NULL;
				cursor->key[0] = leftNode->key[leftNode->size - 1];
				leftNode->size--;
				leftNode->ptr[leftNode->size] = cursor;
				leftNode->ptr[leftNode->size + 1] = NULL;
				parent->key[leftSibling] = cursor->key[0];
				return;
			}
		}
		if (rightSibling <= parent->size) {
			Node *rightNode = parent->ptr[rightSibling];
			if (rightNode->size >= (MAX + 1) / 2 + 1) {
				cursor->size++;
				cursor->ptr[cursor->size] = cursor->ptr[cursor->size - 1];
				cursor->ptr[cursor->size - 1] = NULL;
				cursor->key[cursor->size - 1] = rightNode->key[0];
				rightNode->size--;
				rightNode->ptr[rightNode->size] = rightNode->ptr[rightNode->size + 1];
				rightNode->ptr[rightNode->size + 1] = NULL;
				for (int i = 0; i < rightNode->size; i++) {
					rightNode->key[i] = rightNode->key[i + 1];
				}
				parent->key[rightSibling - 1] = rightNode->key[0];
				return;
			}
		}
		if (leftSibling >= 0) {
			Node *leftNode = parent->ptr[leftSibling];
			for (int i = leftNode->size, j = 0; j < cursor->size; i++, j++) {
				leftNode->key[i] = cursor->key[j];
			}
			leftNode->ptr[leftNode->size] = NULL;
			leftNode->size += cursor->size;
			leftNode->ptr[leftNode->size] = cursor->ptr[cursor->size];
			removeInternal(parent->key[leftSibling], parent, cursor);
			delete[] cursor->key;
			delete[] cursor->ptr;
			delete cursor;
		} else if (rightSibling <= parent->size) {
			Node *rightNode = parent->ptr[rightSibling];
			for (int i = cursor->size, j = 0; j < rightNode->size; i++, j++) {
				cursor->key[i] = rightNode->key[j];
			}
			cursor->ptr[cursor->size] = NULL;
			cursor->size += rightNode->size;
			cursor->ptr[cursor->size] = rightNode->ptr[rightNode->size];
			cout << "Merging two leaf nodes\n";
			removeInternal(parent->key[rightSibling - 1], parent, rightNode);
			delete[] rightNode->key;
			delete[] rightNode->ptr;
			delete rightNode;
		}
	}
}

void BPTree::removeInternal(int x, Node *cursor, Node *child) {
	if (cursor == root) {
		if (cursor->size == 1) {
			if (cursor->ptr[1] == child) {
				delete[] child->key;
				delete[] child->ptr;
				delete child;
				root = cursor->ptr[0];
				delete[] cursor->key;
				delete[] cursor->ptr;
				delete cursor;
				cout << "Changed root node\n";
				return;
			} else if (cursor->ptr[0] == child) {
				delete[] child->key;
				delete[] child->ptr;
				delete child;
				root = cursor->ptr[1];
				delete[] cursor->key;
				delete[] cursor->ptr;
				delete cursor;
				cout << "Changed root node\n";
				return;
			}
		}
	}
	int pos;
	for (pos = 0; pos < cursor->size; pos++) {
		if (cursor->key[pos] == x) {
			break;
		}
	}
	for (int i = pos; i < cursor->size; i++) {
		cursor->key[i] = cursor->key[i + 1];
	}
	for (pos = 0; pos < cursor->size + 1; pos++) {
		if (cursor->ptr[pos] == child) {
			break;
		}
	}
	for (int i = pos; i < cursor->size + 1; i++) {
		cursor->ptr[i] = cursor->ptr[i + 1];
	}
	cursor->size--;
	if (cursor->size >= (MAX + 1) / 2 - 1) {
		return;
	}
	if (cursor == root) return;
	Node *parent = findParent(root, cursor);
	int leftSibling, rightSibling;
	for (pos = 0; pos < parent->size + 1; pos++) {
		if (parent->ptr[pos] == cursor) {
			leftSibling = pos - 1;
			rightSibling = pos + 1;
			break;
		}
	}
	if (leftSibling >= 0) {
		Node *leftNode = parent->ptr[leftSibling];
		if (leftNode->size >= (MAX + 1) / 2) {
			for (int i = cursor->size; i > 0; i--) {
				cursor->key[i] = cursor->key[i - 1];
			}
			cursor->key[0] = parent->key[leftSibling];
			parent->key[leftSibling] = leftNode->key[leftNode->size - 1];
			for (int i = cursor->size + 1; i > 0; i--) {
				cursor->ptr[i] = cursor->ptr[i - 1];
			}
			cursor->ptr[0] = leftNode->ptr[leftNode->size];
			cursor->size++;
			leftNode->size--;
			return;
		}
	}
	if (rightSibling <= parent->size) {
		Node *rightNode = parent->ptr[rightSibling];
		if (rightNode->size >= (MAX + 1) / 2) {
			cursor->key[cursor->size] = parent->key[pos];
			parent->key[pos] = rightNode->key[0];
			for (int i = 0; i < rightNode->size - 1; i++) {
				rightNode->key[i] = rightNode->key[i + 1];
			}
			cursor->ptr[cursor->size + 1] = rightNode->ptr[0];
			for (int i = 0; i < rightNode->size; ++i) {
				rightNode->ptr[i] = rightNode->ptr[i + 1];
			}
			cursor->size++;
			rightNode->size--;
			return;
		}
	}
	if (leftSibling >= 0) {
		Node *leftNode = parent->ptr[leftSibling];
		leftNode->key[leftNode->size] = parent->key[leftSibling];
		for (int i = leftNode->size + 1, j = 0; j < cursor->size; j++) {
			leftNode->key[i] = cursor->key[j];
		}
		for (int i = leftNode->size + 1, j = 0; j < cursor->size + 1; j++) {
			leftNode->ptr[i] = cursor->ptr[j];
			cursor->ptr[j] = NULL;
		}
		leftNode->size += cursor->size + 1;
		cursor->size = 0;
		removeInternal(parent->key[leftSibling], parent, cursor);
	} else if (rightSibling <= parent->size) {
		Node *rightNode = parent->ptr[rightSibling];
		cursor->key[cursor->size] = parent->key[rightSibling - 1];
		for (int i = cursor->size + 1, j = 0; j < rightNode->size; j++) {
			cursor->key[i] = rightNode->key[j];
		}
		for (int i = cursor->size + 1, j = 0; j < rightNode->size + 1; j++) {
			cursor->ptr[i] = rightNode->ptr[j];
			rightNode->ptr[j] = NULL;
		}
		cursor->size += rightNode->size + 1;
		rightNode->size = 0;
		removeInternal(parent->key[rightSibling - 1], parent, rightNode);
	}
}

// Prints the keys of every leaf in the subtree rooted at `cursor`, one leaf
// per line with keys separated by spaces. A NULL `cursor` prints nothing.
void BPTree::display(Node *cursor) {
	if (cursor == NULL) return;

	if (cursor->IS_LEAF) {
		for (int k = 0; k < cursor->size; ++k) {
			cout << cursor->key[k] << " ";
		}
		cout << "\n";
		return;
	}

	// Internal node: visit each of its size + 1 children in order.
	for (int childIdx = 0; childIdx <= cursor->size; ++childIdx) {
		display(cursor->ptr[childIdx]);
	}
}

// Range query: prints every stored key k with lval <= k <= rval (as k / 100
// in "%lf " format) and increments *cnt once per printed key.
//   lval, rval - inclusive bounds, in the same scaled-integer units as the keys
//   cnt        - match counter to increment; may be NULL to skip counting
// An empty tree yields no output.
void BPTree::search(int lval, int rval, int *cnt) {
	Node *cursor = root;
	if (cursor == NULL) return;

	// Descend to the leftmost leaf that can contain lval. Going left when
	// lval equals a separator costs at most one extra leaf but also finds
	// duplicates of lval that sit left of that separator.
	while (!cursor->IS_LEAF) {
		int childIdx = 0;
		while (childIdx < cursor->size && lval > cursor->key[childIdx]) childIdx++;
		cursor = cursor->ptr[childIdx];
	}

	// Follow the leaf chain (ptr[size] is the next-leaf link) until a key
	// exceeds rval or the chain ends.
	bool pastRange = false;
	while (cursor != NULL && cursor->IS_LEAF && !pastRange) {
		for (int i = 0; i < cursor->size; i++) {
			const int k = cursor->key[i];
			if (k > rval) {
				pastRange = true;
				break;
			}
			if (k >= lval) {
				printf("%lf ", 1.0 * k / 100);
				if (cnt != NULL) (*cnt)++;
			}
		}
		cursor = cursor->ptr[cursor->size];
	}
}

// Point lookup for key x.
// Returns {true, value} for the first entry with key x in the leaf reached
// for x, or {false, 0} when the tree is empty or that leaf does not hold x.
// The descent rule (go right when x equals a separator) matches remove(), so
// a key reported here is one remove() will also reach.
pair<bool, int> BPTree::findOne(int x) {
	if (root == NULL) {
		return {false, 0};
	}

	Node *cursor = root;
	while (!cursor->IS_LEAF) {
		int childIdx = 0;
		while (childIdx < cursor->size && x >= cursor->key[childIdx]) childIdx++;
		cursor = cursor->ptr[childIdx];
	}

	// Compare against the keys themselves; reaching a leaf is not a match.
	for (int i = 0; i < cursor->size; i++) {
		if (cursor->key[i] == x) {
			return {true, cursor->val[i]};
		}
	}
	return {false, 0};
}

// Accessor for the current root node pointer.
Node *BPTree::getRoot() {
	return this->root;
}

// B+ tree benchmark: inserts 100000 coordinates (scaled by 100 to integers),
// then runs 100 range queries with a random "move" after every 9th query.
// Prints the elapsed GetTickCount() difference for the insert phase and for
// the query/move phase. Returns 0.
int main() {
	// Seed the pseudo-random generator from the wall clock so each run picks different moves.
	srand((unsigned int)time(NULL));

	// Coordinates are stored as integers scaled by 100. Round to nearest
	// rather than truncate: binary rounding can leave the product a hair
	// below the intended integer, and a plain cast would then be off by one.
	auto toKey = [](double coord) { return static_cast<int>(std::lround(coord * 100)); };

	auto j = 0.1;
	BPTree node;
	auto t1 = GetTickCount();
	for (auto i = 1; i <= 100000; i++) {
		j += 0.1;
		const int scaled = toKey(j);
		node.insert(scaled, scaled);
	}
	// GetTickCount() yields a DWORD (unsigned long); print it with a matching specifier.
	printf("插入耗时 : %lu\n", (unsigned long)(GetTickCount() - t1));

	auto findCnt = 0;
	t1 = GetTickCount();

	auto lval = 134.2, rval = 145.2;
	for (int i = 1; i <= 100; i++) {
		// ***** range query over [lval, rval]
		node.search(toKey(lval), toKey(rval), &findCnt);
		// *****
		printf("第%d次查找, 共找到%d个元素.\n", i, findCnt);
		findCnt = 0;
		lval += i;
		rval += i;

		if (i % 9 == 0) {
			printf("第%d次随机移动\n", i / 9);
			// NOTE(review): these are used directly as scaled keys, whereas the
			// multimap benchmark treats rand() as an unscaled coordinate — confirm
			// which is intended before comparing the two timings.
			int oldIndex = rand(); // old position
			int newIndex = rand(); // new position

			pair<bool, int> findRes = node.findOne(oldIndex);
			if (findRes.first) {
				node.remove(oldIndex);
				node.insert(newIndex, findRes.second);
			}
		}
	}

	printf("带随机移动的查找耗时 : %lu\n", (unsigned long)(GetTickCount() - t1));
	return 0;
}

结果

10w个double键值对作基础数据,为了放大差距,进行了100次范围查找,11次随机移动,
本人运行环境为:CPU:AMD Ryzen 7 6800H,语言标准:C++11, IDE:Dev-C++5.11。
两份代码分别连续跑十次取平均值:

multimap实现方式:

1 2 3 4 5 6 7 8 9 10 平均
4250 4437 4313 4297 4281 4531 4484 4328 4391 4640 4395.2

B+树实现方式:

1 2 3 4 5 6 7 8 9 10 平均
4047 4204 4250 3985 4032 4016 4078 4141 4032 4047 4083.2

效率提升:(4395.2 - 4083.2)/ 4395.2 = 7.1%

总结:全网查阅了大量B+树的相关资料,在插入和删除方面分多种情况考虑的B+树实在是优秀,我拙劣地实现了一下,并自行编写了区间查找和单点查找。虽然在要求的维护和查找上效率已经超过了multimap,但是有一个已知缺点:在10w个数据的插入上,手动实现的B+树实在是非常慢,因为它需要边建树边维护,通过各种单点插值、节点合并、节点拆分来保持结构。

你可能感兴趣的:(算法,c++)