源码:https://github.com/freebsd/freebsd/blob/master/lib/libc/stdlib/heapsort.c
使用文档:https://www.freebsd.org/cgi/man.cgi?query=heapsort&sektion=3&manpath=freebsd-release-ports
int heapsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *));
有4个参数,第一个参数base是一个指向待排序数组的指针变量,第二个参数nmemb是数组的元素数量,第三个参数size是数组中一个元素的内存空间大小,
第四个参数compar是一个指向比较函数的指针,该比较函数接收两个const void *指针参数;这4个参数和快排qsort的4个参数一样。
经典堆排序算法:
1,建立max heap大顶堆;
2,因为根节点是最大的,所以把它跟最后一个元素交换,同时把堆的大小减一;
3,针对新的根节点重新做堆化(heapify,只需要从根节点向下调整,递归过程),然后重复步骤2,直到堆的大小变为0,停止循环。
建立max heap大顶堆的步骤:
1,如果左孩子节点存在并且比根节点大,那么将最大值变量设置为左孩子节点的索引;
2,如果右孩子节点存在并且比当前最大值节点大,那么将最大值变量设置为右孩子节点的索引;
3,如果根节点不是最大值,那么交换当前节点和最大值节点,同时针对最大值节点所在的位置重复做步骤1和步骤2(递归过程),直到当前节点和最大值节点是同一个节点。
可以参考这个实现: https://www.geeksforgeeks.org/heap-sort/
对比下实现:
经典:
// Build heap (rearrange array)
for (int i = n / 2 - 1; i >= 0; i--)
heapify(arr, n, i);
FreeBSD heapsort:
for (l = nmemb / 2 + 1; --l;)
CREATE(l, nmemb, i, j, t, p, size, cnt, tmp);
经典:
// One by one extract an element from heap
for (int i=n-1; i>=0; i--)
{
// Move current root to end
swap(arr[0], arr[i]);
// call max heapify on the reduced heap
heapify(arr, i, 0);
}
FreeBSD heapsort:
/*
* For each element of the heap, save the largest element into its
* final slot, save the displaced element (k), then recreate the
* heap.
*/
while (nmemb > 1) {
COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);
COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);
--nmemb;
SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
}
k是临时变量,用来保存被换下来的元素。FreeBSD实现里base被设置为(char *)vbase - size,也就是元素从1开始编号,所以base + nmemb * size是最后一个元素,即vbase数组[nmemb-1];base + size是第一个元素,即vbase数组[0];
COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2);//把最后一个元素存到k
COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2);//把第一个元素root存到最后一个元素的位置end上。
SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);//重新建立大顶堆max heap
经典:
// To heapify a subtree rooted with node i which is
// an index in arr[]. n is size of heap
//
// Sift-down step for a max-heap stored in a 0-based array: the children of
// index i are at 2*i + 1 and 2*i + 2. The largest of arr[i] and its children
// (those whose index is < n) ends up at position i; if a child was moved up,
// the function recurses into that child's subtree.
//
// NOTE(review): swap() is not defined in this excerpt. It is presumably
// C++ std::swap (arguments taken by reference); a plain C function taking
// two ints by value could not exchange the array elements -- confirm.
void heapify(int arr[], int n, int i)
{
int largest = i; // Initialize largest as root
int l = 2*i + 1; // left = 2*i + 1
int r = 2*i + 2; // right = 2*i + 2
// If left child exists (l < n) and is larger than root
if (l < n && arr[l] > arr[largest])
largest = l;
// If right child exists (r < n) and is larger than largest so far
if (r < n && arr[r] > arr[largest])
largest = r;
// If largest is not root
if (largest != i)
{
swap(arr[i], arr[largest]);
// Recursively heapify the affected sub-tree
heapify(arr, n, largest);
}
}
经典算法步骤:
请看之前的"建立max heap大顶堆的步骤"
FreeBSD heapsort:
/*
* Select the top of the heap and 'heapify'. Since by far the most expensive
* action is the call to the compar function, a considerable optimization
* in the average case can be achieved due to the fact that k, the displaced
* element, is usually quite small, so it would be preferable to first
* heapify, always maintaining the invariant that the larger child is copied
* over its parent's record.
*
* Then, starting from the *bottom* of the heap, finding k's correct place,
* again maintaining the invariant. As a result of the invariant no element
* is 'lost' when k is assigned its correct place in the heap.
*
* The time savings from this optimization are on the order of 15-20% for the
* average case. See Knuth, Vol. 3, page 158, problem 18.
*
* Macro arguments:
*   par_i, child_i    - scratch index variables; indices are 1-based (the
*                       first loop starts at par_i = 1 and uses par_i * 2
*                       as the left child).
*   nmemb             - number of elements currently in the heap.
*   par, child        - scratch pointers, set to base + index * size.
*   size              - stride between consecutive elements.
*   k                 - the displaced element that is re-inserted.
*   count, tmp1, tmp2 - passed through to COPY unchanged.
*
* `base`, COMPAR and COPY are not macro arguments; they are taken from the
* expansion site and are defined outside this excerpt.
*
* First loop: walk from the root towards the bottom. At each level pick the
* left child, or the right sibling when it exists (child_i < nmemb) and
* COMPAR(left, right) < 0, and copy the chosen child over its parent. k is
* not compared in this phase.
*
* Second loop: walk back up from where the first loop stopped. If the root
* is reached (child_i == 1) or COMPAR(k, par) < 0, copy k into the current
* slot and stop; otherwise copy the parent down into the current slot and
* continue one level up.
*
* XXX Don't break the #define SELECT line, below. Reiser cpp gets upset.
*/
#define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \
for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \
child = base + child_i * size; \
if (child_i < nmemb && COMPAR(child, child + size) < 0) { \
child += size; \
++child_i; \
} \
par = base + par_i * size; \
COPY(par, child, count, size, tmp1, tmp2); \
} \
for (;;) { \
child_i = par_i; \
par_i = child_i / 2; \
child = base + child_i * size; \
par = base + par_i * size; \
if (child_i == 1 || COMPAR(k, par) < 0) { \
COPY(child, k, count, size, tmp1, tmp2); \
break; \
} \
COPY(child, par, count, size, tmp1, tmp2); \
} \
}
这里FreeBSD的做法和经典堆排序调整大顶堆的代码略有区别:经典代码里用的是递归,而FreeBSD的SELECT是一个宏,没有用到递归,而是用两个循环(先自顶向下,再自底向上)来完成;
先看下注释写什么:
/*
*选取堆顶元素并进行“堆化”(heapify)。由于到目前为止最昂贵的操作是调用比较函数compar,
*而被置换出来的元素k通常比较小,因此在平均情况下可以做一个可观的优化:先进行堆化,
*并始终保持“较大的子节点被复制到其父节点的位置上”这一不变式(invariant)。
*然后,从堆的“底部”开始向上为k寻找正确的位置,同时继续保持该不变式。
*正是由于这个不变式,当k被放到它在堆中的正确位置时,不会有任何元素“丢失”。
*这一优化在平均情况下大约能节省15-20%的时间。参见Knuth,第3卷,第158页,习题18。
*/
这里首先要理解child和par两个变量是做什么的,child是子节点,par是父节点,
创建堆的函数CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp)会保证父节点大于子节点,如果不符合,那么会交换。
initval(即调用处的l)的初值是nmemb / 2 + 1,但循环条件--l会先递减再判断,所以实际是从nmemb / 2开始建堆,每次循环递减,直到l减为0时循环结束(l为0时不再执行CREATE);
i, j, t, p对应par_i, child_i, par, child
par_i, child_i分别是父节点和子节点的索引(索引从1开始,par_i的左孩子是par_i * 2),par, child是指向对应元素的指针(base + 索引 * size);
取出堆顶后重新建立大顶堆的宏(SELECT是宏,不是函数):
SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2)
SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2);
解析下这个函数的实现:
//第一个for从根节点(par_i = 1)出发一路向下走到堆底:每一层选出左右孩子中较大的一个,把它复制到父节点par的位置上,然后以这个孩子作为新的父节点继续;这一阶段完全不和k做比较;
for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) {
//第二个for沿着刚才走过的路径从堆底向上回溯(从子节点走到父节点),为k寻找正确的位置,
for (;;) {
child_i = par_i; \
par_i = child_i / 2; \
child = base + child_i * size; \
par = base + par_i * size; \
//这个过程中,如果已经回到根节点(child_i == 1),或者k小于父节点par(COMPAR(k, par) < 0),那么child就是k的正确位置,把k的值赋给子节点child并且退出循环(k就是之前保存的、被堆顶元素换下来的最后一个元素);
if (child_i == 1 || COMPAR(k, par) < 0) { \
COPY(child, k, count, size, tmp1, tmp2); \
break; \
} \
//否则(k不小于父节点par),把父节点par的值下移赋给子节点child,然后继续向上一层查找;
COPY(child, par, count, size, tmp1, tmp2);
至此,我们解析了FreeBSD中heapsort的实现:它在取出堆顶后重建堆(SELECT)的过程中,先“始终保持较大的子项被复制到其父项记录上”一路走到堆底,再自底向上为k寻找位置,从而减少比较函数的调用次数,提高效率。