排序算法写了几遍,总是过段时间就忘,故在此汇总下。
写排序算法,重要的是理解它的原理,弄清楚如何遍历,以及遍历的终止条件。
从左建立有序区,将右侧的值依次插入该有序区,有序区中从插入的位置开始依次后移一位;从左往右遍历
// Sorts `datas` in ascending order in place using insertion sort.
// The prefix [0, next) is always sorted; each pass places datas[next] into it.
void InsertSort(std::vector<int>& datas)
{
    for(size_t next = 1; next < datas.size(); ++next)
    {
        const int pending = datas[next];

        // Scan the sorted prefix for the first element strictly greater than
        // the pending value; that is where the pending value belongs.
        size_t slot = 0;
        while(slot < next && !(pending < datas[slot]))
            ++slot;

        // No greater element found: the value is already in its final place.
        if(slot == next)
            continue;

        // Shift [slot, next) one step to the right to open the slot.
        for(size_t k = next; k > slot; --k)
            datas[k] = datas[k-1];
        datas[slot] = pending;
    }
}
从左到右相邻的数依次比较,值最大或最小的依次冒出;从右往左遍历
// Sorts `datas` in ascending order in place using bubble sort.
// After each outer pass the largest remaining value has moved to the end of
// the unsorted range [0, unsorted), so that range shrinks by one.
void BubbleSort(std::vector<int>& datas)
{
    for(size_t unsorted = datas.size(); unsorted > 1; --unsorted)
    {
        bool swapped = false;
        for(size_t j = 0; j + 1 < unsorted; ++j)
        {
            if(datas[j] > datas[j+1])
            {
                // Swap through a temporary: the add/subtract trick overflows
                // signed int (undefined behaviour) for large magnitudes.
                const int larger = datas[j];
                datas[j] = datas[j+1];
                datas[j+1] = larger;
                swapped = true;
            }
        }
        // A pass without swaps means the whole range is already ordered.
        if(!swapped)
            break;
    }
}
从第一个位置起,依次选择出该位置到结束的最小或最大值,放在当前位置
// Sorts `datas` in ascending order in place using selection sort.
// For every position, the smallest value of the remaining suffix is located
// first and then moved into place with a single swap.
void SelectSort(std::vector<int>& datas)
{
    for(size_t i = 0; i + 1 < datas.size(); ++i)
    {
        // Index of the smallest element seen so far in [i, size).
        size_t smallest = i;
        for(size_t j = i + 1; j < datas.size(); ++j)
        {
            if(datas[j] < datas[smallest])
                smallest = j;
        }
        if(smallest != i)
        {
            const int value = datas[i];
            datas[i] = datas[smallest];
            datas[smallest] = value;
        }
    }
}
两两比较排序;以2的倍数逐渐归并
// Sorts `datas` in ascending order using bottom-up (iterative) merge sort.
// Runs of length gap = 1, 2, 4, ... are merged pairwise into a scratch buffer
// and copied back, until a single run covers the whole vector.
void MergeSort(std::vector<int>& datas)
{
    const size_t count = datas.size();
    if(count < 2)
        return;

    // Scratch space owned by a local vector, so it is released automatically.
    std::vector<int> buffer(count, 0);

    for(size_t gap = 1; gap < count; gap *= 2)
    {
        // Number of leading elements rewritten into `buffer` during this round.
        size_t merged = 0;

        // A left run without a right partner is already in order and is
        // simply left where it is in `datas`.
        for(size_t idx = 0; idx + gap < count; idx += 2*gap)
        {
            const size_t mid = idx + gap;
            // The right run may be shorter than `gap` at the end of the data.
            const size_t end = (count - mid < gap) ? count : mid + gap;

            size_t i = idx;
            size_t j = mid;
            // Take from the left run on ties so equal values keep their order.
            while(i < mid && j < end)
            {
                if(datas[j] < datas[i])
                    buffer[merged++] = datas[j++];
                else
                    buffer[merged++] = datas[i++];
            }
            while(i < mid)
                buffer[merged++] = datas[i++];
            while(j < end)
                buffer[merged++] = datas[j++];
        }

        // Copy back only what was merged; an unpaired tail was never written
        // to `buffer`, so a whole-vector swap would lose it.
        for(size_t k = 0; k < merged; ++k)
            datas[k] = buffer[k];
    }
}
堆排序:1. 首先从最后一个非叶子节点(即最后一个含有子节点的节点,下标 n/2)开始向前调整,建立大根堆;2. 然后依次将堆顶与当前堆的最后一个元素交换,堆的大小减一,并让新的堆顶不断与较大的子节点交换下沉(最多 lg(n) 层),使剩余部分重新成为大根堆。
// Sifts the node at 1-based position `cur` down a max-heap stored in `data`.
//   data: heap storage; node k lives at data[k-1], its children at 2k and 2k+1.
//   cur:  1-based position of the node to sift down.
//   max:  1-based position of the last node that still belongs to the heap.
void HeapOrder(std::vector<int>& data, size_t cur, size_t max)
{
    for(;;)
    {
        const size_t left = 2*cur;
        const size_t right = left + 1;

        // Pick the largest of the node and whichever children are in the heap.
        size_t top = cur;
        if(left <= max && data[top-1] < data[left-1])
            top = left;
        if(right <= max && data[top-1] < data[right-1])
            top = right;

        // The node already dominates its children: heap order is restored.
        if(top == cur)
            return;

        const int value = data[cur-1];
        data[cur-1] = data[top-1];
        data[top-1] = value;

        // Continue from the child that received the smaller value.
        cur = top;
    }
}
// Sorts `datas` in ascending order in place using heap sort.
// Relies on HeapOrder(data, cur, max) to sift a 1-based node down a max-heap.
void HeapSort(std::vector<int>& datas)
{
    const size_t count = datas.size();

    // Build the initial max-heap: sift down every node that has a child,
    // starting from the last such node and walking back to the root.
    size_t node = count / 2;
    while(node > 0)
    {
        HeapOrder(datas, node, count);
        --node;
    }

    // Repeatedly move the current maximum to the end of the shrinking heap.
    size_t heap_size = count;
    while(heap_size > 1)
    {
        const int top = datas[0];
        datas[0] = datas[heap_size-1];
        datas[heap_size-1] = top;

        // The last slot is now final; restore heap order on what remains.
        --heap_size;
        HeapOrder(datas, 1, heap_size);
    }
}
1.从第一个位置开始,从最右往左遍历(如果从左往右比较的结果将无效):比它大或者等于,右侧值递减;比它小交换两个数,再从左往右遍历
2.从左往右遍历:比它小或者等于,左侧值递增;比它大交换两个数,再从右往左遍历
3.直到左右两个数相等,当前该数左侧的值小于等于它,右侧的值大于等于它,这个数已排序好
4.递归排序它的左侧,它的右侧
// Sorts the 1-based inclusive range [low, high] of `datas` in ascending order.
// The first element of the range is the pivot. It bounces between the two
// cursors: every swap moves it to the other cursor, and the cursor that does
// not hold the pivot walks toward it. When the cursors meet, the pivot sits at
// its final position and both sides are sorted recursively.
void QuickSort(std::vector<int>& datas, size_t low, size_t high)
{
    // Fewer than two elements (this also covers an empty range).
    if(low >= high)
        return;

    size_t lo = low;
    size_t hi = high;
    // True while the pivot sits under `lo`, so the scan advances from the right.
    bool pivot_at_lo = true;

    while(lo < hi)
    {
        if(datas[lo-1] > datas[hi-1])
        {
            // Out of order: exchange, which moves the pivot to the other cursor.
            const int value = datas[lo-1];
            datas[lo-1] = datas[hi-1];
            datas[hi-1] = value;

            if(pivot_at_lo)
                ++lo;
            else
                --hi;
            pivot_at_lo = !pivot_at_lo;
        }
        else if(pivot_at_lo)
        {
            --hi;
        }
        else
        {
            ++lo;
        }
    }

    // `lo` is the pivot's final 1-based position.
    QuickSort(datas, low, lo-1);
    QuickSort(datas, lo+1, high);
}
/// Quick sort entry point: sorts the whole vector in ascending order.
void QuickSort(std::vector<int>& datas)
{
    // The range overload takes 1-based inclusive positions.
    const size_t last = datas.size();
    QuickSort(datas, 1, last);
}
计数排序:分配一个与最大值等大的计数数组,每出现一个数就将该数对应的计数加一,最后按下标顺序遍历计数数组,把每个数按其出现次数依次写回。
// Sorts `datas` in ascending order using counting sort.
//   datas: values to sort; each must lie in the closed range [0, max].
//   max:   largest value the caller allows in `datas`.
// Throws std::out_of_range, leaving `datas` untouched, if any value is
// negative or greater than `max`.
void CountSort(std::vector<int>& datas, int max)
{
    if(datas.empty())
        return;

    // Validate before touching anything, and find the largest value actually
    // present so the counter table is no bigger than necessary.
    int largest = 0;
    for(size_t i = 0; i < datas.size(); ++i)
    {
        const int value = datas[i];
        if(value < 0 || value > max)
            throw std::out_of_range("CountSort: value outside [0, max]");
        if(value > largest)
            largest = value;
    }

    // counts[v] is the number of occurrences of v; indexing by the value
    // itself keeps 0 inside the table.
    std::vector<size_t> counts(static_cast<size_t>(largest) + 1, 0);
    for(size_t i = 0; i < datas.size(); ++i)
        ++counts[static_cast<size_t>(datas[i])];

    // Write every value back as many times as it occurred, in index order.
    size_t write_pos = 0;
    for(size_t value = 0; value < counts.size(); ++value)
    {
        for(size_t repeat = counts[value]; repeat != 0; --repeat)
            datas[write_pos++] = static_cast<int>(value);
    }
}
// Counting sort with the maximum value fixed at 1000000, so the function has
// the single-argument signature used by the tests further down.
// Every value must lie in [0, 1000000]; otherwise std::out_of_range is thrown
// and `datas` is left untouched.
void CountSort(std::vector<int>& datas)
{
    const int kMaxValue = 1000000;

    if(datas.empty())
        return;

    // Validate first, and find the largest value actually present so the
    // counter table is no bigger than necessary.
    int largest = 0;
    for(size_t i = 0; i < datas.size(); ++i)
    {
        const int value = datas[i];
        if(value < 0 || value > kMaxValue)
            throw std::out_of_range("CountSort: value outside [0, 1000000]");
        if(value > largest)
            largest = value;
    }

    // counts[v] is the number of occurrences of v; indexing by the value
    // itself keeps 0 inside the table.
    std::vector<size_t> counts(static_cast<size_t>(largest) + 1, 0);
    for(size_t i = 0; i < datas.size(); ++i)
        ++counts[static_cast<size_t>(datas[i])];

    // Write every value back as many times as it occurred, in index order.
    size_t write_pos = 0;
    for(size_t value = 0; value < counts.size(); ++value)
    {
        for(size_t repeat = counts[value]; repeat != 0; --repeat)
            datas[write_pos++] = static_cast<int>(value);
    }
}
开始分配基数个空间,当前位在哪个基数上,就添加给那个基数空间;从左到右,每次排序后将当前排序值赋值给datas;从低位到高位依次排序
// Sorts `datas` in ascending order using least-significant-digit radix sort.
//   datas: values to sort; negative values are supported.
//   radix: number base used to split values into digits; must be >= 2.
// Throws std::invalid_argument if `radix` < 2 and the data actually needs
// sorting (fewer than two elements, or all values equal, returns early).
void RadixSort(std::vector<int>& datas, const int radix)
{
    if(datas.size() < 2)
        return;

    // Sort by (value - smallest) so that every key is non-negative; 64-bit
    // arithmetic keeps the subtraction from overflowing.
    long long smallest = datas[0];
    long long largest = datas[0];
    for(size_t i = 1; i < datas.size(); ++i)
    {
        if(datas[i] < smallest)
            smallest = datas[i];
        if(datas[i] > largest)
            largest = datas[i];
    }
    const long long span = largest - smallest;
    if(span == 0)
        return;

    if(radix < 2)
        throw std::invalid_argument("RadixSort: radix must be at least 2");

    std::vector<std::vector<int> > buckets(static_cast<size_t>(radix));

    // `place` is radix^k, kept as an exact integer and computed once per pass
    // rather than through floating-point pow for every element. The loop ends
    // once no key has a non-zero digit at or above the current place.
    for(long long place = 1; span / place != 0; place *= radix)
    {
        // Distribute by the current digit; push_back keeps the order produced
        // by the previous pass, which is what makes LSD radix sort work.
        for(size_t i = 0; i < datas.size(); ++i)
        {
            const long long key = static_cast<long long>(datas[i]) - smallest;
            buckets[static_cast<size_t>((key / place) % radix)].push_back(datas[i]);
        }

        // Gather the buckets back in digit order and empty them for reuse.
        size_t write_pos = 0;
        for(size_t b = 0; b < buckets.size(); ++b)
        {
            for(size_t k = 0; k < buckets[b].size(); ++k)
                datas[write_pos++] = buckets[b][k];
            buckets[b].clear();
        }
    }
}
// Least-significant-digit radix sort with the base fixed at 10, so the
// function has the single-argument signature used by the tests further down.
// Sorts `datas` in ascending order; negative values are supported.
void RadixSort(std::vector<int>& datas)
{
    const long long kRadix = 10;

    if(datas.size() < 2)
        return;

    // Sort by (value - smallest) so that every key is non-negative; 64-bit
    // arithmetic keeps the subtraction from overflowing.
    long long smallest = datas[0];
    long long largest = datas[0];
    for(size_t i = 1; i < datas.size(); ++i)
    {
        if(datas[i] < smallest)
            smallest = datas[i];
        if(datas[i] > largest)
            largest = datas[i];
    }
    const long long span = largest - smallest;

    std::vector<std::vector<int> > buckets(static_cast<size_t>(kRadix));

    // `place` is 10^k, kept as an exact integer and computed once per pass
    // rather than through floating-point pow for every element. The loop ends
    // once no key has a non-zero digit at or above the current place.
    for(long long place = 1; span / place != 0; place *= kRadix)
    {
        // Distribute by the current decimal digit; push_back keeps the order
        // produced by the previous pass.
        for(size_t i = 0; i < datas.size(); ++i)
        {
            const long long key = static_cast<long long>(datas[i]) - smallest;
            buckets[static_cast<size_t>((key / place) % kRadix)].push_back(datas[i]);
        }

        // Gather the buckets back in digit order and empty them for reuse.
        size_t write_pos = 0;
        for(size_t b = 0; b < buckets.size(); ++b)
        {
            for(size_t k = 0; k < buckets[b].size(); ++k)
                datas[write_pos++] = buckets[b][k];
            buckets[b].clear();
        }
    }
}
#include <sys/time.h>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <stdexcept>
#include <vector>
// The code below names vector, cout and endl without the std:: prefix and
// relies on this using-directive to find them.
using namespace std;
// Pointer to a sort routine that takes only the vector to sort in place.
// NOTE(review): presumably the element type of the `funs` table in main(),
// whose declaration lost its template argument -- confirm.
typedef void (*SortFun)(std::vector<int>&);
// Writes every element of `vec` followed by a comma, then ends the line
// (std::endl, so the stream is flushed as well). Returns the stream.
template<class SStream>
SStream& operator << (SStream& os, const std::vector<int>& vec)
{
    std::vector<int>::const_iterator it = vec.begin();
    while(it != vec.end())
    {
        os << *it << ",";
        ++it;
    }
    os << std::endl;
    return os;
}
// Appends a random amount of random values to `datas`.
//   count_max: upper bound on how many values are appended (1..count_max).
//   value_max: exclusive upper bound of each value (0..value_max-1).
// Does nothing when either bound is not positive, because a zero modulus would
// be a division by zero. Values can never exceed RAND_MAX, whatever value_max
// is, since they come from rand().
void GetData(std::vector<int>& datas, int count_max = 10000, int value_max = 1000000)
{
    if(count_max <= 0 || value_max <= 0)
        return;

    const int rand_count = std::rand()%count_max + 1;
    // One allocation up front instead of repeated growth while appending.
    datas.reserve(datas.size() + static_cast<size_t>(rand_count));
    for(int i = 0; i < rand_count; ++i)
        datas.push_back(std::rand()%value_max);
}
// Returns true when `datas` is in non-decreasing order.
// On the first out-of-order adjacent pair it prints the whole vector, then the
// index of the pair and both values, and returns false.
bool CheckSort(const std::vector<int>& datas)
{
    for(size_t next = 1; next < datas.size(); ++next)
    {
        const size_t idx = next - 1;
        if(!(datas[next] < datas[idx]))
            continue;

        std::cout << datas << std::endl;
        std::cout << "idx:" << idx << ", " << datas[idx] << ", " << datas[next] << std::endl;
        return false;
    }
    return true;
}
// Returns true when both vectors have the same length and equal elements at
// every position.
bool CheckResult(const std::vector<int>& datas1, const std::vector<int>& datas2)
{
    // Vector equality compares the sizes first and then element by element.
    return datas1 == datas2;
}
int main()
{
srand(time(NULL));
//测试多少次
int test_times = 1000;
//获取数据
vector<vector<int> *>* total_datas = new vector<vector<int> *>();
//产生1000个这样的测试数据
for(int i = 0; i < test_times; ++i)
{
vector<int> *tmp = new vector<int>();
GetData(*tmp);
total_datas->push_back(tmp);
}
cout << "Create Data success!" << endl;
//检验正确性
for(size_t i = 0; i < total_datas->size(); ++i)
{
vector<int> *tmp = new vector<int>();
vector<int> *tmp1 = new vector<int>();
//前三种排序速度太慢,可先将test_times设成较小值测试正确性
//*tmp = *((*total_datas)[i]);
//InsertSort(*tmp);
//CheckSort(*tmp);
//*tmp = *((*total_datas)[i]);
//BubbleSort(*tmp);
//CheckSort(*tmp);
//*tmp = *((*total_datas)[i]);
//SelectSort(*tmp);
//CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
QuickSort(*tmp);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
HeapSort(*tmp);
//如果数据不正确,可将数据输出后进行断点调试
if(!CheckSort(*tmp))
{
cout << *((*total_datas)[i]) << endl;
}
*tmp = *((*total_datas)[i]);
MergeSort(*tmp);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
CountSort(*tmp, 1000000);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
RadixSort(*tmp);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
QuickSort(*tmp);
*tmp1 = *((*total_datas)[i]);
MergeSort(*tmp1);
//比较两种排序的结果是否一致
if(!CheckResult(*tmp, *tmp1))
{
cout << *((*total_datas)[i]) << endl;
break;
}
delete tmp;
delete tmp1;
cout << i << endl;
}
//检验时间
vector funs(5);
funs[0] = MergeSort;
funs[1] = RadixSort;
funs[2] = QuickSort;
funs[3] = CountSort;
funs[4] = HeapSort;
for(size_t idx = 0; idx < funs.size(); ++idx)
{
struct timeval start_time, end_time;
gettimeofday(&start_time, NULL);
for(size_t i = 0; i < total_datas->size(); ++i)
{
vector<int> *tmp = new vector<int>();
*tmp = *((*total_datas)[i]);
funs[idx](*tmp);
}
gettimeofday(&end_time, NULL);
cout << "runtime:" << (end_time.tv_sec-start_time.tv_sec)*1000 + (end_time.tv_usec-start_time.tv_usec)/1000 << endl;
}
return 0;
}
结果如下:
速度依次是:快速排序>归并排序>堆排序>基数排序>计数排序
特殊情况下基数排序和计数排序可能更快;归并排序和堆排序的效率接近,但都低于快速排序;其余三种 O(n²) 的慢速排序(插入、冒泡、选择)忽略不计。