几大常用排序算法编写及正确性、效率测试

排序算法写了几遍,总是过段时间就忘,故在此汇总下。
写排序算法,重要的是理解它的原理,弄清楚应该如何遍历,以及遍历的终止条件

插入排序

从左建立有序区,将右侧的值依次插入该有序区,有序区中从插入的位置开始依次后移一位;从左往右遍历

// Insertion sort (ascending, in place).
// The prefix [0, next) is always sorted; each pass takes datas[next], finds
// the first element of the prefix that is greater than it, shifts the rest of
// the prefix one slot to the right and drops the value into the gap.
void InsertSort(std::vector<int>& datas)
{
  for(size_t next = 1; next < datas.size(); ++next)
  {
    const int value = datas[next];

    // Scan the sorted prefix from the left for the insertion position.
    size_t pos = 0;
    while(pos < next && !(value < datas[pos]))
      ++pos;

    // Nothing in the prefix is greater: the value is already in place.
    if(pos == next)
      continue;

    // Open a gap at pos by moving [pos, next) one slot to the right.
    for(size_t k = next; k > pos; --k)
      datas[k] = datas[k-1];

    datas[pos] = value;
  }
}

冒泡排序

从左到右相邻的数依次比较,值最大或最小的依次冒出;从右往左遍历

// Bubble sort (ascending, in place).
// Each pass walks the unsorted prefix and swaps adjacent elements that are
// out of order, so the largest remaining value ends up at the end of the
// prefix. The prefix then shrinks by one.
void BubbleSort(std::vector<int>& datas)
{
  // unsorted = number of leading elements that are not yet in final position.
  for(size_t unsorted = datas.size(); unsorted > 1; --unsorted)
  {
    bool swapped = false;

    for(size_t j = 0; j + 1 < unsorted; ++j)
    {
      if(datas[j] > datas[j+1])
      {
        // Swap through a temporary: the add/subtract trick overflows
        // (undefined behaviour) when the two values sum past the int range.
        const int value = datas[j];
        datas[j] = datas[j+1];
        datas[j+1] = value;
        swapped = true;
      }
    }

    // A pass without swaps means the whole prefix is already ordered.
    if(!swapped)
      break;
  }
}

选择排序

从第一个位置起,依次选择出该位置到结束的最小或最大值,放在当前位置

// Selection sort (ascending, in place).
// For every position, find the smallest value in the remaining suffix and
// move it to that position with a single swap.
void SelectSort(std::vector<int>& datas)
{
  for(size_t i = 0; i + 1 < datas.size(); ++i)
  {
    // Index of the smallest element seen so far in [i, size).
    size_t smallest = i;
    for(size_t j = i+1; j < datas.size(); ++j)
    {
      if(datas[j] < datas[smallest])
        smallest = j;
    }

    // One plain swap per position instead of an XOR swap on every inversion.
    if(smallest != i)
    {
      const int value = datas[i];
      datas[i] = datas[smallest];
      datas[smallest] = value;
    }
  }
}

归并排序

先两两比较排序;再按 2、4、8、16……(每次翻倍)的区间大小逐步归并

// Bottom-up merge sort (ascending).
// Sorted runs of length gap = 1, 2, 4, 8, ... are merged pairwise until a
// single run covers the whole vector. The scratch buffer is an ordinary local
// vector, so it is released automatically on every exit path.
void MergeSort(std::vector<int>& datas)
{
  const size_t count = datas.size();
  std::vector<int> buffer(count);

  for(size_t gap = 1; gap < count; gap *= 2)
  {
    // Number of elements written to buffer during this pass.
    size_t out = 0;

    // Merge [idx, idx+gap) with [idx+gap, idx+2*gap). A trailing run that has
    // no right-hand partner is skipped; it stays where it is in datas.
    for(size_t idx = 0; idx + gap < count; idx += 2*gap)
    {
      const size_t mid = idx + gap;
      // The right-hand run may be shorter than gap at the end of the vector.
      const size_t end = (count - mid < gap) ? count : mid + gap;

      size_t left = idx;
      size_t right = mid;

      // Repeatedly take the smaller head of the two runs.
      while(left < mid && right < end)
      {
        if(datas[right] < datas[left])
          buffer[out++] = datas[right++];
        else
          buffer[out++] = datas[left++];
      }
      // Exactly one of the runs can still have elements left.
      while(left < mid)
        buffer[out++] = datas[left++];
      while(right < end)
        buffer[out++] = datas[right++];
    }

    // Copy back only what was merged. Swapping the vectors instead would lose
    // the unmerged trailing run, which was never written to buffer.
    for(size_t i = 0; i < out; ++i)
      datas[i] = buffer[i];
  }
}

堆排序

1. 首先从最后一个含有子节点的节点(即最后一个非叶子节点)开始向前依次调整,建立大根堆;2. 依次将堆顶与堆底(未排序部分的最后一个元素)交换,再让新的堆顶与其较大的子节点交换并逐层下沉,最多下沉 lg(n) 层,剩余部分即重新成为大根堆

// Sifts the node at 1-based position cur down a max-heap stored in data.
// Only positions 1..max belong to the heap; position p lives at data[p-1] and
// has its children at positions 2p and 2p+1.
void HeapOrder(std::vector<int>& data, size_t cur, size_t max)
{
  for(;;)
  {
    const size_t left = cur * 2;
    const size_t right = left + 1;

    // Pick the largest of the node and whichever children are inside the heap.
    size_t top = cur;
    if(left <= max && data[top-1] < data[left-1])
      top = left;
    if(right <= max && data[top-1] < data[right-1])
      top = right;

    // The node already dominates its children: the heap property holds.
    if(top == cur)
      return;

    // Move the larger child up and keep sifting from the child's position.
    const int value = data[cur-1];
    data[cur-1] = data[top-1];
    data[top-1] = value;
    cur = top;
  }
}


// Heap sort (ascending, in place), built on HeapOrder.
void HeapSort(std::vector<int>& datas)
{
  const size_t count = datas.size();

  // Build the max-heap: sift down every node that has at least one child,
  // starting from the last such node (1-based position count/2).
  size_t node = count / 2;
  while(node > 0)
  {
    HeapOrder(datas, node, count);
    --node;
  }

  // Repeatedly move the heap maximum behind the shrinking heap.
  size_t heap_size = count;
  while(heap_size > 1)
  {
    // Exchange the root with the last element that is still part of the heap.
    const int top = datas[0];
    datas[0] = datas[heap_size-1];
    datas[heap_size-1] = top;

    // The heap is now one element shorter; restore it from the root.
    --heap_size;
    HeapOrder(datas, 1, heap_size);
  }
}

快速排序

1.从第一个位置开始,从最右往左遍历(如果从左往右比较的结果将无效):比它大或者等于,右侧值递减;比它小交换两个数,再从左往右遍历
2.从左往右遍历:比它小或者等于,左侧值递增;比它大交换两个数,再从右往左遍历
3.直到左右两个数相等,当前该数左侧的值小于等于它,右侧的值大于等于它,这个数已排序好
4.递归排序它的左侧,它的右侧

// Quick sort of the 1-based inclusive position range [low, high] of datas
// (position p is datas[p-1]); elements outside the range are not touched.
//
// Partitioning: the pivot is the value that starts at position low. Two
// cursors close in from both ends. While the pivot sits at the left cursor the
// right cursor moves leftwards; when a smaller value is found it is swapped
// with the pivot, the pivot is then at the right cursor and the left cursor
// moves rightwards, and so on. When the cursors meet, the pivot is in its final
// place with nothing larger on its left and nothing smaller on its right.
//
// Only the smaller partition is handled recursively; the larger one is handled
// by the loop. This keeps the recursion depth logarithmic in the range size,
// even for already-sorted input, where recursing into both sides would go one
// level deep per element.
void QuickSort(std::vector<int>& datas, size_t low, size_t high)
{
  while(low < high)
  {
    size_t left = low;
    size_t right = high;
    // True while the pivot value is the element under the left cursor.
    bool pivot_on_left = true;

    while(left != right)
    {
      if(datas[left-1] <= datas[right-1])
      {
        // Already ordered relative to the pivot: advance the non-pivot cursor.
        if(pivot_on_left)
          --right;
        else
          ++left;
      }
      else
      {
        // Out of order: swap, which moves the pivot to the other cursor.
        const int value = datas[left-1];
        datas[left-1] = datas[right-1];
        datas[right-1] = value;

        if(pivot_on_left)
          ++left;
        else
          --right;
        pivot_on_left = !pivot_on_left;
      }
    }

    // left == right is the final position of the pivot.
    // When left == 1 the expression left-1 is 0, which yields an empty range.
    if(left - low < high - left)
    {
      QuickSort(datas, low, left-1);
      low = left+1;
    }
    else
    {
      QuickSort(datas, left+1, high);
      high = left-1;
    }
  }
}

/// Quick sort entry point: sorts the whole vector by delegating to the
/// range overload.
void QuickSort(std::vector<int>& datas)
{
  // The range overload takes 1-based inclusive positions, so the full
  // vector is [1, size()]; an empty vector gives the empty range [1, 0].
  QuickSort(datas, 1, datas.size());
}

计数排序

分配一个能覆盖取值范围的计数数组;每出现一个数,就把它对应位置的计数加一;最后按下标从小到大遍历计数数组,依次写回各个数

// Counting sort (ascending, in place) for values in the closed range [0, max].
//
// datas: values to sort; every element must satisfy 0 <= value <= max.
// max:   largest value that may occur; must be non-negative.
//
// Throws std::invalid_argument if max is negative and std::out_of_range if an
// element lies outside [0, max]. Validation happens while counting, before
// anything is written back, so datas is left unchanged when an exception is
// thrown.
void CountSort(std::vector<int>& datas, int max)
{
  if(max < 0)
    throw std::invalid_argument("CountSort: max must be non-negative");

  // One counter per possible value, indexed directly by the value, so that 0
  // is a legal input and no "value - 1" offset can run off the front.
  std::vector<size_t> count_array(static_cast<size_t>(max) + 1);

  for(size_t i = 0; i < datas.size(); ++i)
  {
    if(datas[i] < 0 || datas[i] > max)
      throw std::out_of_range("CountSort: value outside [0, max]");
    ++count_array[static_cast<size_t>(datas[i])];
  }

  // Write every value back as many times as it was seen, smallest first.
  // Stop as soon as all elements have been written.
  size_t cur_pos = 0;
  for(size_t value = 0; value < count_array.size() && cur_pos < datas.size(); ++value)
  {
    for(size_t repeat = count_array[value]; repeat != 0; --repeat)
      datas[cur_pos++] = static_cast<int>(value);
  }
}

// Convenience overload for the tests: uses 1000000 as the largest value.
void CountSort(std::vector<int>& datas)
{
  CountSort(datas, 1000000);
}

基数排序

先分配与基数个数相同的桶;每个数当前位上的数字是几,就放入对应的桶;每一趟结束后,按桶的顺序从左到右把数据写回 datas;从最低位到最高位逐位重复这一过程

// Least-significant-digit radix sort (ascending, in place).
//
// datas: values to sort; negative values are allowed.
// radix: number base of the digits, i.e. the number of buckets; at least 2.
//
// Each pass distributes the elements into buckets by one digit, lowest digit
// first, and then copies the buckets back in order. Because elements keep their
// relative order inside a bucket, earlier passes are preserved by later ones.
//
// Throws std::invalid_argument if radix < 2 (a smaller radix would divide by
// zero or never terminate).
void RadixSort(std::vector<int>& datas, const int radix)
{
  if(radix < 2)
    throw std::invalid_argument("RadixSort: radix must be at least 2");
  if(datas.size() < 2)
    return;

  // Sort by the distance to the smallest element instead of the raw value.
  // The distance is never negative and keeps the order, so negative inputs
  // cannot produce a negative bucket index.
  int min_value = datas[0];
  for(size_t i = 1; i < datas.size(); ++i)
  {
    if(datas[i] < min_value)
      min_value = datas[i];
  }
  // Unsigned subtraction wraps modulo 2^N, which gives the exact distance
  // value - min_value even when that distance does not fit in an int.
  const unsigned int bias = static_cast<unsigned int>(min_value);

  const unsigned long long base = static_cast<unsigned long long>(radix);
  std::vector<std::vector<int> > radix_vecs(static_cast<size_t>(radix));

  // Integer weight of the current digit (radix^pass). Integer arithmetic
  // avoids the rounding that truncating a floating-point pow() can introduce.
  unsigned long long divisor = 1;

  bool is_run = true;
  while(is_run)
  {
    is_run = false;
    for(size_t i = 0; i < datas.size(); ++i)
    {
      const unsigned long long key = static_cast<unsigned int>(datas[i]) - bias;
      const unsigned long long shifted = key / divisor;

      // Another pass is needed while any key still has a higher non-zero digit.
      if(shifted / base != 0)
        is_run = true;

      // Store by the current digit.
      radix_vecs[static_cast<size_t>(shifted % base)].push_back(datas[i]);
    }

    // Copy the buckets back in digit order and empty them for the next pass.
    size_t idx = 0;
    for(size_t bucket = 0; bucket < radix_vecs.size(); ++bucket)
    {
      for(size_t j = 0; j < radix_vecs[bucket].size(); ++j)
        datas[idx++] = radix_vecs[bucket][j];
      radix_vecs[bucket].clear();
    }

    divisor *= base;
  }
}

// Convenience overload for the tests: uses 10 as the radix.
void RadixSort(std::vector<int>& datas)
{
  RadixSort(datas, 10);
}

测试代码

#include <sys/time.h>

#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <stdexcept>
#include <vector>
// The test code below refers to vector, cout and endl without the std:: prefix.
using namespace std;

// Pointer to a sort routine that takes the vector to sort as its only argument.
typedef void (*SortFun)(std::vector<int>&);

// Prints a vector: every element followed by a comma, then a line end written
// with endl (which also flushes). Returns the stream so calls can be chained.
template<class SStream>
SStream& operator << (SStream& os, const vector<int>& vec)
{
  typedef vector<int>::const_iterator ElementIter;

  for(ElementIter it = vec.begin(); it != vec.end(); ++it)
    os << *it << ",";

  os << endl;
  return os;
}

// Appends random test data to datas.
//
// count_max: upper bound for the number of elements; between 1 and count_max
//            elements are appended.
// value_max: exclusive upper bound for the values; every appended value is in
//            [0, value_max).
//
// Both bounds are used as modulus for rand(), so they must be positive;
// std::invalid_argument is thrown otherwise (a zero modulus would be a
// division by zero).
void GetData(std::vector<int>& datas, int count_max = 10000, int value_max = 1000000)
{
  if(count_max <= 0 || value_max <= 0)
    throw std::invalid_argument("GetData: count_max and value_max must be positive");

  const int rand_count = rand()%count_max+1;
  for(int i = 0; i < rand_count; ++i)
    datas.push_back(rand()%value_max);
}

// Returns true when datas is in non-decreasing order.
// On the first adjacent pair that is out of order, prints the whole vector,
// then the index of the pair's first element and both values, and returns
// false.
bool CheckSort(const std::vector<int>& datas)
{
  const size_t count = datas.size();

  for(size_t next = 1; next < count; ++next)
  {
    const size_t idx = next - 1;
    if(!(datas[next] < datas[idx]))
      continue;

    cout << datas << endl;
    cout << "idx:" << idx << ", " << datas[idx] << ", " << datas[next] << endl;
    return false;
  }

  return true;
}

// Returns true when both vectors have the same length and hold equal values
// at every position.
bool CheckResult(const std::vector<int>& datas1, const std::vector<int>& datas2)
{
  // Vector equality compares the sizes first and then element by element.
  return datas1 == datas2;
}
int main()
{
  srand(time(NULL));
  //测试多少次
  int test_times = 1000;
  //获取数据
  vector<vector<int> *>* total_datas = new vector<vector<int> *>();
  //产生1000个这样的测试数据
  for(int i = 0; i < test_times; ++i)
  {
    vector<int> *tmp = new vector<int>();
    GetData(*tmp);
    total_datas->push_back(tmp);
  }
  cout << "Create Data success!" << endl;

  //检验正确性
  for(size_t i = 0; i < total_datas->size(); ++i)
  {
    vector<int> *tmp = new vector<int>();
    vector<int> *tmp1 = new vector<int>();

    //前三种排序速度太慢,可先将test_times设成较小值测试正确性
    //*tmp = *((*total_datas)[i]);
    //InsertSort(*tmp);
    //CheckSort(*tmp);

    //*tmp = *((*total_datas)[i]);
    //BubbleSort(*tmp);
    //CheckSort(*tmp);

    //*tmp = *((*total_datas)[i]);
    //SelectSort(*tmp);
    //CheckSort(*tmp);

    *tmp = *((*total_datas)[i]);
    QuickSort(*tmp);
    CheckSort(*tmp);

    *tmp = *((*total_datas)[i]);
    HeapSort(*tmp);
    //如果数据不正确,可将数据输出后进行断点调试
    if(!CheckSort(*tmp))
    {
      cout << *((*total_datas)[i]) << endl;
    }

    *tmp = *((*total_datas)[i]);
    MergeSort(*tmp);
    CheckSort(*tmp);


    *tmp = *((*total_datas)[i]);
    CountSort(*tmp, 1000000);
    CheckSort(*tmp);


    *tmp = *((*total_datas)[i]);
    RadixSort(*tmp);
    CheckSort(*tmp);

    *tmp = *((*total_datas)[i]);
    QuickSort(*tmp);
    *tmp1 = *((*total_datas)[i]);
    MergeSort(*tmp1);
    //比较两种排序的结果是否一致
    if(!CheckResult(*tmp, *tmp1))
    {
      cout << *((*total_datas)[i]) << endl;
      break;
    }
    delete tmp;
    delete tmp1;

    cout << i << endl;
  }

  //检验时间
  vector funs(5);
  funs[0] = MergeSort;
  funs[1] = RadixSort;
  funs[2] = QuickSort;
  funs[3] = CountSort;
  funs[4] = HeapSort;

  for(size_t idx = 0; idx < funs.size(); ++idx)
  {
    struct timeval start_time, end_time;
    gettimeofday(&start_time, NULL);

    for(size_t i = 0; i < total_datas->size(); ++i)
    {
      vector<int> *tmp = new vector<int>();
      *tmp = *((*total_datas)[i]);
      funs[idx](*tmp);
    }

    gettimeofday(&end_time, NULL);
    cout << "runtime:" << (end_time.tv_sec-start_time.tv_sec)*1000 + (end_time.tv_usec-start_time.tv_usec)/1000 << endl;
  }

  return 0;
}

结果如下:(原文此处为运行结果截图,图片未能保留)
速度依次是:快速排序>归并排序>堆排序>基数排序>计数排序
特殊情况下基数排序和计数排序可能更快,归并排序和堆排序效率接近相等但都小于快排,其它三种蜗牛排序忽略

你可能感兴趣的:(几大常用排序算法编写及正确性、效率测试)