分析解决:如果用一个int保存一个正整数,一个int占4 Byte,100万个数需要400万 Byte,约为4 MB。如果用快速排序,平均时间复杂度为O(n log n)。
考虑到问题的特殊性,所有数字均为正整数,且互不重复,这样的问题可以用位图解决。每个数字对应位图中的一位(bit),如果数字出现则置1,否则置0。一个4 Byte的int有32位,可以记录32个数。因为所有的数都小于1000万,所以可以先用一个1000万位的位图来记录这100万个数,最后从头扫描这个位图,把置1的位所对应的数字依次输出,就是按序的结果。用位图排序需要的空间约为1.25 MB(1000万 bit ÷ 8),时间复杂度为O(N)(N为数值范围的上限,这里是1000万),无论空间还是时间都比快排好。
/* phase 1: initialize set to empty */
for i = [0, N)
    bit[i] = 0
/* phase 2: insert present elements into the set */
for each i in the input file
    bit[i] = 1
/* phase 3: write the sorted output */
for i = [0, N)
    if bit[i] == 1
        write i on the output file
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <random>
#include <set>
#include <utility>
#include <vector>

// Returns a uniformly distributed random integer in the closed range [i, j].
// The bounds may be passed in either order.
int randint(int i, int j) {
    if (j < i) std::swap(i, j);
    // rand() % range is deliberately avoided: RAND_MAX is only guaranteed to
    // be 32767, so large ranges (e.g. ten million) would never be covered,
    // and the modulo introduces bias.
    static std::mt19937 engine{std::random_device{}()};
    return std::uniform_int_distribution<int>(i, j)(engine);
}

// Floyd's sampling algorithm: inserts m distinct random integers drawn from
// [0, n) into S.
// A non-positive n or m leaves S untouched; m larger than n is clamped to n,
// because no more than n distinct values exist.
void floyd_f2(int n, int m, std::set<int> &S) {
    if (n <= 0 || m <= 0) return;
    if (m > n) m = n;
    for (int i = n - m; i < n; ++i) {
        const int j = randint(0, i);
        // If j was already drawn, i itself is taken instead; i cannot be in
        // the sample yet because every earlier draw was below i.
        if (!S.insert(j).second) S.insert(i);
    }
}

// Shuffles V in place by walking from the last element down and swapping
// each element with a randomly chosen one at or before it.
// Empty and single-element vectors are left as they are.
void knuth_shuffle(std::vector<int> &V) {
    // `i > 0` (not `i != 0`) so an empty vector, where i starts at -1,
    // does not enter the loop.
    for (int i = static_cast<int>(V.size()) - 1; i > 0; --i) {
        std::swap(V[i], V[randint(0, i)]);
    }
}

// Writes every element of [beg, end) to `file`, one per line.
// Prints a diagnostic and writes nothing if the file cannot be opened.
template <typename T>
void output_file(T beg, T end, const char *file) {
    std::ofstream outfile(file);
    if (!outfile) {
        std::cerr << "cannot open output file \"" << file << "\"" << std::endl;
        return;
    }
    for (; beg != end; ++beg) {
        outfile << *beg << '\n';  // '\n' avoids the per-line flush of endl
    }
}

// Prints the command-line usage.
void help() {
    std::cout << "usage:" << std::endl;
    std::cout << "./Floyd_F2 n m output_file_name" << std::endl;
}

// Entry point: samples m distinct integers from [0, n), shuffles them and
// writes them to the given file, one per line.
int main(int argc, char *argv[]) {
    if (argc != 4) {
        help();
        return 1;
    }

    // atoi yields 0 for non-numeric text, which the range check rejects.
    const int n = std::atoi(argv[1]);
    const int m = std::atoi(argv[2]);
    if (n <= 0 || m < 0 || m > n) {
        std::cerr << "invalid arguments: need n > 0 and 0 <= m <= n" << std::endl;
        help();
        return 1;
    }

    // sample
    std::set<int> S;
    floyd_f2(n, m, S);

    // shuffle: the set iterates in ascending order, so copy and permute
    std::vector<int> V(S.begin(), S.end());
    knuth_shuffle(V);

    // output
    output_file(V.begin(), V.end(), argv[3]);
    return 0;
}
#define BITWORD 32  // bits stored per array word
#define SHIFT 5     // log2(BITWORD): i >> SHIFT is the word index
#define MARK 0x1F   // BITWORD - 1: i & MARK is the bit offset inside the word
#define N 10000000  // upper bound of the values the bitmap is sized for
#define COUNT ((N) / (BITWORD))

// Bitmap storage: bit (i & MARK) of word (i >> SHIFT) records whether i is
// present. Unsigned words keep the top bit of each word out of the sign bit.
unsigned int ary[COUNT + 1];

// True when i maps to a word that exists in ary.
static bool in_bitmap(int i) {
    return i >= 0 && (i >> SHIFT) <= COUNT;
}

// Marks i as present. Values outside the bitmap are ignored.
void set(int i) {
    if (in_bitmap(i)) ary[i >> SHIFT] |= 1u << (i & MARK);
}

// Returns true if i is marked as present; false for values outside the bitmap.
bool test(int i) {
    return in_bitmap(i) && (ary[i >> SHIFT] & (1u << (i & MARK))) != 0;
}

// Marks i as absent. Values outside the bitmap are ignored.
void clr(int i) {
    if (in_bitmap(i)) ary[i >> SHIFT] &= ~(1u << (i & MARK));
}
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// No `using namespace std;` here: the global function `set` below would
// otherwise compete with std::set whenever <set> is pulled in.

#define BITWORD 32  // bits stored per array word
#define SHIFT 5     // log2(BITWORD): i >> SHIFT is the word index
#define MARK 0x1F   // BITWORD - 1: i & MARK is the bit offset inside the word
#define N 10000000  // largest value accepted from the input
#define COUNT ((N) / (BITWORD))

// Bitmap storage: bit (i & MARK) of word (i >> SHIFT) records whether i is
// present. Unsigned words keep the top bit of each word out of the sign bit.
unsigned int ary[COUNT + 1];

// True when i maps to a word that exists in ary.
static bool in_bitmap(int i) {
    return i >= 0 && (i >> SHIFT) <= COUNT;
}

// Marks i as present. Values outside the bitmap are ignored.
void set(int i) {
    if (in_bitmap(i)) ary[i >> SHIFT] |= 1u << (i & MARK);
}

// Returns true if i is marked as present; false for values outside the bitmap.
bool test(int i) {
    return in_bitmap(i) && (ary[i >> SHIFT] & (1u << (i & MARK))) != 0;
}

// Marks i as absent. Values outside the bitmap are ignored.
void clr(int i) {
    if (in_bitmap(i)) ary[i >> SHIFT] &= ~(1u << (i & MARK));
}

// Prints the command-line usage.
void help() {
    std::cout << "usage:" << std::endl;
    std::cout << "./BitSort inputfile outputfile" << std::endl;
}

// Entry point: reads one integer per line from the input file, records each
// in the bitmap, then writes every recorded value in ascending order to the
// output file. Duplicate input values collapse to a single output line.
int main(int argc, char *argv[]) {
    if (argc != 3) {
        help();
        return 1;
    }

    std::ifstream infile(argv[1]);
    if (!infile) {
        std::cout << "file \"" << argv[1] << "\" not exists" << std::endl;
        return 1;
    }

    const std::time_t t_start = std::time(nullptr);

    // read the data and set the data in the bit map
    std::string line;
    std::istringstream parser;
    long skipped = 0;
    while (std::getline(infile, line)) {
        parser.clear();  // drop eof/fail state left by the previous line
        parser.str(line);
        int num = 0;
        // A failed extraction stores 0 in num, so the result must be checked
        // or every blank/malformed line would insert a spurious 0.
        if (!(parser >> num) || num < 0 || num > N) {
            ++skipped;
            continue;
        }
        set(num);
    }
    infile.close();
    if (skipped != 0) {
        std::cerr << "skipped " << skipped
                  << " blank, malformed or out-of-range line(s)" << std::endl;
    }

    std::ofstream outfile(argv[2]);
    if (!outfile) {
        std::cout << "create output file \"" << argv[2] << "\" failed" << std::endl;
        return 1;
    }

    // read the bit map and write to the file
    for (int i = 0; i <= N; ++i) {
        if (test(i)) outfile << i << '\n';  // '\n' avoids a flush per line
    }
    outfile.close();

    const std::time_t t_end = std::time(nullptr);
    std::cout << "time elapsed: " << std::difftime(t_end, t_start) << " s" << std::endl;
    // N bits / 8 bits per byte / 1e6 bytes per megabyte
    std::cout << "need " << (static_cast<double>(N) / (8 * 1000000)) << " M memory" << std::endl;
    return 0;
}
#define BITWORD 8   // counters per array word: 32 bits / 4 bits each
#define SHIFT 3     // log2(BITWORD): i >> SHIFT is the word index
#define MARK 0x07   // BITWORD - 1: i & MARK is the counter slot inside the word
#define TEST 0x0F   // mask of one 4-bit counter, also its maximum value
// Bit offset of i's counter. Expands to an expression that needs a variable
// named `i` in scope; kept for existing users, the functions below use
// slot_shift() instead.
#define POS ((i & MARK) << 2)
#define N 10000000  // upper bound of the values the table is sized for
#define COUNT ((N) / (BITWORD))

// Counter table: each word packs eight 4-bit occurrence counters.
// The words are unsigned so that the top counter of a word can hold 8..15
// without setting a sign bit, which would make right shifts sign-extend and
// turn the extracted count negative.
unsigned int ary[COUNT + 1];

// True when i maps to a word that exists in ary.
static bool in_table(int i) {
    return i >= 0 && (i >> SHIFT) <= COUNT;
}

// Bit offset of i's counter inside its word (0, 4, ..., 28).
static unsigned int slot_shift(int i) {
    return static_cast<unsigned int>(i & MARK) << 2;
}

// Current value (0..15) of i's counter; i must satisfy in_table(i).
static unsigned int read_counter(int i) {
    return (ary[i >> SHIFT] >> slot_shift(i)) & static_cast<unsigned int>(TEST);
}

// Records one more occurrence of i.
// The counter saturates at 15: adding past that would carry into the
// neighbouring counter. Values outside the table are ignored.
void set(int i) {
    if (in_table(i) && read_counter(i) < static_cast<unsigned int>(TEST)) {
        ary[i >> SHIFT] += 1u << slot_shift(i);
    }
}

// return the presence count of number i, used for output
// (0..15; 0 for values outside the table)
int test(int i) {
    return in_table(i) ? static_cast<int>(read_counter(i)) : 0;
}

// Resets the occurrence count of i to zero. Values outside the table are
// ignored.
void clr(int i) {
    if (in_table(i)) {
        ary[i >> SHIFT] &= ~(static_cast<unsigned int>(TEST) << slot_shift(i));
    }
}
// read the bit map and write to the file for (int i = 0; i <= N; ++i) { int count = test(i); // get the count of number i's presence for (int j = 0; j != count; ++j) outfile << i << endl; }