11.4 双重散列法 : http://blog.csdn.net/zixiawzm/article/details/6746946
先生成n个随机数,然后完成下面操作:
a)顺序统计:分别使用随机选择(rand-select)和最坏情况下为线性时间的选择算法,分析两种选择算法的运行时间随个数n的变化情况。
图表对比:
备注:测试环境说明: OS:Windows 8.1;IDE:Code::Blocks 13.12;Compiler:G++。在 Ubuntu 14.04 LTS 环境下测试发现,优化后的算法在数据规模比较大时(大于 100000)运行耗时反而更长,对此表示疑惑。不过在 Windows 环境下,其他条件都保持相同(IDE 版本、编译器版本)的情况下,两种算法的效率在同一数量级。原因:理论上来说,足够随机的序列可以使两种算法在数据规模达到一定程度时出现明显差异,优化前的算法应该明显慢于优化后的算法。不过可能存在:一、优化后的算法中函数之间相互调用过于频繁,产生额外的开销;二、随机数生成不够随机(可能是我代码写的不好)。
PROB1随机选择
//#pragma comment(linker, "/STACK:16777216") //for c++ Compiler
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <stack>
#include <string>
#include <map>
#include <set>
#include <list>
#include <queue>
#include <vector>
#include <ctime>
#include <algorithm>
#define Max(a,b) (((a) > (b)) ? (a) : (b))
#define Min(a,b) (((a) < (b)) ? (a) : (b))
#define Abs(x) (((x) > 0) ? (x) : (-(x)))
#define MOD 1000000007
#define pi acos(-1.0)

using namespace std;

typedef long long ll ;
typedef unsigned long long ull ;
typedef unsigned int uint ;
typedef unsigned char uchar ;

template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;}
template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;}

const double eps = 1e-7 ;
const int N = 210 ;
const int M = 1100011*2 ;
const ll P = 10000000097ll ;
const int MAXN = 10900000 ;

// Working array shared by every routine below. main() stores a placeholder in
// a[0] and the data in a[1..n], so callers pass 1-based bounds.
vector <int> a;

// Returns a pseudo-random index in the inclusive range [p, r].
// rand() is only guaranteed to reach 32767, so a single call cannot address a
// range of millions of elements; two calls are combined into a 30-bit value.
static int randomIndex (int p, int r) {
    const unsigned long high = static_cast<unsigned long>(rand()) & 0x7fffUL;
    const unsigned long low = static_cast<unsigned long>(rand()) & 0x7fffUL;
    const unsigned long wide = (high << 15) | low;
    const unsigned long span = static_cast<unsigned long>(r - p) + 1UL;
    return p + static_cast<int>(wide % span);
}

// Partitions a[p..r] around the pivot a[r].
// On return every element left of the returned index is <= the pivot and every
// element right of it is > the pivot; the pivot sits at the returned index.
int partition (int p, int r) {
    const int x = a[r];
    int i = p - 1;
    for (int j = p; j < r; ++j) {
        if (a[j] <= x) {
            ++i;
            swap (a[i], a[j]);
        }
    }
    swap (a[i + 1], a[r]);
    return i + 1;
}

// Moves a randomly chosen element of a[p..r] to a[r] and partitions around it.
// The pivot is drawn from the whole inclusive range, including position r.
int randomizedPartition (int p, int r) {
    const int i = randomIndex (p, r);
    swap (a[r], a[i]);
    return partition (p, r);
}

// Returns the i-th smallest element (i is 1-based) of a[p..r].
// Precondition: p <= r and 1 <= i <= r - p + 1. The array is reordered.
// Written as a loop so that a long run of unlucky pivots cannot exhaust the
// call stack.
int randomizedSelect (int p, int r, int i) {
    while (p < r) {
        const int q = randomizedPartition (p, r);
        const int k = q - p + 1;      // rank of the pivot inside a[p..r]

        if (i == k) {
            return a[q];
        }
        if (i < k) {
            r = q - 1;                // answer lies left of the pivot
        } else {
            p = q + 1;                // answer lies right of the pivot
            i -= k;
        }
    }
    return a[p];
}

// Prints a[1..] one value per line (a[0] is the placeholder and is skipped).
void print (){
    for (size_t i = 1; i < a.size (); ++i) {
        cout << a[i] << endl;
    }
}

// Benchmark driver. Define RAND_SELECT_NO_MAIN to compile only the selection
// routines (for example when linking them into a unit test).
#ifndef RAND_SELECT_NO_MAIN
int main () {
    const int kRuns = 10;
    const int n = 10000000;
    const int m = 5000000;
    double ansTime = 0;

    cout << "plz input the number" << endl;
    cout << "n = " << n << endl;

    cout << "plz input the ith smallest number u want to search" << endl;
    cout << "m = " << m << endl << endl;

    // Seed once: reseeding with time() inside the loop replays the same data
    // whenever two runs start within the same second.
    srand (static_cast<unsigned> (time (NULL)));

    for (int run = 0; run < kRuns; ++run) {
        a.clear ();
        a.reserve (static_cast<size_t> (n) + 1);
        a.push_back (0);              // placeholder so the data is 1-based
        for (int i = 0; i < n; ++i) {
            a.push_back (rand () % 65536);
        }

        // Time the selection only, not the data generation.
        const clock_t start = clock ();
        const int answer = randomizedSelect (1, n, m);
        const clock_t finish = clock ();

        const double totalTime = static_cast<double> (finish - start) / CLOCKS_PER_SEC;
        cout << answer << '\t' << totalTime << "s" << endl;

        ansTime += totalTime;
    }

    cout << endl;
    cout << "10 times test total time is : " << ansTime << "s" << endl;
    cout << "Average time is : " << ansTime / kRuns << "s" << endl;

    return 0;
}
#endif
PROB2最坏情况下为线性时间的选择算法
//#pragma comment(linker, "/STACK:16777216") //for c++ Compiler
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <stack>
#include <string>
#include <map>
#include <set>
#include <list>
#include <queue>
#include <vector>
#include <ctime>
#include <algorithm>
#define Max(a,b) (((a) > (b)) ? (a) : (b))
#define Min(a,b) (((a) < (b)) ? (a) : (b))
#define Abs(x) (((x) > 0) ? (x) : (-(x)))
#define MOD 1000000007
#define pi acos(-1.0)

using namespace std;

typedef long long ll ;
typedef unsigned long long ull ;
typedef unsigned int uint ;
typedef unsigned char uchar ;

template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;}
template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;}

const double eps = 1e-7 ;
const int N = 210 ;
const int M = 1100011*2 ;
const ll P = 10000000097ll ;
const int MAXN = 10900000 ;

const int maxn = 10000000;      // capacity of kp
const int mm = 5000000;         // rank looked up by the benchmark
const int maxm = maxn / 5 + 1;  // capacity of mid

int kp[maxn];                   // benchmark data
int mid[maxm];                  // scratch buffer holding the group medians

// Sorts the first n elements of kp in ascending order (insertion sort).
void InsertionSort (int kp[], int n) {
    for (int i = 1; i < n; ++i) {
        const int tmp = kp[i];
        int j = i;
        for (; j > 0 && kp[j - 1] > tmp; --j) {
            kp[j] = kp[j - 1];
        }
        kp[j] = tmp;
    }
}

// Chooses a pivot value for kp[low..high] by repeatedly taking medians of
// groups of five.
//
// Each group of five is sorted in place and its middle element is copied to the
// global buffer mid[]; a trailing group of fewer than five contributes the
// element at offset remain / 2 of its sorted order (the upper median when the
// group has an even size). The function then calls itself on mid[] until one
// value is left. The returned value is always an element of kp[low..high].
//
// NOTE(review): the recursion applies the same grouping to the medians instead
// of selecting their exact median, so the pivot is an approximation of the
// median of medians.
int FindMedian (int kp[], int low, int high) {
    if (low == high) {
        return kp[low];
    }

    const int length = high - low + 1;
    int index = low;

    // Full groups of five; skipped entirely when fewer than five elements.
    for (; index <= high - 4; index += 5) {
        InsertionSort (kp + index, 5);
        mid[(index - low) / 5] = kp[index + 2];
    }

    // Trailing group with fewer than five elements.
    const int remain = high - index + 1;
    if (remain > 0) {
        InsertionSort (kp + index, remain);
        mid[(index - low) / 5] = kp[index + (remain >> 1)];
    }

    // The medians occupy mid[0..cnt] (cnt is the last index, not the count).
    int cnt = length / 5;
    if (length % 5 == 0) {
        --cnt;
    }

    if (cnt == 0) {
        return mid[0];
    }
    return FindMedian (mid, 0, cnt);
}

// Returns the k-th smallest element (k is 1-based) of kp[low..high].
// Precondition: low <= high and 1 <= k <= high - low + 1. The array is
// reordered.
//
// The range is split three ways around the pivot value: [low, lt) holds
// elements smaller than the pivot, [lt, gt] elements equal to it and
// (gt, high] elements larger than it. Knowing the exact extent of the "equal"
// band is what makes the rank comparison correct; a two-way partition whose
// scans merely cross does not leave the pivot at a known position.
int Qselect (int kp[], int low, int high, int k) {
    for (;;) {
        if (low >= high) {
            return kp[low];
        }

        const int pivot = FindMedian (kp, low, high);

        int lt = low;
        int cur = low;
        int gt = high;
        while (cur <= gt) {
            if (kp[cur] < pivot) {
                swap (kp[lt++], kp[cur++]);
            } else if (kp[cur] > pivot) {
                swap (kp[cur], kp[gt--]);
            } else {
                ++cur;
            }
        }

        const int smaller = lt - low;          // elements < pivot
        const int notLarger = gt - low + 1;    // elements <= pivot

        if (k <= smaller) {
            high = lt - 1;
        } else if (k <= notLarger) {
            return pivot;
        } else {
            k -= notLarger;
            low = gt + 1;
        }
    }
}

// Benchmark driver. Define LINEAR_SELECT_NO_MAIN to compile only the selection
// routines (for example when linking them into a unit test).
#ifndef LINEAR_SELECT_NO_MAIN
int main () {
    const int kRuns = 10;
    const int n = maxn;
    const int m = mm;
    double ansTime = 0;

    cout << "plz input the number" << endl;
    cout << "n = " << n << endl;

    cout << "plz input the ith smallest number u want to search" << endl;
    cout << "m = " << m << endl << endl;

    // Seed once: reseeding with time() inside the loop replays the same data
    // whenever two runs start within the same second.
    srand (static_cast<unsigned> (time (NULL)));

    for (int run = 0; run < kRuns; ++run) {
        for (int i = 0; i < n; ++i) {
            kp[i] = rand () % 65536;
        }

        // Time the selection only, not the data generation.
        const clock_t start = clock ();
        const int answer = Qselect (kp, 0, n - 1, m);
        const clock_t finish = clock ();

        const double totalTime = static_cast<double> (finish - start) / CLOCKS_PER_SEC;
        cout << answer << '\t' << totalTime << "s" << endl;

        ansTime += totalTime;
    }

    cout << endl;
    cout << "10 times test total time is : " << ansTime << "s" << endl;
    cout << "Average time is : " << ansTime / kRuns << "s" << endl;

    return 0;
}
#endif
b)散列存储:分别采用链表法和开放地址法(双重散列)方法存储上述随机数,并实现相关的查询、插入和删除等基本操作。
PROB3链表法
//#pragma comment(linker, "/STACK:16777216") //for c++ Compiler
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <cmath>
#include <stack>
#include <string>
#include <map>
#include <set>
#include <list>
#include <queue>
#include <vector>
#include <algorithm>
#define Max(a,b) (((a) > (b)) ? (a) : (b))
#define Min(a,b) (((a) < (b)) ? (a) : (b))
#define Abs(x) (((x) > 0) ? (x) : (-(x)))
#define MOD 1000000007
#define pi acos(-1.0)

using namespace std;

typedef long long ll ;
typedef unsigned long long ull ;
typedef unsigned int uint ;
typedef unsigned char uchar ;

template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;}
template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;}

const double eps = 1e-7 ;
const int M = 1100011*2 ;
const ll P = 10000000097ll ;
const int MAXN = 10900000 ;
const int INF = 0x3f3f3f3f ;

const int MAXSIZE = 31 ;        // number of buckets
const int NULLKEY = -32768 ;    // marks an empty bucket head; cannot be stored

// One stored key plus the link to the next key of the same bucket.
typedef struct node {
    int data;
    struct node *next;
} Node;

// Separate-chaining hash table. table[i] is the head of bucket i and holds the
// bucket's first key itself (or NULLKEY when the bucket is empty); further keys
// hang off table[i].next as heap-allocated nodes.
typedef struct Table {
    Node *table;
    int count;
} HashTable;

// Allocates MAXSIZE empty buckets. Terminates the program if the allocation
// fails, because no other function can work on a table without buckets.
void InitHashTable (HashTable *H) {
    H->count = MAXSIZE;
    H->table = static_cast<Node *> (malloc (sizeof (Node) * H->count));
    if (H->table == NULL) {
        fprintf (stderr, "InitHashTable: out of memory\n");
        exit (EXIT_FAILURE);
    }
    for (int i = 0; i < H->count; ++i) {
        H->table[i].data = NULLKEY;
        H->table[i].next = NULL;
    }
}

// Maps a key to a bucket index in [0, MAXSIZE).
// The % operator yields a negative remainder for negative keys, so the result
// is shifted back into range.
int Hash (int key) {
    const int slot = key % MAXSIZE;
    return slot < 0 ? slot + MAXSIZE : slot;
}

// Returns true when key is stored in the table.
// NULLKEY is the empty-bucket marker and is therefore never reported as found.
bool SearchHashTable (HashTable *H,int key) {
    if (key == NULLKEY) {
        return false;
    }
    for (const Node *p = &H->table[Hash (key)]; p != NULL; p = p->next) {
        if (p->data == key) {
            return true;
        }
    }
    return false;
}

// Stores key in the table. Keys already present and the reserved value NULLKEY
// are ignored, so every key appears at most once.
void InsertHashTable (HashTable *H,int key) {
    if (key == NULLKEY || SearchHashTable (H, key)) {
        return;
    }

    Node *head = &H->table[Hash (key)];
    if (head->data == NULLKEY) {
        head->data = key;             // empty bucket: the head holds the key
        return;
    }

    Node *fresh = static_cast<Node *> (malloc (sizeof (Node)));
    if (fresh == NULL) {
        fprintf (stderr, "InsertHashTable: out of memory\n");
        exit (EXIT_FAILURE);
    }
    fresh->data = key;
    fresh->next = head->next;         // link in directly behind the head
    head->next = fresh;
}

// Removes key from the table. Returns true if the key was present.
bool DeleteHashTable (HashTable *H,int key) {
    if (key == NULLKEY) {
        return false;
    }

    Node *head = &H->table[Hash (key)];
    if (head->data == key) {
        Node *second = head->next;
        if (second == NULL) {
            head->data = NULLKEY;     // bucket becomes empty
        } else {
            *head = *second;          // pull the next key up into the head
            free (second);
        }
        return true;
    }

    for (Node *prev = head; prev->next != NULL; prev = prev->next) {
        if (prev->next->data == key) {
            Node *victim = prev->next;
            prev->next = victim->next;
            free (victim);
            return true;
        }
    }
    return false;
}

// Releases every chained node and the bucket array, leaving an empty table
// object that must be re-initialised before further use.
void DestroyHashTable (HashTable *H) {
    if (H->table == NULL) {
        return;
    }
    for (int i = 0; i < H->count; ++i) {
        Node *p = H->table[i].next;
        while (p != NULL) {
            Node *next = p->next;
            free (p);
            p = next;
        }
    }
    free (H->table);
    H->table = NULL;
    H->count = 0;
}

// Prints every bucket as "<1-based index>:" followed by its chain of keys, or
// NULL for an empty bucket.
void print (HashTable *H) {
    for (int i = 0; i < H->count; ++i) {
        cout << i + 1 << ": " << endl;
        cout << "\t";
        if (H->table[i].data != NULLKEY) {
            cout << H->table[i].data;
        } else {
            cout << "NULL";
        }
        for (const Node *p = H->table[i].next; p != NULL; p = p->next) {
            cout << " -> " << p->data;
        }
        cout << endl;
    }
}

// Demo driver. Define CHAINED_HASH_NO_MAIN to compile only the table routines
// (for example when linking them into a unit test).
#ifndef CHAINED_HASH_NO_MAIN
int main() {
    srand (static_cast<unsigned> (time (NULL)));
    HashTable H;
    InitHashTable (&H);

    for (int i = 0; i < MAXSIZE; ++i) {
        InsertHashTable (&H, rand () % 65536);
    }
    print (&H);

    printf ("Input the number to search:\n");
    int key;
    if (!(cin >> key)) {
        printf ("Invalid input\n");
        DestroyHashTable (&H);
        return 1;
    }

    if (SearchHashTable (&H, key)) {
        printf ("Found :%d\n", key);
    } else {
        printf ("Not Found:%d\n", key);
    }

    DestroyHashTable (&H);
    return 0;
}
#endif
PROB4开放地址法 (这份代码几乎 Copy from reference, 有空会改进)
//#pragma comment(linker, "/STACK:16777216") //for c++ Compiler
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <stack>
#include <string>
#include <map>
#include <set>
#include <list>
#include <queue>
#include <vector>
#include <ctime>
#include <algorithm>
#define Max(a,b) (((a) > (b)) ? (a) : (b))
#define Min(a,b) (((a) < (b)) ? (a) : (b))
#define Abs(x) (((x) > 0) ? (x) : (-(x)))
#define MOD 1000000007
#define pi acos(-1.0)

// Number of slots in the table. It is a prime so that every step size in
// [1, slot_size - 1] is coprime to it and a probe sequence visits every slot.
#define slot_size 100003
#define arr_size 80000       // number of keys inserted by the demo
#define min_size 0           // lower bound of each random component
#define max_size 999         // upper bound of each random component
#define total_size 999999    // largest key the demo can generate
#define NIL -1               // slot has never been used
#define DELE -2              // slot held a key that was deleted (tombstone)

using namespace std;

typedef long long ll ;
typedef unsigned long long ull ;
typedef unsigned int uint ;
typedef unsigned char uchar ;

template<class T> inline void checkmin(T &a,T b){if(a>b) a=b;}
template<class T> inline void checkmax(T &a,T b){if(a<b) a=b;}

const double eps = 1e-7 ;
const int N = 210 ;
const int M = 1100011*2 ;
const ll P = 10000000097ll ;
const int MAXN = 10900000 ;

long* arr_set;                 // keys generated for the current round
long link_hash[slot_size];     // the open-addressing table
long suc_count=0;
long unsuc_count=0;

// Slot examined on the i-th probe (i = 0, 1, 2, ...) for a key.
// Double hashing: the start slot is key mod slot_size and the step is
// 1 + key mod (slot_size - 1), which is never zero.
// Precondition: key >= 0 and i >= 0.
long hash_function (long key,long i) {
    const long long start = key % slot_size;
    const long long step = 1 + key % (slot_size - 1);
    // i is reduced first and the product is formed in long long, because
    // i * step can exceed the range of a 32-bit long.
    return static_cast<long> ((start + (i % slot_size) * step) % slot_size);
}

// Returns a 30-bit pseudo-random value built from two rand() calls, so the
// result does not depend on how large RAND_MAX is on the platform.
static unsigned long wide_rand () {
    const unsigned long high = static_cast<unsigned long> (rand ()) & 0x7fffUL;
    const unsigned long low = static_cast<unsigned long> (rand ()) & 0x7fffUL;
    return (high << 15) | low;
}

// Generates `size` distinct random keys of the form up * (max + 1) + down,
// where up and down are drawn from [min, max].
// Returns a new[]-allocated array the caller must delete[], or NULL when
// max <= min, min < 0, size < 0, or more keys are requested than distinct
// values exist. Negative keys are refused because NIL and DELE are negative.
long* ran_arr (long size, long min = 0, long max = 999) {
    if (max <= min || min < 0 || size < 0) {
        return NULL;
    }
    const long long span = static_cast<long long> (max) - min + 1;
    // span * span cannot overflow long long while span < 2^31.
    if (span < 2147483648LL && size > span * span) {
        return NULL;
    }

    // Seed only on the first call so that two calls within the same second do
    // not produce the same sequence.
    static bool seeded = false;
    if (!seeded) {
        srand (static_cast<unsigned> (time (NULL)));
        seeded = true;
    }

    long* arr = new long[size];
    set<long> seen;               // keys handed out so far
    for (long i = 0; i < size; ) {
        const long up = min + static_cast<long> (wide_rand () % static_cast<unsigned long long> (span));
        const long down = min + static_cast<long> (wide_rand () % static_cast<unsigned long long> (span));
        const long candidate = up * (max + 1) + down;
        if (seen.insert (candidate).second) {
            arr[i++] = candidate;
        }
    }
    return arr;
}

// Prints the first `size` elements of `set`, one per line.
void print_arr(long* set,long size) {
    for (long i = 0; i < size; i++) {
        cout << set[i] << endl;
    }
}

// Returns the slot currently holding k, or -1 when k is not in the table.
// The search follows k's probe sequence, steps over tombstones and stops at
// the first never-used slot.
static long find_slot (long k) {
    if (k < 0) {
        return -1;                // negative values are sentinels, not keys
    }
    for (long i = 0; i < slot_size; i++) {
        const long j = hash_function (k, i);
        if (link_hash[j] == k) {
            return j;
        }
        if (link_hash[j] == NIL) {
            return -1;
        }
    }
    return -1;
}

// Inserts k. Returns true when k is stored afterwards (including the case that
// it was already present) and false when k is negative or no slot is free.
// The first tombstone met on the probe sequence is reused, but only after the
// rest of the sequence has been checked for an existing copy of k.
bool hash_insert(long k) {
    if (k < 0) {
        return false;
    }
    long target = -1;             // first reusable slot seen so far
    for (long i = 0; i < slot_size; i++) {
        const long j = hash_function (k, i);
        if (link_hash[j] == k) {
            return true;
        }
        if (link_hash[j] == NIL) {
            if (target < 0) {
                target = j;
            }
            break;                // k cannot be stored beyond an unused slot
        }
        if (link_hash[j] == DELE && target < 0) {
            target = j;
        }
    }
    if (target < 0) {
        return false;
    }
    link_hash[target] = k;
    return true;
}

// Returns true when k is stored in the table.
bool hash_find (long k) {
    return find_slot (k) >= 0;
}

// Deletes k by replacing it with a tombstone so that probe sequences passing
// through the slot stay intact. Returns true when k was present.
bool hash_delete (long k) {
    const long j = find_slot (k);
    if (j < 0) {
        return false;
    }
    link_hash[j] = DELE;
    return true;
}

// Prints slots [start, end) four per line; the range is clipped to the table.
void print_hash (long start, long end) {
    if (start < 0) {
        start = 0;
    }
    if (end > slot_size) {
        end = slot_size;
    }
    long count = 0;
    for (long j = start; j < end; j++) {
        if(link_hash[j] == NIL) {
            cout<<j<<"[NIL]"<<" ";
        } else if (link_hash[j] == DELE) {
            cout<<j<<"[DEL]"<<" ";
        } else {
            cout<<j<<"["<<link_hash[j]<<"] ";
        }
        count++;
        if(count == 4) {
            count = 0;
            cout<<endl;
        }
    }
    cout<<endl;
}

// Demo driver. Define DOUBLE_HASH_NO_MAIN to compile only the table routines
// (for example when linking them into a unit test).
#ifndef DOUBLE_HASH_NO_MAIN
int main() {
    for(int d=0;d<1;d++) { //For times
        const long key_count = arr_size - d * 10000;

        arr_set = ran_arr(key_count,min_size,max_size);
        if (arr_set == NULL) {
            cout<<"cannot generate "<<key_count<<" distinct keys"<<endl;
            return 1;
        }

        // Mark every slot as never used.
        for(long n=0;n<slot_size;n++) {
            link_hash[n]=NIL;
        }
        cout<<"befor the insertion:"<<endl<<endl;
        print_hash(200,232);

        // Insert
        for(long m=0; m<key_count; m++) {
            hash_insert(arr_set[m]);
        }
        cout<<"the size of NUMBER is: "<<key_count<<endl;
        cout<<"the size of SLOT is: "<<slot_size<<endl;
        cout<<"the value of a=n/m is: "<<float(key_count)/float(slot_size)<<endl;
        cout<<"***********************************************************"<<endl<<endl;
        cout<<"after the insertion:"<<endl<<endl;
        print_hash(200,232);

        // Look up
        for(long n=0; n<key_count; n++) {
            if(hash_find(arr_set[n])) {
                suc_count++;
            } else {
                unsuc_count++;
            }
        }
        cout<<"the success finding count is :"<<suc_count<<endl;
        cout<<"the unsuccess finding count is :"<<unsuc_count<<endl<<endl;
        cout<<"***********************************************************"<<endl<<endl;
        suc_count=unsuc_count=0;

        // Delete
        for(long j=0; j<key_count; j++) {
            if(hash_delete(arr_set[j])) {
                suc_count++;
            } else {
                unsuc_count++;
            }
        }
        cout<<"the success deleting count is :"<<suc_count<<endl;
        cout<<"the unsuccess deleting count is :"<<unsuc_count<<endl<<endl;
        suc_count=unsuc_count=0;
        print_hash(200,232);

        delete[] arr_set;
        arr_set = NULL;
    }
    return 0;
}
#endif
【未完待续】