自己给自己放了七天假,回家休息一下。在家懒,七天就写了300多行代码,还质量不高...
在《计算机编程艺术——vol3》中(以下简称Art3),排序内容前言的习题中谈到了带主键和次键的排序问题,其实就类似于字符串的比较方式,即比完主键看次键1,再看次键2……。有三种方法:
A:先按主键排序,再在排完序的序列里将主键相同的元素划分为一个个区域,在每个区域内按次键1排序,递归地进行下去,直到次键全部比较完或区域中只剩下一个元素。
B:先以最小的次键n排序,再以次键n-1排序,...一直到主键。
C:排序时比较是以字典排序法比较,一次搞定。
以下是我的实现和测试代码:
1 /* 对于具有主键(唯一)和次键(>=0)的排序,有三种方式:
2 1.先对主键进行排序进行分组,在每个分组内再依次对各个次键进行排序分组
3 2.先对优先级最低的键进行排序,然后再逐步上升
4 3.对于排序的元素比较方式是字典排序法
5
6 注意:要使三种排序的结果一致,前提条件是排序是稳定的。
7
8 实验表明三种方案中,C(字典排序法)是最稳定的,效率最高的。
9 */
// for timeval and gettimeofday
#include <sys/time.h>

#include <stdio.h>
#include <stdlib.h>
// for time(), used to seed the random number generator
#include <time.h>
15
/*
 * Identifiers of the three sorting strategies, passed as the `type`
 * argument of analysis().
 *
 * These are enum constants rather than object-like macros: a macro named
 * A, B or C would be substituted into every later identifier with that
 * name (struct members, parameters, locals), producing confusing errors.
 */
enum
{
    A = 1, /* sort by the primary key, then recurse into each group of equal keys */
    B = 2, /* stable-sort on the lowest-priority key first, the primary key last */
    C = 3  /* one sort using a lexicographic comparison of all keys */
};
19
/*
 * One record to be sorted. keys[0] is compared first (the primary key);
 * higher indices are lower-priority secondary keys. Room for 100 keys.
 */
typedef struct { int keys[100]; } elem;
25
/* Accumulated running time of each of the three strategies, in microseconds. */
unsigned int total_A_u = 0, total_B_u = 0, total_C_u = 0;
30
31
32
33
34 typedef int ( * msort_func)( void * a, void * b, int at);
35
36 // merge sort 保证排序的稳定性
37 // 参数:
38 // array为代排序的序列
39 // n序列元素的个数
40 // size序列元素的大小
41 // f为比较函数
42 //
43 void msort(elem ** array,size_t n, int key_num,msort_func f);
44
45 // 模拟情形1:已经排好序的序列,但序列中的元素数值是随机的
46 void sorted_random(elem ** pe,unsigned int n, int key_num);
47 // 模拟情景2:完全反序
48 void res_sorted_random(elem ** pe,unsigned int n, int key_num);
49 // 模拟情景3:完全随机
50 void unsorted(elem ** pe,unsigned int n, int key_num);
51
52 // 分析函数:统计分析每一种情形下的三种排序方法的复杂度:
53 // 1.比较次数
54 // 2.总时间
55 void analysis(elem ** pe,unsigned int n, int key_num, int type);
56
57
58 void print_keys(elem ** pe, int n, int key_num)
59 {
60 int i,j;
61 for (i = 0 ;i < n;i ++ )
62 {
63 printf( " The %dth element is: " ,i + 1 );
64 for (j = 0 ;j < key_num;j ++ )
65 {
66 printf( " %d " ,pe[i] -> keys[j]);
67 }
68 printf( " \n " );
69 }
70 }
71
72 int sort_func( void * a , void * b, int at)
73 {
74 elem * x = (elem * )a;
75 elem * y = (elem * )b;
76 return (x -> keys[at] - y -> keys[at]);
77 }
78
79 int main()
80 {
81 unsigned int elem_num;
82 scanf( " %u " , & elem_num);
83 elem ** pelems = (elem ** )malloc( sizeof (elem * ) * elem_num);
84 int i;
85 for (i = 0 ;i < elem_num;i ++ )
86 pelems[i] = (elem * )malloc( sizeof (elem));
87 int key_num;
88 for (key_num = 10 ;key_num <= 30 ;key_num ++ )
89 {
90 sorted_random(pelems,elem_num,key_num);
91 printf( " SORTED:\n " );
92 print_keys(pelems,elem_num,key_num);
93 analysis(pelems,elem_num,key_num,A);
94 analysis(pelems,elem_num,key_num,B);
95 analysis(pelems,elem_num,key_num,C);
96
97 res_sorted_random(pelems,elem_num,key_num);
98 printf( " RES SORTED\n " );
99 print_keys(pelems,elem_num,key_num);
100 analysis(pelems,elem_num,key_num,A);
101 analysis(pelems,elem_num,key_num,B);
102 analysis(pelems,elem_num,key_num,C);
103
104 unsorted(pelems,elem_num,key_num);
105 printf( " RANDOM\n " );
106 print_keys(pelems,elem_num,key_num);
107 analysis(pelems,elem_num,key_num,A);
108 analysis(pelems,elem_num,key_num,B);
109 analysis(pelems,elem_num,key_num,C);
110
111 printf( " Total time of A:%ld B:%ld C:%ld\n " ,total_A_u,total_B_u,total_C_u);
112 }
113 for (i = 0 ;i < elem_num;i ++ )
114 free(pelems[i]);
115 free(pelems);
116 return 0 ;
117 }
118
119 void msort(elem ** array,size_t n, int key_num,msort_func f)
120 {
121 if (n <= 1 )
122 return ;
123 int mid = n / 2 ;
124 msort(array,mid,key_num,f);
125 msort(array + mid,n - mid,key_num,f);
126
127 elem ** tmp = (elem ** )malloc(n * sizeof (elem * ));
128 int i,j,k;
129 k = i = 0 ;j = mid;
130 while (i < mid && j < n)
131 {
132 // [i] > [j]
133 if (f(array[i],array[j],key_num) > 0 )
134 {
135 tmp[k] = array[j];
136 j ++ ;
137 }
138 // [i] <= [j]
139 else
140 {
141 tmp[k] = array[i];
142 i ++ ;
143 }
144 k ++ ;
145 }
146 if (k != n)
147 {
148 if (i < mid)
149 {
150 while (i < mid)
151 tmp[k ++ ] = array[i ++ ];
152 }
153 else
154 {
155 while (j < n)
156 tmp[k ++ ] = array[j ++ ];
157 }
158 }
159 for (i = 0 ;i < n;i ++ )
160 array[i] = tmp[i];
161 }
162
/*
 * Return the next pseudo-random number from rand().
 *
 *   last - the previously returned value; accepted so existing callers
 *          keep working, but no longer used
 *
 * The generator is seeded from the clock once, on the first call, instead
 * of being reseeded on every call.
 *
 * Declared `static inline`: a plain `inline` definition provides no
 * external definition in C99 and later, so calls could fail to link, and
 * it may not contain a modifiable static local variable.
 */
static inline unsigned int gen_rand(unsigned int last)
{
    static int seeded = 0;

    (void)last;
    if (!seeded)
    {
        srand((unsigned int)time(NULL));
        seeded = 1;
    }
    return (unsigned int)rand();
}
170
171 void sorted_random(elem ** pe,unsigned int n, int key_num)
172 {
173 int at = 0 ;
174 for (;at < key_num;at ++ )
175 {
176 int highest = 10000 ;
177 unsigned int remain = n;
178 int now = 0 ;
179 unsigned int last = 0 ;
180 while (remain)
181 {
182 last = gen_rand(last);
183 if ((last % highest) < remain)
184 {
185 pe[n - remain] -> keys[at] = now;
186 remain -- ;
187 }
188 else
189 {
190 now ++ ;
191 highest -- ;
192 }
193 }
194 }
195 }
196
197
198
199
200 void res_sorted_random(elem ** pe,unsigned int n, int key_num)
201 {
202 int at = 0 ;
203 for (;at < key_num;at ++ )
204 {
205 int highest = 10000 ;
206 unsigned int remain = n;
207 int now = 0 ;
208 unsigned int last = 0 ;
209 while (remain)
210 {
211 last = gen_rand(last);
212 if ((last % highest) < remain)
213 {
214 pe[remain - 1 ] -> keys[at] = now;
215 remain -- ;
216 }
217 else
218 {
219 now ++ ;
220 highest -- ;
221 }
222 }
223 }
224 }
225 void unsorted(elem ** pe,unsigned int n, int key_num)
226 {
227 int at = 0 ;
228 for (;at < key_num;at ++ )
229 {
230 int highest = 10000 ;
231 int i;
232 unsigned int last = 0 ;
233 for (i = 0 ;i < n;i ++ )
234 {
235 last = gen_rand(last);
236 pe[i] -> keys[at] = (last % highest);
237 }
238 }
239 }
240
241 void plan_A(elem ** pelems,unsigned int n, int key_num, int now)
242 {
243 if (now == key_num || n == 1 )
244 return ;
245 msort(pelems,n,now,sort_func);
246 int group_val = ( * pelems) -> keys[now];
247 int i = 1 ;
248 elem ** group = pelems;
249 elem ** end = pelems + n;
250 while (group + i != end)
251 {
252 if (pelems[i] -> keys[now] == group_val)
253 {
254 i ++ ;
255 }
256 else
257 {
258 plan_A(group,i,key_num,now + 1 );
259 group += i;
260 i = 1 ;
261 if (group != end)
262 group_val = ( * group) -> keys[now];
263 }
264 }
265 }
266 void plan_B(elem ** pelems,unsigned int n, int key_num)
267 {
268 elem ** tpelems = (elem ** )malloc( sizeof (elem * ) * n);
269 int i;
270 for (i = 0 ;i < n;i ++ )
271 tpelems[i] = pelems[i];
272 int now = key_num - 1 ;
273 while (now >= 0 )
274 {
275 msort(tpelems,n,now,sort_func);
276 now -- ;
277 }
278
279 print_keys(tpelems,n,key_num);
280 free(tpelems);
281 }
282 int sort_func_C( void * a, void * b, int key_num)
283 {
284 elem * x = (elem * )a;
285 elem * y = (elem * )b;
286 int i;
287 for (i = 0 ;i < key_num;i ++ )
288 {
289 if (x -> keys[i] != y -> keys[i])
290 return (x -> keys[i] - y -> keys[i]);
291 }
292 return 0 ;
293 }
294 void plan_C(elem ** pelems,unsigned int n, int key_num)
295 {
296 elem ** tpelems = (elem ** )malloc( sizeof (elem * ) * n);
297 int i;
298 for (i = 0 ;i < n;i ++ )
299 tpelems[i] = pelems[i];
300
301 msort(tpelems,n,key_num,sort_func_C);
302 print_keys(tpelems,n,key_num);
303 free(tpelems);
304 }
305
306
307
308
309 void analysis(elem ** pelems,unsigned int n, int key_num, int type)
310 {
311 struct timeval tv1;
312 struct timeval tv2;
313 unsigned long micro_time_passed;
314 switch (type)
315 {
316 case A:
317 {
318 gettimeofday( & tv1,NULL);
319
320 elem ** tpelems = (elem ** )malloc( sizeof (elem * ) * n);
321 int i;
322 for (i = 0 ;i < n;i ++ )
323 tpelems[i] = pelems[i];
324
325 plan_A(tpelems,n,key_num, 0 );
326
327 print_keys(tpelems,n,key_num);
328 free(tpelems);
329
330 gettimeofday( & tv2,NULL);
331
332 micro_time_passed = (tv2.tv_sec - tv1.tv_sec) * 1000 + (tv2.tv_usec - tv1.tv_usec) / 1000 ;
333 total_A_u += (tv2.tv_usec - tv1.tv_usec + (tv2.tv_sec - tv1.tv_sec) * 1000000 );
334 printf( " plan A cost %ld micro seconds sec:%ld usec %ld\n " ,micro_time_passed,(tv2.tv_sec - tv1.tv_sec),(tv2.tv_usec - tv1.tv_usec));
335 break ;
336
337 }
338 case B:
339 {
340 gettimeofday( & tv1,NULL);
341
342 plan_B(pelems,n,key_num);
343
344 gettimeofday( & tv2,NULL);
345
346 micro_time_passed = (tv2.tv_sec - tv1.tv_sec) * 1000 + (tv2.tv_usec - tv1.tv_usec) / 1000 ;
347
348 total_B_u += (tv2.tv_usec - tv1.tv_usec + (tv2.tv_sec - tv1.tv_sec) * 1000000 );
349 printf( " plan B cost %ld micro seconds sec:%ld usec %ld\n " ,micro_time_passed,(tv2.tv_sec - tv1.tv_sec),(tv2.tv_usec - tv1.tv_usec));
350 break ;
351 }
352 case C:
353 {
354 gettimeofday( & tv1,NULL);
355
356 plan_C(pelems,n,key_num);
357
358 gettimeofday( & tv2,NULL);
359
360 micro_time_passed = (tv2.tv_sec - tv1.tv_sec) * 1000 + (tv2.tv_usec - tv1.tv_usec) / 1000 ;
361
362 total_C_u += (tv2.tv_usec - tv1.tv_usec + (tv2.tv_sec - tv1.tv_sec) * 1000000 );
363 printf( " plan C cost %ld micro seconds sec:%ld usec %ld\n " ,micro_time_passed,(tv2.tv_sec - tv1.tv_sec),(tv2.tv_usec - tv1.tv_usec));
364 break ;
365 }
366 }
367 }
368
369
从测试结果来看,毫无疑问:第三种方法的平均效率最高。但我一直以为方法B凭借着方法的简单性,会比方法A快,可是实际上并不是如此……究其原因,应该是B方法太呆板,无论源数据是什么情况,全都是一股脑地从n键排到主键。看来在做事和做人的时候,有时候也不能太呆板,凡事都有其解决之道,而不能一种方法走天下啊。呵呵,卖弄了卖弄了。