输入:一个最多包含n个正整数的文件,每个数都小于n,其中n = 10^7。如果在输入文件中有任何整数重复出现就是致命错误。没有其他数据与该整数相关联。
版本一 rand()函数
/** * @file generate_random_data.c * @brief generate random data:generate different numbers by line to * random_data.txt. * your should input the max value and generate counts. * @author chenxilinsidney * @version 1.0 * @date 2014-12-29 */ #include
#include #include #include "memory.h" // #define NDEBUG #include // #define NDBG_PRINT #include "debug_print.h" typedef unsigned int TYPE; int main(void) { TYPE max_value = 0; TYPE random_number = 0; TYPE flag_can_repeat = 0; TYPE index, k; /// get max value /// to use "%" operater with rand(), max value should less than RAND_MAX printf("MAX VALUE of the random number: %u\n", (unsigned)RAND_MAX - 1); printf("Please input the max value(excluded) of the random number: "); if(scanf("%u", &max_value) != 1) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("can not get the right max value(excluded).\n"); DEBUG_PRINT_VALUE("%u", max_value); fflush(stdout); assert(0); exit(EXIT_FAILURE); } else if(max_value > (RAND_MAX - 1)) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("value exceeds the limit max value.\n"); DEBUG_PRINT_VALUE("%u", max_value); fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// get count printf("Please input the count of the random number: "); if(scanf("%u", &random_number) != 1) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("can not get the right count.\n"); DEBUG_PRINT_VALUE("%u", random_number); fflush(stdout); assert(0); exit(EXIT_FAILURE); } else if(random_number > max_value) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("count exceeds the max value.\n"); DEBUG_PRINT_VALUE("%u", max_value); fflush(stdout); assert(0); exit(EXIT_FAILURE); } printf("Please choose if data can be same in them(input 1(YES)/0(NO):"); if(scanf("%u", &flag_can_repeat) != 1) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("can not get the choice.\n"); DEBUG_PRINT_VALUE("%u", flag_can_repeat); fflush(stdout); assert(0); exit(EXIT_FAILURE); } else if(flag_can_repeat != 0 && flag_can_repeat != 1) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("can not get right choice(1 or 2).\n"); DEBUG_PRINT_VALUE("%u", flag_can_repeat); fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// generate random number TYPE* list = SMALLOC(random_number, TYPE); srand(time(NULL)); printf("start generating random numbers...........\n"); 
if(flag_can_repeat) { for(index = 0; index < random_number; index++) { TYPE flag = 1; while(flag) { list[index] = rand() % max_value; flag = 0; for(k = 0; k < index; k++) { if(list[k] == list[index]) { flag = 1; break; } } } } } else { for(index = 0; index < random_number; index++) { list[index] = rand() % max_value; } } /// write data to file FILE* fp = fopen("random_data.txt", "w"); if(fp == NULL) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } for(index = 0; index < random_number; index++) { fprintf(fp, "%u\n", list[index]); } if(fclose(fp) != 0) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } printf("random numbers saved to random_data.txt!\n"); /// free memory SFREE(&list); /// return return EXIT_SUCCESS; }
/**
 * @file debug.h
 * @brief Debug output through stdio. Three macros are provided:
 *        DEBUG_PRINT_STATE prints the current file name and line number,
 *        DEBUG_PRINT_VALUE prints an expression's text and its value,
 *        DEBUG_PRINT_STRING prints a string verbatim.
 *        To remove the debug output, define NDBG_PRINT before including
 *        this header (this works like NDEBUG for <assert.h>).
 *        NOTE(review): the programs include this header as "debug_print.h";
 *        confirm the actual file name.
 * @author chenxilinsidney
 * @version 1.0
 * @date 2014-12-17
 */
#ifndef DEBUG_PRINT_H_INCLUDED
#define DEBUG_PRINT_H_INCLUDED

#include <stdio.h>

#ifndef NDBG_PRINT
/** Print the source file and line where the macro is expanded. */
#define DEBUG_PRINT_STATE printf("File: %s Line: %d\n", __FILE__, __LINE__)
/**
 * Print "value <expression text> = <value>".
 * FORMAT must be a string literal holding one conversion that matches VALUE.
 * The expression text is passed as a "%s" argument rather than pasted into
 * the format, so an expression containing '%' cannot corrupt the format.
 */
#define DEBUG_PRINT_VALUE(FORMAT, VALUE) \
    printf("value %s = " FORMAT "\n", #VALUE, (VALUE))
/**
 * Print STRING exactly as given; it is never interpreted as a format,
 * so '%' characters inside it are safe.
 */
#define DEBUG_PRINT_STRING(STRING) fputs((STRING), stdout)
#else
#define DEBUG_PRINT_STATE ((void)(0))
#define DEBUG_PRINT_VALUE(FORMAT, VALUE) ((void)(0))
#define DEBUG_PRINT_STRING(STRING) ((void)(0))
#endif

#endif /* DEBUG_PRINT_H_INCLUDED */
/**
 * @copyright Copyright (C),1988-1999, Cita109 Tech. Co., Ltd.
 * @file memory.h
 * @brief memory allocate and free with checking.
 * @author author time version desc
 * @author Chen Xi Lin 14/12/17 1.2 build the module
 * @version 1.2
 * @date 2014-06-08
 */
/* Guard name avoids the double-underscore form, which is reserved for the
 * implementation. */
#ifndef MEMORY_H_INCLUDED
#define MEMORY_H_INCLUDED

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/** Allocate room for `num` objects of `type`; yields a `type *`. */
#define SMALLOC(num, type) ((type *)safe_malloc((num) * sizeof(type)))
/** Allocate zero-filled room for `num` objects of `type`; yields a `type *`. */
#define SCALLOC(num, type) ((type *)safe_calloc((num), sizeof(type)))
/** Resize `ptr` to hold `num` objects of `type`; yields a `type *`. */
#define SREALLOC(ptr, num, type) \
    ((type *)safe_realloc((ptr), (num) * sizeof(type)))
/** Free the memory `*pointer` refers to and set `*pointer` to NULL. */
#define SFREE(pointer) (safe_free((void **)(pointer)))

void* safe_malloc(size_t size);
void* safe_calloc(size_t num_elements, size_t element_size);
void* safe_realloc(void* ptr, size_t new_size);
void safe_free(void** pointer);

#endif // MEMORY_H_INCLUDED
/**
 * @copyright Copyright (C),1988-1999, Cita109 Tech. Co., Ltd.
 * @file memory.c
 * @brief memory allocate and free with checking.
 * @author author time version desc
 * @author Chen Xi Lin 14/12/17 1.2 build the module
 * @version 1.2
 * @date 2014-06-08
 */
#include "memory.h"

/**
 * @brief malloc with checking.
 *
 * @param[in] size bytes of memory
 *
 * @return pointer to memory; may be NULL only when size is 0.
 *         On allocation failure the function prints "Out of memory!" and
 *         terminates the program instead of returning.
 */
void* safe_malloc(size_t size)
{
    void* new_mem = malloc(size);
    /* malloc(0) is allowed to return NULL; that is not an out-of-memory
     * condition, so only a failed non-empty request is fatal. */
    if (NULL == new_mem && size != 0) {
        printf("Out of memory!\n");
        fflush(stdout);
        assert(0);
        exit(EXIT_FAILURE);
    }
    return new_mem;
}

/**
 * @brief calloc with checking.
 *
 * @param[in] num_elements element nums
 * @param[in] element_size size of element
 *
 * @return pointer to zero-filled memory; may be NULL only when the
 *         requested size is 0. Terminates the program on allocation failure.
 */
void* safe_calloc(size_t num_elements, size_t element_size)
{
    void* new_mem = calloc(num_elements, element_size);
    /* a NULL result for an empty request is not a failure */
    if (NULL == new_mem && num_elements != 0 && element_size != 0) {
        printf("Out of memory!\n");
        fflush(stdout);
        assert(0);
        exit(EXIT_FAILURE);
    }
    return new_mem;
}

/**
 * @brief realloc with checking.
 *
 * @param[in] ptr pointer to old memory
 * @param[in] new_size new memory size
 *
 * @return pointer to memory; may be NULL only when new_size is 0.
 *         Terminates the program on allocation failure.
 */
void* safe_realloc(void* ptr, size_t new_size)
{
    void* new_mem = realloc(ptr, new_size);
    /* a NULL result for a zero-size request is not an out-of-memory error */
    if (NULL == new_mem && new_size != 0) {
        printf("Out of memory!\n");
        fflush(stdout);
        assert(0);
        exit(EXIT_FAILURE);
    }
    return new_mem;
}

/**
 * @brief free with assignment to NULL.
 *
 * @param[in,out] pointer address of the pointer to release; the pointer it
 *                refers to is set to NULL afterwards. A NULL `pointer`
 *                argument is ignored.
 */
void safe_free(void** pointer)
{
    if (NULL == pointer) {
        return;
    }
    free(*pointer);   /* free(NULL) is a no-op, so no extra check is needed */
    *pointer = NULL;
}
版本二 洗牌法
/** * @file generate_random_data.c * @brief generate random data:generate different numbers by line to * random_data.txt. * your should input the max value(excluded) and generate counts. * @author chenxilinsidney * @version 1.0 * @date 2014-12-29 */ #include
#include #include #include "memory.h" // #define NDEBUG #include // #define NDBG_PRINT #include "debug_print.h" typedef unsigned int TYPE; int main(void) { TYPE max_value = 0; TYPE random_number = 0; TYPE index; /// get max value /// to use "%" operater with rand(), max value should less than RAND_MAX if(scanf("%u", &max_value) != 1) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("can not get the right max value(excluded).\n"); DEBUG_PRINT_VALUE("%u", max_value); fflush(stdout); assert(0); exit(EXIT_FAILURE); } else if(max_value > (RAND_MAX - 1)) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("value exceeds the limit max value.\n"); DEBUG_PRINT_VALUE("%u", max_value); fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// get count if(scanf("%u", &random_number) != 1) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("can not get the right count.\n"); DEBUG_PRINT_VALUE("%u", random_number); fflush(stdout); assert(0); exit(EXIT_FAILURE); } else if(random_number > max_value) { DEBUG_PRINT_STATE; DEBUG_PRINT_STRING("count exceeds the max value.\n"); DEBUG_PRINT_VALUE("%u", max_value); fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// initialize number TYPE* list = SMALLOC(max_value, TYPE); for(index = 0; index < max_value; index++) { list[index] = index; } /// generate random number TYPE index_from, index_to; srand(time(NULL)); for(index = 0; index < max_value; index++) { index_from = rand() % max_value; index_to = rand() % max_value; while(index_from == index_to) { index_to = rand() % max_value; } TYPE temp = list[index_to]; list[index_to] = list[index_from]; list[index_from] = temp; } /// write data to stdout for(index = 0; index < random_number; index++) { printf("%u\n", list[index]); } /// free memory SFREE(&list); /// return return EXIT_SUCCESS; } 小结:
0 1 1 1 0 1 0 0 1 0 0
位图数据结构:该数据结构描述了一个有限定义域的稠密集合,其中的每一个元素最多出现一次并且没有其他任何数据与该元素相关联。即使这些条件没有完全满足(例如,存在重复元素或额外的数据),也可以用有限定义域内的键作为一个表项更复杂的表格的索引。(摘自书中)
下面为位图数据结构实现代码,按照题目要求对数据进行处理,这里使用的是4字节数据类型分段进行存储:
/* phase 1: initialize set to empty */
for i = [0, N)
    bit[i] = 0
/* phase 2: insert present elements into the set */
for each i in the input file
    bit[i] = 1
/* phase 3: write the sorted output */
for i = [0, N)
    if bit[i] == 1
        write i on the output file
1.除数为2的幂的除法运算改为移位运算:n / 32 -> n >> 5
2.除数为2的幂的求余运算改为与运算:n % 32 -> n & 0x1F (这点参考别的代码,以前没有注意这个方面,值得学习)
3.这里使用的二进制置位(赋1)运算方法,为:value |= mask,尽管不使用到二进制复位运算(赋0),但是值得一提:
value &= ~mask,而检查二进制位1或0的方法:return value & mask.
4.本文采用的文件流是标准输入输出,在linux系统下,可以采用重定向语句来实现简化程序:./a.out < input > output
/** * @file sort_by_bitmap_structure.c * @brief sort a list of random data quickly by bitmap structure. * @author chenxilinsidney * @version 1.0 * @date 2014-12-30 */ #include
#include #include // #define NDEBUG #include #include "memory.h" #include "type.h" // #define NDBG_PRINT #include "debug_print.h" typedef uint32 TYPE; /// max random data value #define MAX_VALUE 10000000 /// bitmap structure initialize to empty #define BITMAP_BITS 32 #define BITMAP_SHIFT 5 #define BITMAP_MASK 0x1F #define BITMAP_LENGTH ((unsigned)((MAX_VALUE)+(BITMAP_BITS)-1)>>(BITMAP_SHIFT)) TYPE bitmap_structure[BITMAP_LENGTH] = {0}; int main(void) { /// insert present elements into the set /// read data from file FILE* fr = fopen("random_data.txt", "r"); if(fr == NULL) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// set bitmap structure TYPE value; TYPE count = 0; while(fscanf(fr, "%u\n", &value) == 1) { bitmap_structure[value >> BITMAP_SHIFT] |= 1 << (value & BITMAP_MASK); count++; } printf("get total number: %d.\n", count); /// close read data file if(fclose(fr) != 0) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// write sorted output /// set write data file FILE* fw = fopen("sorted_data.txt","w"); if(fw == NULL) { perror("sorted_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// write sorted data to file TYPE i; for(i = 0; i < count; i++) { if(bitmap_structure[i >> BITMAP_SHIFT] & (1 << (i & BITMAP_MASK))) { fprintf(fw, "%u\n", i); } } /// close write data file if(fclose(fw) != 0) { perror("sorted_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } return EXIT_SUCCESS; } 四、多趟算法
多趟算法严格解决了题目中1MB内存的上限,因为若使用位图数据结构,10^7个数据需要的内存为10^7bits = 1250000bytes ~= 1.19MB > 1MB。多趟算法在本题中指的是多趟遍历文件采取同一算法进行处理,再整合多趟处理下来的各个结果。这里我一开始想的是,先读取数据(文件)前半部分进行排序,再去读数据(文件)后半部分进行排序,产生的问题是两个各自排完序后再合成时是无法全部有序排列的,无法解决题目问题。仔细想了想,这个想法确实是错误的。
正确的想法是:在我们执行多趟算法时应该先对输入的数据进行筛选才对。这里筛选方法是第一趟先选出小于(5 * 10^6)的数据进行排序写入文件,第二趟再选出大于等于(5 * 10^6)的数据进行排序写入文件,这样就解决了严格1MB内存上限问题同时也解决题目的排序问题。
/** * @file sort_by_bitmap_structure.c * @brief sort a list of random data quickly by bitmap structure in two steps. * @author chenxilinsidney * @version 1.0 * @date 2014-12-30 */ #include
#include #include // #define NDEBUG #include #include "memory.h" #include "type.h" // #define NDBG_PRINT #include "debug_print.h" typedef uint32 TYPE; /// max random data value #define MAX_VALUE 5000000 /// bitmap structure initialize to empty #define BITMAP_BITS 32 #define BITMAP_SHIFT 5 #define BITMAP_MASK 0x1F #define BITMAP_LENGTH ((unsigned)((MAX_VALUE)+(BITMAP_BITS)-1)>>(BITMAP_SHIFT)) TYPE bitmap_structure[BITMAP_LENGTH] = {0}; int main(void) { /// insert present elements into the set /// read data from file in first step FILE* fr = fopen("random_data.txt", "r"); if(fr == NULL) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// set bitmap structure in first step. TYPE value; TYPE count = 0; while(fscanf(fr, "%u\n", &value) == 1) { if(value < MAX_VALUE) { bitmap_structure[value >> BITMAP_SHIFT] |= 1 << (value & BITMAP_MASK); count++; } } printf("get total number in first step: %d.\n", count); /// write sorted output /// set write data file FILE* fw = fopen("sorted_data.txt","w"); if(fw == NULL) { perror("sorted_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// write sorted data to file in first step TYPE i; for(i = 0; i < count; i++) { if(bitmap_structure[i >> BITMAP_SHIFT] & (1 << (i & BITMAP_MASK))) { fprintf(fw, "%u\n", i); } } /// read data from file in second step fr = freopen("random_data.txt", "r", fr); if(fr == NULL) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// set bitmap structure in second step. 
count = 0; while(fscanf(fr, "%u\n", &value) == 1) { if(value >= MAX_VALUE) { value -= MAX_VALUE; bitmap_structure[value >> BITMAP_SHIFT] |= 1 << (value & BITMAP_MASK); count++; } } printf("get total number in second step: %d.\n", count); /// write sorted data to file in second step for(i = 0; i < count; i++) { if(bitmap_structure[i >> BITMAP_SHIFT] & (1 << (i & BITMAP_MASK))) { fprintf(fw, "%u\n", i + MAX_VALUE); } } /// close read data file if(fclose(fr) != 0) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// close write data file if(fclose(fw) != 0) { perror("sorted_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } return EXIT_SUCCESS; }
使用标准库 <stdlib.h> 中的qsort函数进行排序(学习自《C和指针》一书),另外书中也介绍了对排好序的数组进行查找的函数bsearch函数(二分法),这里没有用到,未来实践时可以留意并使用。 使用库函数进行排序代码:
/** * @file sort_by_bitmap_structure.c * @brief sort a list of random data by standard library. * @author chenxilinsidney * @version 1.0 * @date 2014-12-30 */ #include
#include #include // #define NDEBUG #include #include "memory.h" #include "type.h" // #define NDBG_PRINT #include "debug_print.h" typedef uint32 TYPE; #define MAX_VALUE 10000000 uint32 list[MAX_VALUE] = {0}; /** * @brief this function is used to compare a and b used by qsort function. * * @param[in] a first data * @param[in] b second data * * @return >1 if a > b, =0 if a == b, <0 if a < b */ int list_compare(void const* a, void const* b) { TYPE first = *(TYPE*)a; TYPE second = *(TYPE*)b; if(first > second) return 1; else if(first == second) return 0; else return -1; } int main(void) { /// read data from file FILE* fr = fopen("random_data.txt", "r"); if(fr == NULL) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// read data to list TYPE count = 0; while(fscanf(fr, "%u\n", list + count) == 1) { ++count; } /// close read data file if(fclose(fr) != 0) { perror("random_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// sort data by standard library qsort(list, count, sizeof(TYPE), list_compare); /// set write data file FILE* fw = fopen("sorted_data.txt","w"); if(fw == NULL) { perror("sorted_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } /// write sorted data to file TYPE i; for(i = 0; i < count; i++) { fprintf(fw, "%u\n", list[i]); } /// close write data file if(fclose(fw) != 0) { perror("sorted_data.txt"); DEBUG_PRINT_STATE; fflush(stdout); assert(0); exit(EXIT_FAILURE); } return EXIT_SUCCESS; }
2.全文源码均开源(在UBUNTU + GCC4.8.2下编译并测试通过),可下载或查看:https://github.com/chenxilinsidney/funnycprogram/tree/master/programming_pearls/chapter_1