题目:
从N个数中选取最大的前10个, 有序输出.
N最大可能达到1000亿
每个数范围是0 - 2147483647
C语言版测试结果:
输入100万条
总计[1000000]个输入
总计比较[2001654]次
总计写内存[552]次
总计耗时[0.014687s]
C语言版本解决方案
#include <stdio.h> #include <time.h> #include <stdlib.h> #include <unistd.h> #include <strings.h> /* * author: goosman.lei * mail: [email protected] * blog: http://blog.csdn.net/lgg201 */ #define BUFF_LEN (4096) /* #define DEBUG */ #define INFO typedef struct queue_s queue_t; struct queue_s { int data; queue_t *next; }; static void generate_test_data(long n) { long i; int r; int l; l = sizeof(int); srand(time(NULL)); for ( i = 0; i < n; i ++ ) { r = rand(); fprintf(stdout, "%d\n", r); write(STDERR_FILENO, &r, l); } } static int read_input(int fd, void *buff, int buff_len) { int i, ret; for ( i = 0; i < buff_len; ) { ret = read(fd, buff, BUFF_LEN); if ( -1 == ret ) { perror("read error\n"); exit(0); } else if ( 0 == ret ) { break; } else { buff += ret; i += ret; } } return i; } static void dump_link(queue_t *q, int n) { for ( ; q != NULL; q = q->next ) fprintf(n ? stderr : stdout, "%d\n", q->data); if ( n ) printf("\n"); } int main(int argc, char *argv[]) { int *sbuff; int i, j, n; queue_t *rbuff, **tmp, *t; #ifdef INFO int s_0, s_1, s_2; struct timeval begin, end; #endif if ( argc < 2 ) { printf("usage: \n\t1. 生成测试数据: %s <number> /* 标准错误以二进制方式输出测试数据, 标准输出以文本方式输出测试数据用于脚本校验 */\n\t2. 执行Top 10查找: %s <exec> /* 标准输出输出前10个最大数据(倒序), 开启INFO时在标准错误输出统计信息, 开启DEBUG时在标准错误输出调试信息\n", argv[0], argv[0]); return (0); } if ( strcmp(argv[1], "exec") != 0 ) { /* 不考虑数字输入的容错了 */ generate_test_data(atoi(argv[1])); return 0; } sbuff = malloc(1024 * 1024 * 4); rbuff = malloc(sizeof(queue_t) * 10); /* 缓冲区初始化 */ bzero(rbuff, sizeof(queue_t) * 10); for ( i = 0; i < 9; i ++ ) { (rbuff + i)->next = (rbuff + i + 1); (rbuff + i)->data = -1; } (rbuff + 9)->data = -1; (rbuff + 9)->next = NULL; #ifdef DEBUG dump_link(rbuff, 1); #endif #ifdef INFO s_0 = 0; s_1 = 0; s_2 = 0; gettimeofday(&begin, NULL); #endif while ( 0 != (n = read_input(STDIN_FILENO, sbuff, 1024 * 1024 * 4)) ) { #ifdef INFO s_0 ++; #endif j = n / 4; #ifdef INFO s_2 += j; #endif for ( i = 0; i < j; i ++ ) { #ifdef INFO s_0 ++; #endif #ifdef DEBUG fprintf(stderr, "processing %d\n", *(sbuff + i)); #endif for ( tmp = &rbuff; *tmp != NULL && *(sbuff + i) >= (*tmp)->data; ) { tmp = &((*tmp)->next); #ifdef INFO s_0 += 2; #endif } #ifdef INFO s_0 ++; #endif if ( *tmp == rbuff ) { continue; } #ifdef DEBUG fprintf(stderr, "tmp %d %p, rbuff %d %p\n", (*tmp) == NULL ? -1 : (*tmp)->data, *tmp, rbuff->data, rbuff); #endif rbuff->data = *(sbuff + i); #ifdef INFO s_1 ++; s_0 ++; #endif if ( *tmp != rbuff->next ) { t = rbuff; rbuff = rbuff->next; t->next = NULL == *tmp ? NULL : *tmp; *tmp = t; #ifdef INFO s_1 += 4; s_0 ++; #endif } #ifdef DEBUG dump_link(rbuff, 1); #endif } } #ifdef INFO gettimeofday(&end, NULL); #endif dump_link(rbuff, 0); #ifdef INFO fprintf(stderr, "总计[%d]个输入\n总计比较[%d]次\n总计写内存[%d]次\n总计耗时[%0.6fs]\n", s_2, s_0, s_1, (end.tv_sec * 1000000 + end.tv_usec - begin.tv_sec * 1000000 - begin.tv_usec) / 1000000.0); #endif return 0; }