问题定义:
给定一个输入文本文件,查找其中最长的重复子串。例如,"Ask not what your country can do for you, but what you can do for your country"中最长的重复子串是"can do for you","your country"是第二长的重复子串。如何编程找出最长的重复子串?如何找出出现次数超过M的重复子串?
示例1,从控制台输入:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX 10000

/*
 * qsort comparison function. Each element it receives is a pointer to a
 * `char *` suffix pointer; the two suffixes are ordered with strcmp.
 */
int Comp(const void *p, const void *q)
{
    return strcmp(*(char *const *)p, *(char *const *)q);
}

/*
 * Returns the number of leading characters that the NUL-terminated
 * strings p and q have in common.
 */
int comlen(const char *p, const char *q)
{
    int i = 0;

    while (p[i] != '\0' && p[i] == q[i]) {
        i++;
    }
    return i;
}

/*
 * Reads one line from standard input and prints its longest repeated
 * substring (an empty line if nothing repeats).
 *
 * Method: build an array with one pointer per suffix of the text, sort
 * the pointers so that suffixes sharing a prefix become neighbours, then
 * take the longest common prefix over all adjacent pairs.
 */
int main(void)
{
    char data[MAX + 1];      /* input text plus its terminating NUL */
    char *pdata[MAX];        /* pdata[k] points at the suffix starting at data[k] */
    const char *best = data; /* start of the best match; valid even for empty input */
    int maxlen = 0;
    int n = 0;
    int ch;

    printf("please input your string:\n");

    /* Stop at end of line, at end of input, or once the buffer is full. */
    while (n < MAX && (ch = getchar()) != EOF && ch != '\n') {
        pdata[n] = &data[n];
        data[n++] = (char)ch;
    }
    data[n] = '\0';

    qsort(pdata, (size_t)n, sizeof pdata[0], Comp);

    /* After sorting, the longest repeat is a common prefix of two neighbours. */
    for (int i = 0; i < n - 1; i++) {
        int len = comlen(pdata[i], pdata[i + 1]);

        if (len > maxlen) {
            maxlen = len;
            best = pdata[i];
        }
    }

    printf("%.*s\n", maxlen, best);
    return 0;
}
示例2,从文件输入:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX 10000000

/*
 * qsort comparison function; its prototype must have exactly this form.
 * Each element it receives is a pointer to a `char *` suffix pointer; the
 * two suffixes are ordered with strcmp.
 */
int Comp(const void *p, const void *q)
{
    return strcmp(*(char *const *)p, *(char *const *)q);
}

/*
 * Returns the length of the common prefix of the NUL-terminated strings
 * p and q, i.e. the length of the repeated substring they share.
 */
int comlen(const char *p, const char *q)
{
    int i = 0;

    while (p[i] != '\0' && p[i] == q[i]) {
        i++;
    }
    return i;
}

/*
 * Reads up to MAX characters from "ls.txt" and prints the longest
 * repeated substring of that text (an empty line if nothing repeats).
 *
 * Returns 0 on success and EXIT_FAILURE when the file cannot be opened
 * or the working buffers cannot be allocated.
 */
int main(void)
{
    FILE *fp = fopen("ls.txt", "r");
    if (fp == NULL) {
        printf("Can't open file\n");
        return EXIT_FAILURE;
    }

    /*
     * MAX characters plus MAX pointers is far too large for automatic
     * storage, so both buffers live on the heap. The extra byte in `data`
     * holds the terminating NUL when the file fills the whole buffer.
     */
    char *data = malloc((size_t)MAX + 1);
    char **pdata = malloc((size_t)MAX * sizeof *pdata);
    if (data == NULL || pdata == NULL) {
        printf("Out of memory\n");
        free(pdata);
        free(data);
        fclose(fp);
        return EXIT_FAILURE;
    }

    /* pdata[k] points at the suffix of the text that starts at data[k]. */
    int n = 0;
    int ch;
    while (n < MAX && (ch = fgetc(fp)) != EOF) {
        pdata[n] = &data[n];
        data[n++] = (char)ch;
    }
    data[n] = '\0';
    fclose(fp);

    qsort(pdata, (size_t)n, sizeof *pdata, Comp);

    /* After sorting, the longest repeat is a common prefix of two neighbours. */
    const char *best = data; /* valid even when the file is empty */
    int maxlen = 0;
    for (int i = 0; i < n - 1; i++) {
        int len = comlen(pdata[i], pdata[i + 1]);

        if (len > maxlen) {
            maxlen = len;
            best = pdata[i];
        }
    }

    printf("%.*s\n", maxlen, best);

    free(pdata);
    free(data);
    return 0;
}
后缀指针数组排序之后,子数组pdata[i..i+M]代表了M+1个后缀字符串。由于数组是有序的,只需对其中第一个和最后一个字符串调用comlen,就可以很快地得到这M+1个字符串共有的公共前缀长度;这个公共前缀在文本中至少出现M+1次,也就是出现次数超过M的重复子串。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX 100000

/*
 * qsort comparison function; its prototype must have exactly this form.
 * Each element it receives is a pointer to a `char *` suffix pointer; the
 * two suffixes are ordered with strcmp.
 */
int Comp(const void *p, const void *q)
{
    return strcmp(*(char *const *)p, *(char *const *)q);
}

/*
 * Returns the length of the common prefix of the NUL-terminated strings
 * p and q, i.e. the length of the repeated substring they share.
 */
int comlen(const char *p, const char *q)
{
    int i = 0;

    while (p[i] != '\0' && p[i] == q[i]) {
        i++;
    }
    return i;
}

/*
 * Reads up to MAX characters from "ls.txt" and prints the longest
 * substring that occurs more than M times in that text (an empty line if
 * there is none).
 *
 * In the sorted suffix array, pdata[i] and pdata[i + M] bracket M + 1
 * suffixes; whatever prefix those two share is shared by every suffix in
 * between, so it occurs at least M + 1 times.
 *
 * Returns 0 on success and EXIT_FAILURE when the file cannot be opened.
 */
int main(void)
{
    /*
     * static keeps MAX characters plus MAX pointers out of automatic
     * storage. The extra byte in `data` holds the terminating NUL when the
     * file fills the whole buffer.
     */
    static char data[MAX + 1];
    static char *pdata[MAX];
    const int M = 5;

    FILE *fp = fopen("ls.txt", "r");
    if (fp == NULL) {
        printf("Can't open file\n");
        return EXIT_FAILURE;
    }

    /* pdata[k] points at the suffix of the text that starts at data[k]. */
    int n = 0;
    int ch;
    while (n < MAX && (ch = fgetc(fp)) != EOF) {
        pdata[n] = &data[n];
        data[n++] = (char)ch;
    }
    data[n] = '\0';
    fclose(fp);

    qsort(pdata, (size_t)n, sizeof pdata[0], Comp);

    const char *best = data; /* valid even when fewer than M + 1 suffixes exist */
    int maxlen = 0;
    for (int i = 0; i + M < n; i++) {
        int len = comlen(pdata[i], pdata[i + M]);

        if (len > maxlen) {
            maxlen = len;
            best = pdata[i];
        }
    }

    printf("%.*s\n", maxlen, best);
    return 0;
}
参考资料:http://topic.csdn.net/u/20071002/22/896b1597-fc39-466e-85d3-5bef6f7442f6.html
编程珠玑:http://ishare.iask.sina.com.cn/f/22736790.html