[编程珠玑读书笔记]最长重复子串

问题定义:

    给定一个输入文本文件,查找其中最长的重复子串。例如,"Ask not what your country can do for you, but what you can do for your country"中最长的重复子串是"can do for you","your country"是第二长的重复子串。如何编程找出最长的重复子串?如何找出出现次数超过M次的最长重复子串?


示例1,从控制台输入:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MAX 10000

/*
 * qsort comparison callback for an array of char* elements.
 *
 * p and q each point to an array element (i.e. to a char*); the two
 * strings those elements refer to are compared with strcmp.
 *
 * Returns a negative, zero or positive value, exactly as strcmp does.
 */
int Comp(const void *p, const void *q)
{
	char *const *lhs = p;
	char *const *rhs = q;

	return strcmp(*lhs, *rhs);
}

/*
 * Returns the length of the common prefix of the NUL-terminated strings
 * p and q, i.e. the number of leading characters that are equal in both.
 */
int comlen(char *p, char*q)
{
	int matched = 0;

	for (;;)
	{
		/* End of p: nothing more to compare. */
		if (p[matched] == '\0')
		{
			break;
		}
		/* First differing character (this also covers the end of q). */
		if (p[matched] != q[matched])
		{
			break;
		}
		matched++;
	}
	return matched;
}
/*
 * Reads one line from standard input and prints its longest substring that
 * occurs at least twice (the occurrences may overlap).
 *
 * Method: pdata[] holds a pointer to every suffix of the input. After
 * sorting the suffixes lexicographically, the longest repeated substring is
 * the longest common prefix of two adjacent suffixes.
 *
 * Input longer than MAX - 1 characters is truncated.
 * Returns 0.
 */
int main()
{
	int ch;
	int temp;
	char data[MAX];
	char *pdata[MAX];
	int n = 0, i = 0;
	int maxlen = 0, maxi = 0;
	printf("please input your string:\n");
	/* Stop at end of line, at end of input, or when only the slot for the
	 * terminating '\0' is left, so data[] and pdata[] are never overrun. */
	while( n < MAX - 1 && ( ch = getchar()) != EOF && ch != '\n' )
	{
		pdata[n] = &data[n];
		data[n++] = (char)ch;
	}
	data[n] = '\0';
	qsort( pdata, n, sizeof(char*), Comp);

	/* Adjacent suffixes in sorted order share the longest prefixes. */
	for( i = 0; i < n - 1; i++)
	{
		temp = comlen( pdata[i], pdata[i +1]);
		if( temp > maxlen )
		{
			maxlen = temp;
			maxi = i;
		}
	}
	/* With no repeat (or empty input) pdata[maxi] may be unset, so print an
	 * empty string instead of passing an uninitialized pointer. */
	printf("%.*s\n", maxlen, maxlen > 0 ? pdata[maxi] : "");
	return 0;
}


示例2,从文件(ls.txt)输入:

#include<stdio.h>
#include<string.h>
#define MAX 10000000

// Comparison callback for qsort; the prototype must have exactly this form.
// p and q each point to an array element of type char*, and the strings
// those elements refer to are compared with strcmp.
int Comp(const void *p, const void *q)
{
	char *const *first = p;
	char *const *second = q;

	return strcmp(*first, *second);
}

// Returns the length of the common prefix of the NUL-terminated strings
// p and q (the number of leading characters equal in both).
int comlen(char *p, char*q)
{
	int len;

	for (len = 0; p[len] != '\0' && p[len] == q[len]; len++)
	{
		// Nothing to do: the loop header advances over the matching prefix.
	}
	return len;
}
// Reads the file "ls.txt" (at most MAX characters) and prints its longest
// substring that occurs at least twice (the occurrences may overlap).
//
// Method: pdata[] holds a pointer to every suffix of the text. After sorting
// the suffixes lexicographically, the longest repeated substring is the
// longest common prefix of two adjacent suffixes.
//
// Returns 0 on success, 1 if the file cannot be opened.
int main()
{
	// Static storage: the two arrays together need MAX * (1 + sizeof(char*))
	// bytes, which is far more than a default stack normally provides.
	// data has one extra slot so the terminating '\0' always fits.
	static char data[MAX + 1];
	static char *pdata[MAX];
	int ch;
	int temp;
	int n = 0, i = 0;
	int maxlen = 0, maxi = 0;

	FILE *fp;
	if((fp = fopen("ls.txt","r")) == NULL)
	{
		printf("Can't open file\n");
		// Without a stream there is nothing to read; do not fall through.
		return 1;
	}
	// Check the capacity first so no character is read and then dropped.
	while( n < MAX && ( ch = fgetc(fp)) != EOF )
	{
		pdata[n] = &data[n];
		data[n++] = (char)ch;
	}
	fclose(fp);
	data[n] = '\0';
	qsort( pdata, n, sizeof(char*), Comp);

	// Adjacent suffixes in sorted order share the longest prefixes.
	for( i = 0; i < n - 1; i++)
	{
		temp = comlen( pdata[i], pdata[i +1]);
		if( temp > maxlen )
		{
			maxlen = temp;
			maxi = i;
		}
	}
	// With no repeat (or an empty file) print an empty string.
	printf("%.*s\n", maxlen, maxlen > 0 ? pdata[maxi] : "");
	return 0;
}

示例3,出现次数超过M次的最长重复子串:

    排序后的指针数组中,子数组pdata[i……i+M]代表了M+1个后缀字符串。由于数组是有序的,只要对其中第一个和最后一个字符串调用comlen,就可以很快地得到这M+1个字符串的公共前缀长度;这个公共前缀在文本中至少出现了M+1次。


#include<stdio.h>
#include<string.h>
#define MAX 100000

// Comparison callback for qsort; the prototype must have exactly this form.
// Each argument points to an array element of type char*; the result is the
// strcmp ordering of the two strings those elements refer to.
int Comp(const void *p, const void *q)
{
	char *left = *(char *const *)p;
	char *right = *(char *const *)q;

	return strcmp(left, right);
}

// Returns the length of the common prefix of the NUL-terminated strings
// p and q (the number of leading characters equal in both).
int comlen(char *p, char*q)
{
	char *start = p;

	// Advance both cursors while p has not ended and the characters agree.
	while (*p != '\0' && *p == *q)
	{
		p++;
		q++;
	}
	// The distance travelled in p is the number of matching characters.
	return (int)(p - start);
}
// Reads the file "ls.txt" (at most MAX characters) and prints the longest
// substring that occurs more than M times (the occurrences may overlap).
//
// Method: pdata[] holds a pointer to every suffix of the text. After sorting
// the suffixes lexicographically, the M + 1 suffixes pdata[i..i+M] all start
// with the common prefix of the first and the last of them, so that prefix
// occurs at least M + 1 times in the text.
//
// Returns 0 on success, 1 if the file cannot be opened.
int main()
{
	// Static storage keeps these large arrays off the stack; data has one
	// extra slot so the terminating '\0' always fits.
	static char data[MAX + 1];
	static char *pdata[MAX];
	int ch;
	int temp;
	int n = 0, i = 0;
	int maxlen = 0, maxi = 0;
	const int M = 5;
	FILE *fp;
	if((fp = fopen("ls.txt","r")) == NULL)
	{
		printf("Can't open file\n");
		// Without a stream there is nothing to read; do not fall through.
		return 1;
	}
	// Check the capacity first so no character is read and then dropped.
	while( n < MAX && ( ch = fgetc(fp)) != EOF )
	{
		pdata[n] = &data[n];
		data[n++] = (char)ch;
	}
	fclose(fp);
	data[n] = '\0';
	qsort( pdata, n, sizeof(char*), Comp);

	// Compare suffixes that are M positions apart in sorted order.
	for( i = 0; i < n - M; i++)
	{
		temp = comlen( pdata[i], pdata[i + M]);
		if( temp > maxlen )
		{
			maxlen = temp;
			maxi = i;
		}
	}
	// With no qualifying substring (or a short file) print an empty string.
	printf("%.*s\n", maxlen, maxlen > 0 ? pdata[maxi] : "");
	return 0;
}





参考资料:http://topic.csdn.net/u/20071002/22/896b1597-fc39-466e-85d3-5bef6f7442f6.html

编程珠玑:http://ishare.iask.sina.com.cn/f/22736790.html

你可能感兴趣的:(编程,String,File,null,读书,FP)