SPOJ 694(后缀数组)

D - New Distinct Substrings
Time Limit:280MS     Memory Limit:1572864KB     64bit IO Format:%lld & %llu
Submit  Status

Description

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20; Each test case consists of one string, whose length is <= 50000

Output

For each test case output one number saying the number of distinct substrings.

Example

Input:
2
CCCCC
ABABA

Output:
5
9

Hint

Added by: Hoang Hong Quan
Date: 2006-01-18
Time limit: 0.280s
Source limit: 50000B
Memory limit: 1536MB
Cluster: Cube (Intel G860)
Languages: All except: NODEJS PERL 6 VB.net
Resource: Base on a problem in ByteCode06



题意:求一个字符串有多少个不同的子串




题解:直接查的题解,想不到这和后缀数组有什么关系……以为是DP呢


http://blog.csdn.net/acm_cxlove/article/details/7930422


每一个子串一定是某个后缀的前缀,那么问题便等价于求所有后缀之间的不相同的前缀个数。我们按sa的顺序来考虑,当加入sa[k]的时候,sa[k]这个后缀的长度为n-sa[k],那么便有n-sa[k]个前缀,但是由height数组可知sa[k]与sa[k-1]有height[k]个前缀是相同的,所以要除去,最终的答案便是sigma(n-sa[k]-height[k])



#include<cstdio>    
#include<cstring>    
#include<cstdlib>    
#include<cmath>    
#include<iostream>    
#include<algorithm>    
#include<vector>    
#include<map>    
#include<set>    
#include<queue>    
#include<string>    
#include<bitset>    
#include<utility>    
#include<functional>    
#include<iomanip>    
#include<sstream>    
#include<ctime>    
using namespace std;

// Capacity (in elements) of every global work array declared in this file.
#define N int(1e5)    
// Large "infinity" sentinel value; not referenced by the code visible in this file.
#define inf int(0x3f3f3f3f)    
// Modulus constant; not referenced by the code visible in this file.
#define mod int(1e9+7)    
// 64-bit integer type used for the answer accumulator in main().
typedef long long LL;


// debug(...) forwards its arguments to fprintf(stderr, ...) when CDZSC is
// defined at compile time; otherwise it expands to nothing.
#ifdef CDZSC    
#define debug(...) fprintf(stderr, __VA_ARGS__)    
#else    
#define debug(...)     
#endif    

char s[N];
int sa[N], t[N], t2[N], c[N];

void build_sa(int n, int m)//n表示字符串的长度,m表示基数,用于m基数排序,数字的上限  
{
	int i, *x = t, *y = t2;
	for (i = 0; i<m; i++)c[i] = 0;
	for (i = 0; i<n; i++)c[x[i] = s[i]]++;
	for (i = 1; i<m; i++)c[i] += c[i - 1];
	for (i = n - 1; i >= 0; i--)sa[--c[x[i]]] = i;
	for (int k = 1; k <= n; k <<= 1)
	{
		int p = 0;
		for (i = n - k; i<n; i++)y[p++] = i;
		for (i = 0; i<n; i++)if (sa[i] >= k)y[p++] = sa[i] - k;

		for (i = 0; i<m; i++)c[i] = 0;
		for (i = 0; i<n; i++)c[x[y[i]]]++;
		for (i = 1; i<m; i++)c[i] += c[i - 1];
		for (i = n - 1; i >= 0; i--)sa[--c[x[y[i]]]] = y[i];
		swap(x, y);
		p = 1; x[sa[0]] = 0;
		for (i = 1; i<n; i++)
			x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p++;
		if (p >= n)break;
		m = p;
	}
}

// Rank[i]   : position of the suffix starting at i inside sa[] (inverse of sa)
// height[r] : length of the longest common prefix of suffixes sa[r-1] and
//             sa[r]; height[0] is defined as 0 because it has no predecessor
int Rank[N], height[N];

/*
 * Fills Rank[] and height[] for the first n entries of sa[], walking the
 * suffixes in text order and reusing the previous match length minus one
 * (Kasai's algorithm).
 *
 *   n - number of suffixes in sa[] (the same n that was given to build_sa)
 */
void gethight(int n)
{
	int k = 0;   // match length carried over from the previous text position

	for (int i = 0; i < n; i++)
		Rank[sa[i]] = i;

	for (int i = 0; i < n; i++)
	{
		if (Rank[i] == 0)
		{
			// The smallest suffix has no predecessor in sa[]; indexing
			// sa[-1] here would be out of bounds.
			height[0] = 0;
			k = 0;
			continue;
		}
		if (k)
			k--;
		int j = sa[Rank[i] - 1];
		// Bounds checks keep the scan inside s[0..n-1] even if the caller
		// did not include a unique sentinel character.
		while (i + k < n && j + k < n && s[i + k] == s[j + k])
			k++;
		height[Rank[i]] = k;
	}
}
int main()
{
#ifdef CDZSC    
	freopen("i.txt", "r", stdin);
	//freopen("o.txt","w",stdout);    
	int _time_jc = clock();
#endif    
	int test;
	scanf("%d", &test);
	while (test--)
	{
		LL ans;
		scanf("%s", s);
		int len = strlen(s);
		ans = 0;
		build_sa(len+1, 128);
		gethight(len+1);
		for (int i = 1; i <= len; i++)
		{
			//printf("%d %d %d\n", sa[i], height[i], height[i]);
			ans +=( len-sa[i]-height[i]);
			/*sa数组表示下标,
			height[i]数组表示sa[i-1]和sa[i]的最长公共前缀,
			每一个子串一定是某个后缀的前缀,那么问题便等价
			于求所有后缀之间的不相同的前缀个数。我们按sa的
			顺序来考虑,当加入sa[k]的时候,sa[k]这个后缀的
			长度为n-sa[k],那么便有n-sa[k]个前缀,但是由
			heigh数组可知sa[k]与sa[k-1]有height[k]个前缀
			是相同的,所以要除去,最终的答案便是
			sigma(n-sa[k]+height[k])*/
		}
		printf("%lld\n",ans);
	}
#ifdef CDZSC    
	debug("time: %d\n", int(clock() - _time_jc));
#endif    
	return 0;
}







你可能感兴趣的:(SPOJ 694(后缀数组))