Description
Given a string, we need to find the total number of its distinct substrings.
T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
For each test case output one number saying the number of distinct substrings.
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.
题意:给定一个字符串,求不相同的子串。
思路:每个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间的不相同的前缀的个数。如果所有的后缀按照 suffix(sa[1]), suffix(sa[2]),
suffix(sa[3]), …… ,suffix(sa[n])的顺序计算,不难发现,对于每一次新加
进来的后缀 suffix(sa[k]),它将产生 n-sa[k]个新的前缀。但是其中有height[k]个是和前面的字符串的前缀是相同的。所以 suffix(sa[k])将“贡献”
出 n-sa[k]-height[k]个不同的子串。累加后便是原问题的答案。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;
const int maxn = 1010;
int sa[maxn]; //SA数组,表示将S的n个后缀从小到大排序后把排好序的
//的后缀的开头位置顺次放入SA中
int t1[maxn], t2[maxn], c[maxn];
int rank[maxn], height[maxn];
int s[maxn];
char str[maxn];
void build_sa(int s[], int n, int m) {
int i, j, p, *x = t1, *y = t2;
for (i = 0; i < m; i++) c[i] = 0;
for (i = 0; i < n; i++) c[x[i] = s[i]]++;
for (i = 1; i < m; i++) c[i] += c[i-1];
for (i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
for (j = 1; j <= n; j <<= 1) {
p = 0;
for (i = n-j; i < n; i++) y[p++] = i;
for (i = 0; i < n; i++)
if (sa[i] >= j)
y[p++] = sa[i] - j;
for (i = 0; i < m; i++) c[i] = 0;
for (i = 0; i < n; i++) c[x[y[i]]]++;
for (i = 1; i < m; i++) c[i] += c[i-1];
for (i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
swap(x, y);
p = 1, x[sa[0]] = 0;
for (i = 1; i < n; i++)
x[sa[i]] = y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+j] == y[sa[i]+j] ? p-1 : p++;
if (p >= n) break;
m = p;
}
}
void getHeight(int s[],int n) {
int i, j, k = 0;
for (i = 0; i <= n; i++)
rank[sa[i]] = i;
for (i = 0; i < n; i++) {
if (k) k--;
j = sa[rank[i]-1];
while (s[i+k] == s[j+k]) k++;
height[rank[i]]=k;
}
}
int check(int n, int k, int mid) {
int num = 1;
for (int i = 2; i <= n; i++) {
if (height[i] >= mid) {
num++;
if (num >= k) return 1;
}
else num = 1;
}
return 0;
}
int main() {
int t;
scanf("%d", &t);
while (t--) {
scanf("%s", str);
int n = strlen(str);
for (int i = 0; i <= n; i++)
s[i] = str[i];
build_sa(s, n+1, 128);
getHeight(s, n);
int ans = 0;
for (int i = 1; i <= n; i++)
ans += n - sa[i] - height[i];
printf("%d\n", ans);
}
return 0;
}