SPOJ - DISUBSTR Distinct Substrings(后缀数组求不相同的子串个数)

Description

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA: 
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

题意:给定一个字符串,求不相同的子串。
思路:每个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间的不相同的前缀的个数。如果所有的后缀按照 suffix(sa[1]), suffix(sa[2]),
suffix(sa[3]), …… ,suffix(sa[n])的顺序计算,不难发现,对于每一次新加
进来的后缀 suffix(sa[k]),它将产生 n-sa[k]个新的前缀。但是其中有height[k]个是和前面的字符串的前缀是相同的。所以 suffix(sa[k])将“贡献”
出 n-sa[k]-height[k]个不同的子串。累加后便是原问题的答案。

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;
const int maxn = 1010;

int sa[maxn]; //SA数组,表示将S的n个后缀从小到大排序后把排好序的
//的后缀的开头位置顺次放入SA中
int t1[maxn], t2[maxn], c[maxn];
int rank[maxn], height[maxn];
int s[maxn];
char str[maxn];

void build_sa(int s[], int n, int m) {
    int i, j, p, *x = t1, *y = t2;
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = s[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i-1];
    for (i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (j = 1; j <= n; j <<= 1) {
        p = 0;
        for (i = n-j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) 
            if (sa[i] >= j) 
                y[p++] = sa[i] - j;
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        for (i = 1; i < m; i++) c[i] += c[i-1];
        for (i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        swap(x, y);
        p = 1, x[sa[0]] = 0;
        for (i = 1; i < n; i++) 
            x[sa[i]] = y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+j] == y[sa[i]+j] ? p-1 : p++;

        if (p >= n) break;
        m = p;
    }
}

void getHeight(int s[],int n) {
    int i, j, k = 0;
    for (i = 0; i <= n; i++)
        rank[sa[i]] = i;
    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i]-1];
        while (s[i+k] == s[j+k]) k++;
        height[rank[i]]=k;
    }
}

int check(int n, int k, int mid) {
    int num = 1;
    for (int i = 2; i <= n; i++) {
        if (height[i] >= mid) {
            num++;
            if (num >= k) return 1;
        }
        else num = 1;
    }
    return 0;
}

int main() {
    int t;
    scanf("%d", &t);
    while (t--) {
        scanf("%s", str);
        int n = strlen(str);
        for (int i = 0; i <= n; i++)
            s[i] = str[i];
        build_sa(s, n+1, 128);
        getHeight(s, n);

        int ans = 0;
        for (int i = 1; i <= n; i++)
            ans += n - sa[i] - height[i];

        printf("%d\n", ans);
    }
    return 0;
}

你可能感兴趣的:(SPOJ - DISUBSTR Distinct Substrings(后缀数组求不相同的子串个数))