bzoj3998 后缀数组复习

对于一个给定的长度为N的字符串,求它的第K小子串。询问分两种:T=0表示不同位置上的相同子串只算一个,T=1表示不同位置上的相同子串算作多个。如果子串的总数少于K,则输出-1。


我只会后缀数组

sa做法
当T=0时(相同子串只算一次),按sa的顺序枚举后缀。后缀sa[i]一共有n-sa[i]+1个前缀(也就是以sa[i]开头的子串),其中最短的height[i]个和后缀sa[i-1]的前缀重复,所以它新贡献了n+1-sa[i]-height[i]个不同的子串。我们记一个变量,每次加上n+1-sa[i]-height[i](累加之前的值记为pre),一旦累加和达到或超过k,答案就是从sa[i]开始、长度为k-pre+height[i]的串。为什么是k-pre+height[i]呢?我个人的理解是:k-pre表示答案是后缀sa[i]新贡献的子串里的第k-pre个,然而长度不超过height[i]的那些前缀在上一个后缀里已经算过了,所以长度还要再加上height[i]。
T=1(相同子串算多次):二分答案在“不同子串”中的排名,然后统计有多少个子串(可重复计数)不大于它,再和k比较。
假设当前的串是sa中排名第i的后缀的一个前缀,长度为len。首先在sum数组上二分出这个位置i,使得该串是后缀sa[i]的前缀。排名在i之前的i−1个后缀的所有前缀都不大于该串,可以直接全部加上(即sum1[i−1]),再加上后缀sa[i]自身长度不超过len的len个前缀。对于排名在i之后的后缀,只有长度不超过它与sa[i]的最长公共前缀(同时不超过len)的那些前缀才不大于该串,所以从i+1开始往后for一遍,维护height的最小值并累加,直到最小值变成0为止。

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
using namespace std;
// Capacity of every per-position array below. The program indexes positions
// from 1, so index 0 of each array keeps its zero-initialized value.
const int MAXN = 500005;
// n      : length of the input string (strlen of c + 1).
// m, j   : not referenced at file scope in the code shown; main declares
//          locals with the same names.
// i      : loop counter shared by the loops in main.
// k      : the query's K once it has been read; before that it is reused as
//          the doubling step of the suffix sort, and the T=0 branch reuses it
//          as a print index.
// sa     : sa[r] is the 1-based start of the suffix placed at position r by
//          the sort in main.
// rank   : during sorting, scratch holding suffix starts ordered by second
//          key; after the sort finishes, rank[p] is the position of suffix p
//          in sa.
//          NOTE(review): with `using namespace std` this name can collide
//          with std::rank from <type_traits> in C++11 and later - confirm
//          against the target compiler.
// height : height[r] is the number of leading characters shared by the
//          suffixes at sa[r] and sa[r - 1].
// w      : bucket counters for the counting sort.
// x      : current sort key of each suffix (letter code first, then the class
//          number from the previous doubling round).
// tot    : running match length while filling height.
// ty     : query type read from input; 0 selects the distinct-substring branch.
int n, m, i, j, k, sa[MAXN], rank[MAXN], height[MAXN], w[MAXN], x[MAXN], tot, ty;
// Prefix sums over sa order, filled only in the ty != 0 branch of main:
//   sum[r]  = sum[r-1]  + (n + 1 - sa[r] - height[r])
//   sum1[r] = sum1[r-1] + (n + 1 - sa[r])
long long sum[MAXN], sum1[MAXN];
// Input string; main reads it into c + 1, so characters live at c[1..n].
char c[MAXN];
/**
 * Tests whether the p-th smallest DISTINCT substring is at or past the k-th
 * smallest substring when equal substrings at different positions are counted
 * separately.
 *
 * Reads the file-level n, k, height, sum and sum1; sum and sum1 must already
 * be filled for positions 1..n. sum is searched with a binary search, so it
 * must be non-decreasing (it is when sa/height describe a valid suffix array).
 *
 * @param p 1-based rank among distinct substrings. Values outside
 *          [1, sum[n]] are clamped into that range; previously a rank above
 *          sum[n] produced a length longer than the last suffix and could
 *          make the function return true for a substring that does not exist.
 * @return true when at least k substrings (counted with repetition) compare
 *         less than or equal to the p-th distinct substring.
 */
inline bool check(int p)
{
    // No suffixes means no substrings; also keeps sum[idx - 1] in bounds.
    if (n < 1) return false;

    long long target = p;
    if (target < 1) target = 1;
    if (target > sum[n]) target = sum[n];

    // First position in sa order whose cumulative distinct count reaches target.
    const int idx = static_cast<int>(std::lower_bound(sum + 1, sum + n + 1, target) - sum);

    // Length of that substring: skip the height[idx] prefixes already counted
    // for the previous suffix, then take the (target - sum[idx - 1])-th new one.
    long long len = target - sum[idx - 1] + height[idx];

    // Every prefix of the earlier suffixes, plus the len prefixes of this one.
    long long covered = len + sum1[idx - 1];
    if (covered >= k) return true;

    // A later suffix contributes only the prefixes it shares with the target,
    // i.e. the running minimum of height capped at len.
    for (int pos = idx + 1; pos <= n; ++pos)
    {
        len = std::min<long long>(len, height[pos]);
        if (len == 0) break;
        covered += len;
        if (covered >= k) return true;
    }
    return false;
}
int main()
{
	//freopen("string.in", "r", stdin);
	//freopen("string.out", "w", stdout);
    scanf("%s", c + 1);
    n = strlen(c + 1);
    for(i = 1; i <= n; i ++)
        ++w[x[i] = c[i] - 96];
    for(i = 2; i <= 30; i ++)
        w[i] += w[i - 1];
    for(i = n; i >= 1; i --)
        sa[w[x[i]]--] = i;
    for(k = 1; k <= n; k <<= 1)
    {
        int t = 0;
        for(i = n; i >= n - k + 1; i --)
            rank[++t] = i;
        for(i = 1; i <= n; i ++)
            if (sa[i] - k > 0) rank[++t] = sa[i] - k;
        memset(w, 0, sizeof(w));
        for(i = 1; i <= n; i ++)
            w[x[i]] ++;
        for(i = 2; i <= n; i ++)
            w[i] += w[i - 1];
        for(i = n; i >= 1; i --)
            sa[w[x[rank[i]]]--] = rank[i];
        int m = 0;
        for(i = 1; i <= n; i ++)
        {
            int u = sa[i], v = sa[i - 1];
            if (x[u] != x[v] || x[u + k] != x[v + k]) rank[u] = ++m;
            else rank[u] = m;
        }
        if (m == n) break;
        for(i = 1; i <= n; i ++)
            swap(rank[i], x[i]);
    }
    tot = 0;
    for(i = 1; i <= n; i ++)
    {
        if (tot) tot --;
        int j = sa[rank[i] - 1];
        while (c[i + tot] == c[j + tot]) tot ++;
        height[rank[i]] = tot;
    }
    cin >> ty >> k;
    if (!ty)
    {
        int tot = 0;
        for(i = 1; i <= n; i ++)
        {
            int pre = tot;
            tot += n + 1 - sa[i] - height[i];
            if (tot >= k)
            {
            	 cout << i << endl;
                int j = k - pre + height[i];
                for(k = sa[i]; k < sa[i] + j; k ++)
                    printf("%c", c[k]);
                break;
            }
        }
    }
    else
    {
        for(i = 1; i <= n; i ++)
            sum[i] = sum[i - 1] + n + 1 - sa[i] - height[i], sum1[i] = sum1[i - 1] + n + 1 - sa[i];
        int l = 1, r = k;
        while (l != r - 1)
        {
            int mid = (l + r) >> 1;
            if (check(mid)) r = mid;
            else l = mid;
        }
        int ans = 0;
        if (check(l)) ans = l;
        else if (check(r)) ans = r;
        else {cout << -1; return 0;}
        l = 1, r = n;
        while (l != r - 1)
        {
            int mid = (l + r) >> 1;
            if (sum[mid] >= (long long)ans) r = mid;
            else l = mid;
        }
        int begi = 0, end = 0;
        if (sum[l] >= (long long)ans) begi = l, end = ans - sum[l - 1] + height[l];
        else begi = r, end = ans - sum[r - 1] + height[r];
        int jj = sa[begi] + end;
        for(i = sa[begi]; i < jj; i ++)
            printf("%c", c[i]);
    }
}


你可能感兴趣的:(后缀数组)