HDU - 5008 Boring String Problem (后缀数组+二分+RMQ)

Problem Description
In this problem, you are given a string s and q queries.

For each query, you should answer that when all distinct substrings of string s were sorted lexicographically, which one is the k-th smallest. 

A substring $s_{i \ldots j}$ of the string $s = a_1 a_2 \ldots a_n$ ($1 \le i \le j \le n$) is the string $a_i a_{i+1} \ldots a_j$. Two substrings $s_{x \ldots y}$ and $s_{z \ldots w}$ are considered to be distinct if $s_{x \ldots y} \ne s_{z \ldots w}$.
 

Input
The input consists of multiple test cases.Please process till EOF. 

Each test case begins with a line containing a string s ($|s| \le 10^5$) with only lowercase letters.

Next line contains a positive integer q ($1 \le q \le 10^5$), the number of questions.

q queries are given in the next q lines. Every line contains an integer v. You should calculate k by $k = (l \oplus r \oplus v) + 1$, where l, r is the output of the previous question (at the beginning of each case $l = r = 0$), $0 < k < 2^{63}$, and "$\oplus$" denotes exclusive or.
 

Output
For each test case, the output consists of q lines; the i-th line contains two integers l, r which are the answer to the i-th query. (The answer l, r satisfies that $s_{l \ldots r}$ is the k-th smallest, and if there are several l, r available, output the l, r with the smallest l. If there is no l, r satisfying this, output "0 0". Note that $s_{1 \ldots n}$ is the whole string.)
 

Sample Input
   
   
   
   
aaa
4
0
2
3
5
 

Sample Output
   
   
   
   
1 1
1 3
1 2
0 0
 

Source
2014 ACM/ICPC Asia Regional Xi'an Online
 

题意:把字符串的所有不同子串按字典序排序,求第k小的子串,输出它的左右端点;若有多个出现位置,取左端点最小的。
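思路:首先计算不同子串的个数。按后缀数组的顺序,排名第i的后缀sa[i]贡献的新子串个数为 n-sa[i]-height[i](这是论文里的经典结论)。对这些个数求前缀和,就可以二分查找第k个子串落在排名为i的哪个后缀上,子串长度为 len = height[i] + (k 减去前 i-1 个后缀的贡献之和)。然后向后二分出最大的排名j,使得后缀sa[j]与后缀sa[i]的lcp不小于len(说明这些后缀都以该子串为前缀),再用RMQ在排名区间[i, j]内查询最小的sa值作为左端点。坑点是二分时mid等于i(即同一个后缀)的情况要单独判断,因为此时不能用height上的RMQ来求lcp。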

#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cmath>
#include <queue>
// 64-bit signed integer used for substring counts and query values.
// `long long` is the standard spelling; the previous `__int64` is a
// Microsoft extension that GCC/Clang only accept on MinGW targets.
typedef long long ll;
using namespace std;
// Capacity of every per-position array: |s| <= 1e5 plus the sentinel and slack.
const int maxn = 100010;

// sa[i]: start index of the suffix with rank i (filled by build_sa).
int sa[maxn]; 
// t1/t2: the two key buffers swapped between doubling rounds in build_sa;
// c: counting-sort bucket counters.
int t1[maxn], t2[maxn], c[maxn];
// rank[p]: rank of the suffix starting at p (inverse of sa, filled by getHeight);
// height[i]: common-prefix length of the suffixes sa[i-1] and sa[i].
int rank[maxn], height[maxn];

// Builds the suffix array of s[0..n-1] into the global sa[] using prefix
// doubling, sorting each round with a stable counting sort.
//
//   s : input symbols, each expected to lie in [0, m)
//   n : number of symbols to sort (main passes strlen + 1 so that the
//       terminating 0 takes part as the last symbol)
//   m : size of the initial alphabet; reused as the number of distinct
//       keys after each round
//
// NOTE(review): the rank comparison below reads y[sa[i]+j] without a bounds
// check; this appears to rely on s[n-1] being a unique smallest sentinel so
// that the first half of the && already fails for suffixes shorter than j --
// confirm before calling with other inputs.
void build_sa(int s[], int n, int m) {
    // x holds the current sort key of every suffix, y is scratch space.
    int i, j, p, *x = t1, *y = t2;
    // Round 0: counting sort of the suffixes by their first symbol.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = s[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i-1];
    for (i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;

    // Each round sorts by the first 2*j symbols using the order of the first j.
    for (j = 1; j <= n; j <<= 1) {
        p = 0;
        // y lists suffix starts ordered by their second half: suffixes that
        // start in the last j positions have no second half and go first ...
        for (i = n-j; i < n; i++) y[p++] = i;
        // ... followed by the rest, in the order sa gives to their second half.
        for (i = 0; i < n; i++) 
            if (sa[i] >= j) 
                y[p++] = sa[i] - j;
        // Counting sort by the first-half key x[]; walking y backwards keeps
        // the second-half order among equal first halves.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        for (i = 1; i < m; i++) c[i] += c[i-1];
        for (i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // y now holds the old keys; x receives the new ones.
        swap(x, y);
        p = 1, x[sa[0]] = 0;
        // Adjacent suffixes share a key only if both halves had equal old keys.
        for (i = 1; i < n; i++) 
            x[sa[i]] = y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+j] == y[sa[i]+j] ? p-1 : p++;

        // All keys distinct: the order is final.
        if (p >= n) break;
        // The next round only needs p buckets.
        m = p;
    }
}

void getHeight(int s[],int n) {
    int i, j, k = 0;
    for (i = 0; i <= n; i++)
        rank[sa[i]] = i;

    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i]-1];
        while (s[i+k] == s[j+k]) k++;
        height[rank[i]] = k;
    }
}
// Sparse table over height[]: initRMQ() sets dp[i][0] = height[i], and
// dp[i][k] is the minimum of height over the 2^k ranks starting at i
// (cut off at n).
int dp[maxn][30];
// Input string of the current test case, read with scanf in main.
char str[maxn];
// r[]: the characters of str, including the terminating '\0', copied to int
// for build_sa(); ind[][]: sparse table over sa[] built by init(), with
// ind[i][0] = sa[i].
int r[maxn], ind[maxn][30];
// b[i]: running sum of n - sa[j] - height[j] for j = 1..i (filled in main).
ll b[maxn];

// Builds the range-minimum sparse table dp[][] over height[1..n].
// After the call, dp[pos][lv] is the minimum of height over the 2^lv ranks
// starting at pos (cut off at n).
void initRMQ(int n) {
    const int topLevel = std::floor(std::log(n + 0.0) / std::log(2.0));

    // Level 0 is the array itself.
    for (int pos = 1; pos <= n; ++pos)
        dp[pos][0] = height[pos];

    // Each level merges two windows of the level below; a level only reads
    // the previous one, so the order of positions within a level is free.
    for (int lv = 1; lv <= topLevel; ++lv) {
        const int half = 1 << (lv - 1);
        for (int pos = 1; pos <= n; ++pos) {
            int best = dp[pos][lv - 1];
            const int right = pos + half;
            if (right <= n && dp[right][lv - 1] < best)
                best = dp[right][lv - 1];
            dp[pos][lv] = best;
        }
    }
}

int lcp(int l, int r) {
    int a = rank[l], b = rank[r];  
    if (a > b) 
        swap(a,b);  
    a++;  
    int m = floor(log(b-a+1.0) / log(2.0));  
    return min(dp[a][m], dp[b-(1<<m)+1][m]);  
}

// Builds the range-minimum sparse table ind[][] over sa[1..n].
// After the call, ind[pos][lv] is the smallest suffix start among the 2^lv
// ranks starting at pos (cut off at n).
void init(int n) {
    const int topLevel = std::floor(std::log(n + 0.0) / std::log(2.0));

    // Level 0 is the suffix array itself.
    for (int pos = 1; pos <= n; ++pos)
        ind[pos][0] = sa[pos];

    // Each level merges two windows of the level below; a level only reads
    // the previous one, so the order of positions within a level is free.
    for (int lv = 1; lv <= topLevel; ++lv) {
        const int half = 1 << (lv - 1);
        for (int pos = 1; pos <= n; ++pos) {
            int best = ind[pos][lv - 1];
            const int right = pos + half;
            if (right <= n && ind[right][lv - 1] < best)
                best = ind[right][lv - 1];
            ind[pos][lv] = best;
        }
    }
}

// Returns the minimum of sa[] over the inclusive rank range [a, b],
// answered from the sparse table ind[][] with two windows of length 2^level.
int rmq(int a, int b) {
    const int span = b - a + 1;
    const int level = std::floor(std::log(span + 0.0) / std::log(2.0));
    const int fromLeft = ind[a][level];
    const int fromRight = ind[b - (1 << level) + 1][level];
    return fromRight < fromLeft ? fromRight : fromLeft;
}

// Reads test cases until EOF. For each string it builds the suffix array,
// the height array and both sparse tables, then answers every query online:
// the decoded k selects the k-th smallest distinct substring, and the
// printed pair is its 1-based [l, r] with the smallest possible l,
// or "0 0" when k exceeds the number of distinct substrings.
int main() {
    while (scanf("%s", str) == 1) {
        const int n = static_cast<int>(strlen(str));
        // Copy the text including its '\0', which serves as the sentinel
        // symbol for build_sa/getHeight.
        for (int i = 0; i <= n; i++)
            r[i] = str[i];
        build_sa(r, n+1, 128);
        getHeight(r, n);
        initRMQ(n);
        init(n);

        // b[i] = number of distinct substrings contributed by the suffixes
        // of rank 1..i (rank 0 is the sentinel-only suffix).
        b[0] = 0;
        for (int i = 1; i <= n; i++)
            b[i] = b[i-1] + n - sa[i] - height[i];

        int queries;
        if (scanf("%d", &queries) != 1)
            break;
        int lastl = 0, lastr = 0;
        while (queries--) {
            // Read through the standard %lld conversion; %I64d is specific
            // to the Microsoft C runtime.
            long long v;
            if (scanf("%lld", &v) != 1)
                return 0;
            ll k = (v ^ lastl ^ lastr) + 1;
            if (k > b[n]) {
                printf("0 0\n");
                lastl = 0;
                lastr = 0;
                continue;
            }
            // First rank whose running total reaches k owns the substring.
            const int id = lower_bound(b+1, b+1+n, k) - b;
            k -= b[id-1];
            const int len = height[id] + k;

            // Largest rank whose suffix still starts with that substring.
            // Ranks before id cannot, because len > height[id].
            int last = id;
            int lo = id, hi = n;
            while (lo <= hi) {
                const int mid = (lo + hi) / 2;
                // lcp() needs two different suffixes, so the suffix itself
                // is accepted directly.
                if (sa[id] == sa[mid] || lcp(sa[id], sa[mid]) >= len) {
                    last = mid;
                    lo = mid + 1;
                }
                else hi = mid - 1;
            }

            // Smallest start position among the matching suffixes, 1-based.
            const int ansl = rmq(id, last) + 1;
            const int ansr = ansl + len - 1;
            printf("%d %d\n", ansl, ansr);
            lastl = ansl;
            lastr = ansr;
        }
    }    
    return 0;
}




你可能感兴趣的:(HDU - 5008 Boring String Problem (后缀数组+二分+RMQ))