后缀数组&&后缀自动机小结

后缀数组板子

#include 
#include 
#include 
#include 
#include 

using namespace std;
// Length of the input string; set in main() from strlen(s + 1).
int n;
// Capacity of every global array in this program.
const int N = 1000010;
// sa[i]: start index of the suffix with rank i (filled by SA()).
// x, y, c: scratch arrays of SA() -- current sort keys, secondary order /
// next keys, and counting-sort bucket counters respectively.
int sa[N], x[N], c[N], y[N];
// Input text, stored 1-based in s[1..n].
char s[N];
// Builds the suffix array of s[1..n] into sa[1..n] using prefix doubling:
// each round sorts suffixes by their first 2*half characters with a
// two-key counting sort (x = current key of each suffix, y = order by
// second key, later reused to hold the new keys).
inline void SA()
{
    int keys = 128;  // upper bound on key values; starts at the character range

    // Initial round: counting sort by the first character.
    for (int i = 1; i <= n; ++i) x[i] = s[i];
    for (int v = 0; v <= keys; ++v) c[v] = 0;
    for (int i = 1; i <= n; ++i) ++c[x[i]];
    for (int v = 1; v <= keys; ++v) c[v] += c[v - 1];
    for (int i = n; i >= 1; --i) {
        sa[c[x[i]]] = i;
        --c[x[i]];
    }

    for (int half = 1; half <= n; half <<= 1) {
        int filled = 0;

        // Order by second key: suffixes shorter than `half` have an empty
        // second key and come first, the rest follow in current sa order.
        for (int i = n; i > n - half; --i) y[++filled] = i;
        for (int i = 1; i <= n; ++i) {
            if (sa[i] <= half) continue;
            y[++filled] = sa[i] - half;
        }

        // Stable counting sort by first key, scanning y from the back.
        for (int v = 0; v <= keys; ++v) c[v] = 0;
        for (int i = 1; i <= n; ++i) ++c[x[i]];
        for (int v = 1; v <= keys; ++v) c[v] += c[v - 1];
        for (int i = n; i >= 1; --i) {
            const int suffix = y[i];
            sa[c[x[suffix]]] = suffix;
            --c[x[suffix]];
        }

        // Re-label: equal (first key, second key) pairs share a label.
        int labels = 1;
        y[sa[1]] = 1;
        for (int i = 2; i <= n; ++i) {
            const int cur = sa[i];
            const int prev = sa[i - 1];
            const int curTail = (cur + half > n) ? -1 : x[cur + half];
            const int prevTail = (prev + half > n) ? -1 : x[prev + half];
            if (x[cur] != x[prev] || curTail != prevTail) ++labels;
            y[cur] = labels;
        }
        swap(x, y);
        keys = labels;
        if (labels == n) break;  // all suffixes already distinct
    }
}
int height[N], rk[N], st[N][20];
inline void build_H(){
    int k = 0;
    for (int i = 1; i <= n; ++i) rk[sa[i]] = i;
    for (int i = 1; i <= n; ++i){
        if (rk[i] == 1) continue;
        if (k) --k;
        int j = sa[rk[i]-1];
        while(j + k <= n && i + k <= n && s[i+k]==s[j+k]) ++k;
        height[rk[i]] = k;
    }
}
inline void build_lcp(){
    for (int i = 1; i <= n; ++i)st[i][0] = height[i];
    for (int i = 1; i <= 17; ++i){
        for (int j = 1; j+(1<<i)-1 <= n; ++j)
            st[j][i] = max(st[j][i-1], st[j+(1<<(i-1))][i-1]);
    }
}
inline int query(int l, int r){
    int k = log2(r - l + 1);
    return max(st[l][k], st[r-(1<<k)+1][k]);
}
// Reads one string, builds its suffix array and height array, then prints
// sa[1..n] separated by spaces.
int main()
{
    scanf("%s", s + 1);
    n = strlen(s + 1);

    SA();
    build_H();

    int pos = 1;
    while (pos <= n) {
        printf("%d ", sa[pos]);
        ++pos;
    }
}

后缀自动机板子

#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;
// Capacity of the state pool and of the helper arrays below.
const int N = 40010;
// One suffix-automaton state.
struct state{
    // len: length of the longest string in this state; link: suffix link
    // (0 means "none").
    int len, link;
    // Transition table indexed by character code; 0 means "no transition".
    int next[180];
}st[N];
// sz: index of the last allocated state; last: state of the whole current string.
int sz, last;
// Not used by the template functions shown in this snippet.
int s[N], a[N], n;
int l[N], r[N], c[N], q[N];
// Clears the state pool and creates the root state at index 1
// (index 0 is reserved as the "no state" marker).
void sam_init(){
    memset(st, 0, sizeof(st));
    last = 1;
    sz = 1;
}
// Appends character code c to the automaton (standard online construction).
void sam_extend(int c){
    const int fresh = ++sz;
    st[fresh].len = st[last].len + 1;

    // Add the missing c-transitions along the suffix-link chain of `last`.
    int p = last;
    for (; p != 0 && st[p].next[c] == 0; p = st[p].link)
        st[p].next[c] = fresh;
    last = fresh;

    if (p == 0) {
        // Fell off the root: c never occurred before.
        st[fresh].link = 1;
        return;
    }

    const int target = st[p].next[c];
    if (st[target].len == st[p].len + 1) {
        st[fresh].link = target;
        return;
    }

    // Split `target`: the clone keeps the strings of length <= len(p) + 1.
    const int dup = ++sz;
    st[dup].len = st[p].len + 1;
    st[dup].link = st[target].link;
    memcpy(st[dup].next, st[target].next, sizeof(st[target].next));
    for (; p != 0 && st[p].next[c] == target; p = st[p].link)
        st[p].next[c] = dup;
    st[fresh].link = dup;
    st[target].link = dup;
}

poj 1743
求最长不重叠子串:建好后缀树(即后缀自动机的后缀链接树)之后做树形dp,对每个结点保存对应子串出现位置的最左端点和最右端点;每个结点的候选答案是“子串长度”与“最右端点和最左端点之间的距离”两者中的较小值,最终答案对所有结点取最大值。
树形dp不用跑dfs,因为是按照后缀链接来跑dp,结点中只保存了父亲指针不能直接dfs,但是因为长度长的一定在后缀树的下端,所以根据串长排个序,然后从后往前扫,这样能保证一定用下端的更新上端结点。

#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;
// Capacity of the state pool and of the helper arrays below.
const int N = 40010;
// One suffix-automaton state.
struct state{
    // len: length of the longest string in this state; link: suffix link
    // (0 means "none").
    int len, link;
    // Transition table indexed by shifted note difference (see main);
    // 0 means "no transition".
    int next[180];
}st[N];
// sz: index of the last allocated state; last: state of the whole current string.
int sz, last;
// s: input notes (1-based); a[i] = s[i] - s[i-1]; n: number of notes.
int s[N], a[N], n;
// l/r: smallest / largest end position recorded for each state;
// c: counting-sort buckets by len; q: states ordered by len.
int l[N], r[N], c[N], q[N];
// Resets all per-test-case data: the automaton (root = state 1), the
// counting-sort buffers, and the end-position bounds (l starts at a large
// sentinel, r at 0).
void sam_init(){
    memset(st, 0, sizeof(st));
    memset(c, 0, sizeof(c));
    memset(q, 0, sizeof(q));
    memset(r, 0, sizeof(r));
    memset(l, 0x3f, sizeof(l));
    last = 1;
    sz = 1;
}
// Appends character code c to the automaton and records the new prefix's
// end position as both the leftmost and rightmost occurrence of the new
// state. Cloned states keep their initial l/r values.
void sam_extend(int c){
    const int fresh = ++sz;
    st[fresh].len = st[last].len + 1;
    l[fresh] = st[fresh].len;
    r[fresh] = st[fresh].len;

    // Add the missing c-transitions along the suffix-link chain of `last`.
    int p = last;
    for (; p != 0 && st[p].next[c] == 0; p = st[p].link)
        st[p].next[c] = fresh;
    last = fresh;

    if (p == 0) {
        st[fresh].link = 1;
        return;
    }

    const int target = st[p].next[c];
    if (st[target].len == st[p].len + 1) {
        st[fresh].link = target;
        return;
    }

    // Split `target`: the clone keeps the strings of length <= len(p) + 1.
    const int dup = ++sz;
    st[dup].len = st[p].len + 1;
    st[dup].link = st[target].link;
    memcpy(st[dup].next, st[target].next, sizeof(st[target].next));
    for (; p != 0 && st[p].next[c] == target; p = st[p].link)
        st[p].next[c] = dup;
    st[fresh].link = dup;
    st[target].link = dup;
}
// POJ 1743: for each test case (terminated by n == 0) prints the length of
// the longest theme that occurs twice without overlapping, or 0 if it is
// shorter than 5 notes. Works on the sequence of consecutive note
// differences so that transposed repetitions become equal substrings.
int main()
{
    // Stop on the terminating 0 and also on EOF / malformed input; a bare
    // `while (scanf(...))` would spin forever on EOF because EOF is -1.
    while (scanf("%d", &n) == 1 && n) {
        int ans = 0;
        for (int i = 1; i <= n; ++i){
            scanf("%d", &s[i]);
            a[i] = s[i] - s[i - 1];
        }
        sam_init();
        // Differences start at i = 2; +88 shifts them into positive
        // indices of state::next.
        for (int i = 2; i <= n; ++i)
            sam_extend(a[i] + 88);

        // Counting sort of states by len (len ranges over 0..n-1 here
        // because only n-1 differences were inserted).
        for (int i = 1; i <= sz; ++i) c[st[i].len]++;
        for (int i = 1; i < n; ++i) c[i] += c[i-1];
        for (int i = sz; i; i--) q[c[st[i].len]--] = i;

        // Push end-position bounds from longer states to their suffix links.
        for (int i = sz; i; i--){
            int p = q[i];
            l[st[p].link] = min(l[st[p].link], l[p]);
            r[st[p].link] = max(r[st[p].link], r[p]);
        }

        // A difference-substring of length L stands for L + 1 notes, so two
        // occurrences ending at l < r are disjoint only when L <= r - l - 1
        // (L == r - l would make them share one note).
        for (int i = 1; i <= sz; ++i)
            ans = max(ans, min(st[i].len, r[i] - l[i] - 1));

        if (ans < 4) puts("0");
        else printf("%d\n", ans + 1);
    }
}

bzoj3238
两种做法,用SA好想一些,维护一个单调栈
SAM做法:倒着建立后缀树,这样两个结点的后缀的lcp就变成了两个结点的最长公共后缀,在后缀树上就是两个结点的LCA长度,在后缀树上做树形DP就可以。
个人感觉SA好想还好写

#include 
#include 
#include 
#include 
#include 

using namespace std;
// Capacity of every global array in this program.
const int N = 500010;
// Length of the input string; set in main() from strlen(s + 1).
int n;
// sa[i]: start index of the suffix with rank i (filled by SA()).
// x, y, c: scratch arrays of SA() -- current sort keys, secondary order /
// next keys, and counting-sort bucket counters respectively.
int sa[N], x[N], y[N], c[N];
// Input text, stored 1-based in s[1..n].
char s[N];
// Builds the suffix array of s[1..n] into sa[1..n] using prefix doubling:
// each round sorts suffixes by their first 2*half characters with a
// two-key counting sort (x = current key of each suffix, y = order by
// second key, later reused to hold the new keys).
inline void SA(){
    int keys = 128;  // upper bound on key values; starts at the character range

    // Initial round: counting sort by the first character.
    for (int i = 1; i <= n; ++i) x[i] = s[i];
    for (int v = 0; v <= keys; ++v) c[v] = 0;
    for (int i = 1; i <= n; ++i) ++c[x[i]];
    for (int v = 1; v <= keys; ++v) c[v] += c[v - 1];
    for (int i = n; i >= 1; --i) {
        sa[c[x[i]]] = i;
        --c[x[i]];
    }

    for (int half = 1; half <= n; half <<= 1) {
        int filled = 0;

        // Order by second key: suffixes shorter than `half` have an empty
        // second key and come first, the rest follow in current sa order.
        for (int i = n; i > n - half; --i) y[++filled] = i;
        for (int i = 1; i <= n; ++i) {
            if (sa[i] <= half) continue;
            y[++filled] = sa[i] - half;
        }

        // Stable counting sort by first key, scanning y from the back.
        for (int v = 0; v <= keys; ++v) c[v] = 0;
        for (int i = 1; i <= n; ++i) ++c[x[i]];
        for (int v = 1; v <= keys; ++v) c[v] += c[v - 1];
        for (int i = n; i >= 1; --i) {
            const int suffix = y[i];
            sa[c[x[suffix]]] = suffix;
            --c[x[suffix]];
        }

        // Re-label: equal (first key, second key) pairs share a label.
        int labels = 1;
        y[sa[1]] = 1;
        for (int i = 2; i <= n; ++i) {
            const int cur = sa[i];
            const int prev = sa[i - 1];
            const int curTail = (cur + half > n) ? -1 : x[cur + half];
            const int prevTail = (prev + half > n) ? -1 : x[prev + half];
            if (x[cur] != x[prev] || curTail != prevTail) ++labels;
            y[cur] = labels;
        }
        swap(x, y);
        keys = labels;
        if (labels == n) break;  // all suffixes already distinct
    }
}
int height[N], rk[N];
inline void build_H(){
    int k = 0;
    for (int i = 1; i <= n; ++i) rk[sa[i]] = i;
    for (int i = 1; i <= n; ++i){
        if (rk[i] == 1) continue;
        if (k) --k;
        int j = sa[rk[i]-1];
        while(j + k <= n && i + k <= n && s[i+k] == s[j+k]) ++k;
        height[rk[i]] = k;
    }
}
// Entry of the monotonic stack used by solve().
struct node{
    // len: height value of this entry; pos: last rank index it covers.
    int len, pos;
    // Running sum accumulated by solve() up to and including this entry.
    long long val;
}st[N];
inline long long solve(){
    int tp = 0;
    long long ans1 = 0, ans = 0;
    st[0].pos = 0;
    for (int i = 2; i <= n; ++i){
        while(tp && st[tp].len > height[i]) tp--;
        if (st[tp].len == height[i])  st[tp].val += 1ll * (i - 1 - st[tp].pos) * height[i], st[tp].pos = i - 1;
        else st[tp + 1] = (node){height[i], i - 1, st[tp].val + 1ll * (i - 1 - st[tp].pos) * height[i]}, tp++;
        ans1 += st[tp].val;
        ans += 1ll * (i - 1) * (n - 1);
    }
    ans += 1ll * n * (n - 1);
//    cout<
    return ans - ans1 * 2;
}
// Reads one string, builds its suffix and height arrays and prints solve().
int main()
{
    scanf("%s", s + 1);
    n = strlen(s + 1);

    SA();
    build_H();

    const long long answer = solve();
    printf("%lld\n", answer);
}

bzoj4516
题意是对每一个前缀求不同子串的个数
思考SA求不同子串个数的求法,是对于每个后缀来找,所以倒着建串,就可以对每个前缀求不同子串的个数。按照题目要求将操作离线,然后倒着扫一遍+链表维护一下就好。
这个题用SAM比较简单,每个前缀的子串个数就是用当前结点减去父亲结点的最长长度就可以了,思维难度也不高

#include 
#include 
#include 
#include 
#include 
#include 
using namespace std;
// Maximum number of input values.
const int N = 100010;
// One suffix-automaton state.
struct state{
    // len: length of the longest string in this state; link: suffix link
    // (0 means "none").
    int len, link;
    // Transitions keyed by character value; a map because the values are
    // read as arbitrary ints rather than a small alphabet.
    map<int, int>nxt;
}st[N*2];
// sz: index of the last allocated state; last: state of the whole current string.
int sz, last;
// s: input sequence (1-based); n: its length.
int s[N], n;
// Creates the root state at index 1 (index 0 means "no state").
inline void sam_init(){
    last = 1;
    sz = 1;
}
// Appends character c to the automaton and returns
// len(new state) - len(its suffix link), which main() adds to the running
// total printed after each character.
inline int sam_extend(int c){
    const int fresh = ++sz;
    st[fresh].len = st[last].len + 1;

    // Add the missing c-transitions along the suffix-link chain of `last`.
    int p = last;
    for (; p && st[p].nxt.count(c) == 0; p = st[p].link)
        st[p].nxt[c] = fresh;

    if (p == 0) {
        st[fresh].link = 1;
    } else {
        const int target = st[p].nxt[c];
        if (st[target].len == st[p].len + 1) {
            st[fresh].link = target;
        } else {
            // Split `target`: the clone keeps the strings of length <= len(p) + 1.
            const int dup = ++sz;
            st[dup].len = st[p].len + 1;
            st[dup].link = st[target].link;
            st[dup].nxt = st[target].nxt;
            for (; p && st[p].nxt[c] == target; p = st[p].link)
                st[p].nxt[c] = dup;
            st[fresh].link = dup;
            st[target].link = dup;
        }
    }

    last = fresh;
    return st[fresh].len - st[st[fresh].link].len;
}
// Reads n values one at a time, appends each to the automaton and prints
// the running sum of sam_extend()'s return values after every insertion.
int main()
{
    cin >> n;
    sam_init();

    long long total = 0;
    int i = 1;
    while (i <= n) {
        scanf("%d", &s[i]);
        const long long added = sam_extend(s[i]);
        total += added;
        printf("%lld\n", total);
        ++i;
    }
}

bzoj3998
求字典序第k小的子串。T=0表示不同位置的相同子串算作一个;T=1表示不同位置的相同子串算作多个。
T=0时,跑一个后缀自动机上的dp,求不同子串个数(相当于求从每个结点出发的路径数目),然后在自动机上沿转移边一个点一个点地找就可以了。
T=1时,先求每个子串的出现次数,做法是按照后缀链接跑树形dp:字符串前缀对应的点初值赋为1,克隆出的点初值为0;跑完dp后再按照第一种方法做一下就可以。

#include 
#include 
#include 
#include 
#include 

using namespace std;
// Capacity of the state pool and of the helper arrays below.
const int N = 1000010;
// One suffix-automaton state.
struct state{
    // len: length of the longest string in this state; link: suffix link
    // (0 means "none").
    int len, link;
    // Transitions indexed 1..26 for 'a'..'z'; 0 means "no transition".
    int nxt[27];
}st[N];
// n: input length; sz: last allocated state; T: query mode read in main;
// last: state of the whole current string.
int n, sz, T, last;
// Rank of the requested substring (read in main).
long long k;
// Input text, stored 1-based in s[1..n].
char s[N];
// c: counting-sort buckets by len; q: states ordered by len.
int c[N], q[N];
// cnt[v]: weight of state v (1 per state when T == 0, otherwise the value
// accumulated over suffix links in main); dp[v]: total weight of all
// states reachable from v, including v itself except for the root.
long long dp[N], cnt[N];
// Creates the root state at index 1 (index 0 means "no state").
inline void sam_init(){
    last = 1;
    sz = 1;
}
// Appends character code c (1..26) to the automaton.
inline void sam_extend(int c){
    const int fresh = ++sz;
    st[fresh].len = st[last].len + 1;

    // Add the missing c-transitions along the suffix-link chain of `last`.
    int p = last;
    for (; p != 0 && st[p].nxt[c] == 0; p = st[p].link)
        st[p].nxt[c] = fresh;
    last = fresh;

    if (p == 0) {
        st[fresh].link = 1;
        return;
    }

    const int target = st[p].nxt[c];
    if (st[target].len == st[p].len + 1) {
        st[fresh].link = target;
        return;
    }

    // Split `target`: the clone keeps the strings of length <= len(p) + 1.
    const int dup = ++sz;
    st[dup].len = st[p].len + 1;
    st[dup].link = st[target].link;
    memcpy(st[dup].nxt, st[target].nxt, sizeof(st[target].nxt));
    for (; p != 0 && st[p].nxt[c] == target; p = st[p].link)
        st[p].nxt[c] = dup;
    st[fresh].link = dup;
    st[target].link = dup;
}
inline void sam(){
    long long now = 0;
    int p = 1;
    while(1){
        for (int i = 1; i <= 26; ++i){
            if (now + dp[st[p].nxt[i]] < k)
                now += dp[st[p].nxt[i]];
            else {
                putchar('a' + i - 1);
                now += cnt[st[p].nxt[i]];
                if (now >= k) return;
                p = st[p].nxt[i];
                break;
            }
        }
    }
}
// Builds the automaton of the input string, computes per-state weights
// (cnt) according to T and subtree totals (dp), then prints the k-th
// substring or -1 when fewer than k exist.
int main()
{
    scanf("%s", s + 1);
    n = strlen(s + 1);
    sam_init();
    for (int i = 1; i <= n; ++i)
        sam_extend(s[i] - 'a' + 1);

    // Counting sort of states by len into q[1..sz].
    for (int v = 1; v <= sz; ++v) ++c[st[v].len];
    for (int len = 1; len <= n; ++len) c[len] += c[len - 1];
    for (int v = sz; v >= 1; --v) {
        q[c[st[v].len]] = v;
        --c[st[v].len];
    }

    scanf("%d%lld", &T, &k);
    if (T) {
        // Mark the states of all prefixes, then add each state's count to
        // its suffix link, longest states first.
        int p = 1;
        for (int i = 1; i <= n; ++i){
            p = st[p].nxt[s[i] - 'a' + 1];
            cnt[p] = 1;
        }
        for (int i = sz; i >= 1; --i){
            const int v = q[i];
            cnt[st[v].link] += cnt[v];
        }
    } else {
        for (int v = 1; v <= sz; ++v) cnt[v] = 1;
    }

    // dp over the transition DAG, longest states first; the root itself
    // contributes no weight.
    for (int i = sz; i >= 1; --i){
        const int v = q[i];
        if (v != 1) dp[v] = cnt[v];
        for (int letter = 1; letter <= 26; ++letter)
            dp[v] += dp[st[v].nxt[letter]];
    }

    if (dp[1] < k) puts("-1");
    else sam();
}

SPOJ1812
求多个字符串公共最长子串,用后缀数组是O(NlogN)的,后缀自动机可以达到O(N),两种做法:
一种是把所有字符串连成一个串,用不同的字符来连接,这里需要状态压缩,所以对字符串总数有要求;另一种是对第一个串建立自动机,然后用其他串去跑自动机,记录每个点最大能匹配到的长度,一个点最终能达到的长度是所有串在此点匹配长度的最小值。

#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;
// Capacity of the state pool and of the helper arrays below.
const int N = 500010;
// One suffix-automaton state.
struct state{
    // len: length of the longest string in this state; link: suffix link
    // (0 means "none").
    int len, link;
    // Transitions indexed 1..26 for 'a'..'z'; 0 means "no transition".
    int next[30];
}st[N];
// sz: index of the last allocated state; last: state of the whole current string.
int sz, last;
// Buffer for the string currently being processed, stored 1-based.
char s[N];
// Length of the string currently in s.
int n;
// MAX[v]: longest match recorded at state v for the current string;
// MIN[v]: minimum of MAX[v] over all strings matched so far.
int MAX[N], MIN[N];
// Clears the state pool and creates the root state at index 1
// (index 0 is reserved as the "no state" marker).
inline void sam_init(){
    memset(st, 0, sizeof(st));
    last = 1;
    sz = 1;
}
// Appends character code c (1..26) to the automaton.
inline void sam_extend(int c){
    const int fresh = ++sz;
    st[fresh].len = st[last].len + 1;

    // Add the missing c-transitions along the suffix-link chain of `last`.
    int p = last;
    for (; p != 0 && st[p].next[c] == 0; p = st[p].link)
        st[p].next[c] = fresh;
    last = fresh;

    if (p == 0) {
        st[fresh].link = 1;
        return;
    }

    const int target = st[p].next[c];
    if (st[target].len == st[p].len + 1) {
        st[fresh].link = target;
        return;
    }

    // Split `target`: the clone keeps the strings of length <= len(p) + 1.
    const int dup = ++sz;
    st[dup].len = st[p].len + 1;
    st[dup].link = st[target].link;
    memcpy(st[dup].next, st[target].next, sizeof(st[target].next));
    for (; p != 0 && st[p].next[c] == target; p = st[p].link)
        st[p].next[c] = dup;
    st[fresh].link = dup;
    st[target].link = dup;
}
// b: counting-sort buckets by len; q: states ordered by len (filled in main).
int b[N], q[N];
// Runs the string currently in s[1..n] through the automaton, recording in
// MAX[v] the longest match that ended at state v, pushes matches up the
// suffix links, and folds the result into MIN.
inline void sam(){
    memset(MAX, 0, sizeof(MAX));

    int cur = 1;
    int matched = 0;
    for (int i = 1; i <= n; ++i) {
        const int c = s[i] - 'a' + 1;
        if (!st[cur].next[c]) {
            // Mismatch: climb suffix links until c can be read or we fall
            // off the root.
            do {
                cur = st[cur].link;
            } while (cur && !st[cur].next[c]);

            if (cur) {
                matched = st[cur].len + 1;
                cur = st[cur].next[c];
            } else {
                cur = 1;
                matched = 0;
            }
        } else {
            cur = st[cur].next[c];
            ++matched;
        }
        if (matched > MAX[cur]) MAX[cur] = matched;
    }

    // A state that was matched implies its suffix link was matched in full.
    for (int i = sz; i >= 1; --i){
        const int v = q[i];
        if (MAX[v] == 0) continue;
        const int parent = st[v].link;
        MAX[parent] = st[parent].len;
    }

    for (int v = 1; v <= sz; ++v)
        if (MAX[v] < MIN[v]) MIN[v] = MAX[v];
}
// SPOJ 1812 (LCS2): builds the automaton of the first string, matches every
// following string against it with sam(), and prints the largest per-state
// minimum match length, i.e. the length of the longest common substring.
// If no further string is supplied, MIN keeps its sentinel everywhere and
// 0 is printed.
int main()
{
    memset(MIN, 0x3f, sizeof(MIN));
    scanf("%s", s + 1);
    n = strlen(s + 1);
    sam_init();
    for (int i = 1; i <= n; ++i)
        sam_extend(s[i] - 'a' + 1);

    // Counting sort of states by len into q[1..sz]. State lengths range
    // over 0..n, so the prefix sum must include index n; stopping at n - 1
    // dropped the longest state from q and left q[sz] == 0, so that state's
    // match was never propagated to its suffix link.
    for (int i = 1; i <= sz; ++i) b[st[i].len]++;
    for (int i = 1; i <= n; ++i) b[i] += b[i-1];
    for (int i = sz; i; i--) q[b[st[i].len]--] = i;

    // n is reused for the length of each subsequent string.
    while (scanf("%s", s + 1) == 1) {
        n = strlen(s + 1);
        sam();
    }

    int ans = 0;
    for (int i = 1; i <= sz; ++i)
        if (MIN[i] < N) ans = max(ans, MIN[i]);  // skip untouched sentinels
    printf("%d", ans);
}

你可能感兴趣的:(字符串)