后缀自动机合集 I

简介


有关后缀自动机的理论和证明各路大神的博客多如牛毛,就不细说。

贴图太占载入速度了,还是去看原版的解析吧  http://hi.baidu.com/myidea/item/142c5cd45901a51820e25039

对于每个State点,father指向与该节点可接收相同后缀的节点,其表示的子串是当前节点所表示子串的后缀(即后面部分),

因此State->step - State->father->step 代表从任一后缀串起始走到该状态的路径数,也就是该状态所表示的不同子串的个数


hdu 4622 Reincarnation

对每个状态求 step - father->step(自身步数减去父状态步数)并累加,得到的就是字符串所有不同子串的数量

#include <stdio.h>
#include <vector>
#include <string.h>
#include <algorithm>
using namespace std;
// Capacity of the input buffer and side length of the answer table.
const int MAXN = 2010;
// Answer table indexed by (start, end) position pairs of the input string.
int ans[MAXN][MAXN];
// Input buffer for the current test string.
char s[MAXN];

// One automaton state.
struct node
{
    node *nxt[26];  // outgoing transitions, one slot per lowercase letter
    node *fa;       // parent ("father") link; null when the state has none
    int step;       // length value stored for this state

    // Resets the state: no transitions, no parent, step 0.
    void clear()
    {
        for ( int c = 0; c < 26; ++c ) { nxt[c] = 0; }
        step = 0;
        fa = 0;
    }

    // Returns step minus the parent's step, or 0 when there is no parent.
    int calc()
    {
        return fa ? step - fa->step : 0;
    }
};

// Fixed pool that all states are carved from, plus the cursors into it:
// `cur` is the next unused slot, `root` and `tail` are set up by init().
node nodePool[MAXN * 2];
node *root, *tail, *cur;
void init()
{
    cur = nodePool;
    root = tail = cur++;
    root->clear();
}
int tot;
void Insert ( int w )
{
    node *p = tail, *np = cur++;
    np->clear();
    np->step = p->step + 1;
    for ( ; p && !p->nxt[w]; p = p->fa ) { p->nxt[w] = np; }
    if ( !p )
    {
        np->fa = root;
        tot += np->calc();
    }
    else
    {
        if ( p->nxt[w]->step == p->step + 1 )
        {
            np->fa = p->nxt[w];
            tot += np->calc();
        }
        else
        {
            node *q = p->nxt[w], *r = cur++;
            *r = *q;
            tot -= q->calc();
            r->step = p->step + 1;
            q->fa = np->fa = r;
            tot += q->calc() + r->calc() + np->calc();
            for ( ; p && p->nxt[w] == q; p = p->fa ) { p->nxt[w] = r; }
        }
    }
    tail = np;
}


// Reads the test cases, precomputes an answer for every (start, end) pair of
// the current string, then answers the range queries from that table.
//
// For each start index i a fresh automaton is built and s[i..n-1] is inserted
// one letter at a time; the value of `tot` after inserting s[j] is stored in
// ans[i][j].  Queries are 1-based and inclusive.
//
// Input handling is defensive: reading stops cleanly on truncated input, the
// string read is bounded by the size of `s`, and a query outside 1 <= l <= r
// <= n prints 0 instead of indexing outside the filled part of the table.
int main()
{
#ifdef  __GNUC__
    // NOTE(review): every GCC-compatible build redirects stdin to in.txt.
    // This looks like a local-debugging aid; confirm before building elsewhere.
    freopen ( "in.txt", "r", stdin );
#endif // __GNUC__

    // Build "%<N>s" with N = sizeof s - 1 so scanf can never overrun `s`.
    char fmt[32];
    sprintf ( fmt, "%%%us", ( unsigned ) ( sizeof s - 1 ) );

    int t;
    if ( scanf ( "%d", &t ) != 1 ) { return 0; }
    while ( t-- > 0 )
    {
        if ( scanf ( fmt, s ) != 1 ) { break; }
        const int n = ( int ) strlen ( s );
        for ( int i = 0; i < n; ++i )
        {
            // Fresh automaton per start position; tot restarts with it.
            init();
            tot = 0;
            for ( int j = i; j < n; ++j )
            {
                Insert ( s[j] - 'a' );
                ans[i][j] = tot;
            }
        }

        int q;
        if ( scanf ( "%d", &q ) != 1 ) { break; }
        while ( q-- > 0 )
        {
            int l, r;
            if ( scanf ( "%d%d", &l, &r ) != 2 ) { return 0; }
            // Only ans[i][j] with 0 <= i <= j < n was filled for this string.
            if ( l < 1 || r > n || l > r )
            {
                printf ( "0\n" );
                continue;
            }
            printf ( "%d\n", ans[l - 1][r - 1] );
        }
    }
    return 0;
}

hdu 1403 Longest Common Substring

每个step姑且认为是转移到该状态时可接收的最大字符串长度

最长公共子串
#include <stdio.h>
#include <vector>
#include <string.h>
#include <algorithm>
using namespace std;
// Capacity of the input buffer; the state pool below holds twice as many slots.
const int MAXN = 100010;
// Input buffer for the string currently being processed.
char s[MAXN];

// One automaton state.
struct node
{
    node *nxt[26];  // outgoing transitions, one slot per lowercase letter
    node *fa;       // parent ("father") link; null when the state has none
    int step;       // length value stored for this state

    // Resets the state: no transitions, no parent, step 0.
    void clear()
    {
        for ( int c = 0; c < 26; ++c ) { nxt[c] = 0; }
        step = 0;
        fa = 0;
    }
};

// Fixed pool that all states are carved from, plus the cursors into it:
// `cur` is the next unused slot, `root` and `tail` are set up by init().
node nodePool[MAXN << 1];
node *root, *tail, *cur;
void init()
{
    cur = nodePool;
    root = tail = cur++;
    root->clear();
}
void Insert ( int w )
{
    node *p = tail, *np = cur++;
    np->clear();
    np->step = p->step + 1;
    for ( ; p && !p->nxt[w]; p = p->fa ) { p->nxt[w] = np; }
    if ( !p )
    {
        np->fa = root;
    }
    else
    {
        if ( p->nxt[w]->step == p->step + 1 )
        {
            np->fa = p->nxt[w];
        }
        else
        {
            node *q = p->nxt[w], *r = cur++;
            *r = *q;
            r->step = p->step + 1;
            q->fa = np->fa = r;
            for ( ; p && p->nxt[w] == q; p = p->fa ) { p->nxt[w] = r; }
        }
    }
    tail = np;
}
int solve()
{
	int l = strlen(s), w, res = 0, sum= 0;
	node *p = root;
	for (int i = 0; i< l; ++i)
	{
		w = s[i]-'a';
		if ( p->nxt[w])
		{
			++sum;
			p = p->nxt[w];
		}
		else
		{
			while ( p && p->nxt[w] == NULL) p=p->fa;
			if ( p== NULL)
			{
				sum = 0;
				p = root;
			}
			else
			{
				sum = p->step + 1;
				p = p->nxt[w];
			}
		}
		res = max(sum, res);
	}
	return res;
}
// Reads pairs of strings until input ends.  The first string of each pair is
// inserted into a fresh automaton; the second is left in `s` for solve(),
// whose result is printed on its own line.
//
// Both reads are bounded by the size of `s`, and a pair whose second string is
// missing is dropped instead of running solve() on the stale first string.
int main()
{
#ifdef  __GNUC__
    // NOTE(review): every GCC-compatible build redirects stdin to in.txt.
    // This looks like a local-debugging aid; confirm before building elsewhere.
    freopen ( "in.txt", "r", stdin );
#endif // __GNUC__

    // Build "%<N>s" with N = sizeof s - 1 so scanf can never overrun `s`.
    char fmt[32];
    sprintf ( fmt, "%%%us", ( unsigned ) ( sizeof s - 1 ) );

    while ( scanf ( fmt, s ) == 1 )
    {
        init();
        for ( const char *c = s; *c; ++c )
        {
            Insert ( *c - 'a' );
        }

        // The second string overwrites the buffer; solve() reads it from there.
        if ( scanf ( fmt, s ) != 1 ) { break; }
        printf ( "%d\n", solve() );
    }
    return 0;
}


hdu 4416 Good Article Good sentence

菜鸟对SAM的理解还是太弱了,借鉴了别人的代码才搞懂

本题用SAM来做,核心是记录下到达每个State时能匹配到的Bi子串的最大长度。

在此还要解释下,引入father指针的目的是最大化状态的重用性,每个状态的父节点表示的串 == 当前状态所表示的字符串最后几位

因此,父节点的deep要更新为自身及子节点deep的最大值

每个节点的 step - deep 就表示除去不符合要求的串之后剩余的数量(若该节点从未被任何Bi匹配到,即deep为0,则贡献为 step - father->step)

#include <stdio.h>
#include <vector>
#include <string.h>
#include <algorithm>
using namespace std;
// 64-bit integer used for the final count.
typedef long long LL;
// Capacity of the input buffer; the state pool below holds twice as many slots.
const int MAXN = 100010;
// Input buffer for the string currently being processed.
char s[MAXN];

// One automaton state.
struct node
{
    node *nxt[26];  // outgoing transitions, one slot per lowercase letter
    node *fa;       // parent ("father") link; null when the state has none
    int step;       // length value stored for this state
    int deep;       // largest match length recorded on this state so far

    // Resets the state: no transitions, no parent, step and deep both 0.
    void clear()
    {
        for ( int c = 0; c < 26; ++c ) { nxt[c] = 0; }
        deep = 0;
        step = 0;
        fa = 0;
    }
};

// Fixed pool that all states are carved from, plus the cursors into it:
// `cur` is the next unused slot, `root` and `tail` are set up by init().
node nodePool[MAXN << 1];
node *root, *tail, *cur;
void init()
{
    cur = nodePool;
    root = tail = cur++;
    root->clear();
}
void Insert ( int w )
{
    node *p = tail, *np = cur++;
    np->clear();
    np->step = p->step + 1;
    for ( ; p && !p->nxt[w]; p = p->fa ) { p->nxt[w] = np; }
    if ( !p )
    {
        np->fa = root;
    }
    else
    {
        if ( p->nxt[w]->step == p->step + 1 )
        {
            np->fa = p->nxt[w];
        }
        else
        {
            node *q = p->nxt[w], *r = cur++;
            *r = *q;
            r->step = p->step + 1;
            q->fa = np->fa = r;
            for ( ; p && p->nxt[w] == q; p = p->fa ) { p->nxt[w] = r; }
        }
    }
    tail = np;
}
void update()
{
    int l = strlen ( s ), w, sum = 0;
    node *p = root;
    for ( int i = 0; i < l; ++i )
    {
        w = s[i] - 'a';
        if ( p->nxt[w] )
        {
            ++sum;
            p = p->nxt[w];
        }
        else
        {
            while ( p && p->nxt[w] == NULL ) { p = p->fa; }
            if ( p == NULL )
            {
                sum = 0;
                p = root;
            }
            else
            {
                sum = p->step + 1;
                p = p->nxt[w];
            }
        }
        p->deep = max ( p->deep, sum );
    }
}
int p[MAXN << 1]; node *op[MAXN << 1];
int main()
{
#ifdef  __GNUC__
    freopen ( "in.txt", "r", stdin );
#endif // __GNUC__
    int t, cs = 0;
    int n;
    scanf ( "%d", &t );
    
    while ( t-- )
    {
        printf ( "Case %d: ", ++cs );
        scanf ( "%d", &n );
        scanf ( "%s", s );
        init();
        int l = strlen ( s );
        for ( int i = 0; i < l; ++i ) { Insert ( s[i] - 'a' ); }
        for ( int i = 0; i < n; ++i )
        {
            scanf ( "%s", s );
            update();
        }
        LL res = 0;
        memset ( p, 0, sizeof p );
        for ( int i = 0; i < cur - root; ++i ) { p[nodePool[i].step]++; }
        for ( int i = 1; i <= tail->step; ++i ) { p[i] += p[i - 1]; }
        for ( int i = 0; i < cur - root; ++i ) { op[--p[nodePool[i].step]] = nodePool + i; }
        for ( int i = cur - root - 1; i > 0; --i )
        {
            node *u = op[i];
            if ( u->deep > 0 )	
            {
                u->fa->deep = max ( u->fa->deep, u->deep );
                if ( u->deep < u->step )
                {
                    res += u->step - u->deep ;
                }
            }
            else
            {
                res += u->step - u->fa->step;
            }
        }
#ifdef __GNUC__
        printf ( "%lld\n", res );
#else
        printf ( "%I64d\n", res );
#endif
    }
    return 0;
}


hdu 4270 Dynamic Lover

关键问题是后缀自动机状态删除,可行的办法就是设置标记表示该状态是否被删除,且同一状态的拷贝点也指向相同的标记

#include <stdio.h>
#include <vector>
#include <string.h>
#include <algorithm>
using namespace std;
// Capacity of the input buffer, the deletion-flag table and the position table.
const int MAXN = 200020;
// Input buffer for the string currently being processed.
char s[MAXN ];
// Deletion flags; states point into this table through node::isdel.
int isDel[MAXN];
// Index of the next unused slot in isDel.
int delCnt;

// One automaton state.
struct node
{
    node *nxt[26];  // outgoing transitions, one slot per lowercase letter
    node *fa;       // parent ("father") link; null when the state has none
    int step;       // length value stored for this state
    int *isdel;     // points at this state's deletion flag
    int pos;        // position value; not touched by clear()
    node *flg;      // marker pointer compared against `cnt` during searches

    // Resets links, step, flag pointer and marker.  `pos` is left as it was.
    void clear()
    {
        for ( int c = 0; c < 26; ++c ) { nxt[c] = 0; }
        flg = 0;
        isdel = 0;
        step = 0;
        fa = 0;
    }
};

// Fixed pool that all states are carved from, plus the cursors into it.
node nodePool[MAXN << 1];
node *root, *tail, *cur;
// mseq[i] is the state registered for position i.
node *mseq[MAXN];
// Marker value used together with node::flg.
node *cnt;
void init()
{
    memset ( isDel, 0, sizeof isDel );    
    cur = nodePool;
    root = tail = cur++;
    root->clear();
    mseq[0] = root;
    root->isdel = isDel;
    delCnt = 1;
}
// Returns 1 when `x` cannot be used as a transition target: either it is null
// or its deletion flag is set.  Returns 0 otherwise.
int judge ( node *x )
{
    if ( x == NULL ) { return 1; }
    return *x->isdel ? 1 : 0;
}
void Insert ( int w )
{
    node *p = tail, *np = cur++;
    np->clear();
    np->step = p->step + 1;
    np->pos = np->step;
    mseq[np->pos] = np;
    np->isdel = isDel + delCnt++;
    for ( ; p && judge ( p->nxt[w] ); p = p->fa ) { p->nxt[w] = np; }
    if ( !p )
    {
        np->fa = root;
    }
    else
    {
        if ( p->nxt[w]->step == p->step + 1 )
        {
            np->fa = p->nxt[w];
        }
        else
        {
            node *q = p->nxt[w], *r = cur++;
            *r = *q;
            r->step = p->step + 1;
            q->fa = np->fa = r;
            for ( ; p && p->nxt[w] == q; p = p->fa ) { p->nxt[w] = r; }
        }
    }
    tail = np;
}
// Search state shared with dfs():
// flag - result of the search; non-zero also means "stop, answer found",
// tl   - target depth at which the search ends.
int flag, tl;

// Depth-first walk from state `k`, currently `l` letters deep, trying letters
// in index order 0..25 and skipping transitions that are null or flagged as
// deleted.  The first hit sets `flag` and unwinds the recursion:
//   - depth `tl` reached:       flag = k->pos   - l + 1
//   - `k` is marked with `cnt`: flag = cnt->pos - l + 1
void dfs ( node *k, int l )
{
    if ( flag ) { return; }

    if ( l == tl )
    {
        flag = k->pos - l + 1;
        return;
    }

    if ( k->flg == cnt )
    {
        flag = cnt->pos - l + 1;
        return;
    }

    for ( int c = 0; c < 26; ++c )
    {
        if ( flag ) { break; }

        node *child = k->nxt[c];
        if ( child == NULL ) { continue; }
        if ( *child->isdel ) { continue; }

        dfs ( child, l + 1 );
    }
}

// Processes data sets until input ends.  Each set is an initial string
// followed by n commands:
//   1 <str> - append every letter of <str> to the automaton,
//   2 <k>   - mark the states on the tail's parent chain, search with target
//             depth k and print the resulting `flag`,
//   other <k> - flag the last k tail states as deleted, stepping the tail back
//             through mseq one position at a time.
int main()
{
#ifdef  __GNUC__
    freopen ( "in.txt", "r", stdin );
#endif // __GNUC__
    int n, c, k;

    while ( scanf ( "%s", s ) != EOF )
    {
        init();
        for ( char *ch = s; *ch; ++ch ) { Insert ( *ch - 'a' ); }

        scanf ( "%d", &n );
        while ( n-- )
        {
            scanf ( "%d", &c );
            switch ( c )
            {
            case 1:
            {
                scanf ( "%s", s );
                for ( char *ch = s; *ch; ++ch ) { Insert ( *ch - 'a' ); }
                break;
            }

            case 2:
            {
                // Stamp every state from the tail up to (not including) the
                // root with the current tail, which dfs() compares against.
                cnt = tail;
                node *as = tail;
                while ( as != root )
                {
                    as->flg = cnt;
                    as = as->fa;
                }

                scanf ( "%d", &k );
                flag = 0;
                tl = k;
                dfs ( root, 0 );
                printf ( "%d\n", flag );
                break;
            }

            default:
            {
                scanf ( "%d", &k );
                while ( k-- )
                {
                    *tail->isdel = 1;
                    tail = mseq[tail->step - 1];
                }
                break;
            }
            }
        }
    }
    return 0;
}



你可能感兴趣的:(后缀自动机合集 I)