【HDU 4416 】Good Article Good sentence 后缀数组

Good Article Good sentence
Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 3171 Accepted Submission(s): 914

Problem Description
In middle school, teachers used to encourage us to pick up pretty sentences so that we could apply those sentences in our own articles. One of my classmates ZengXiao Xian, wanted to get sentences which are different from that of others, because he thought the distinct pretty sentences might benefit him a lot to get a high score in his article.
Assume that all of the sentences came from some articles. ZengXiao Xian intended to pick from Article A. The number of his classmates is n. The i-th classmate picked from Article Bi. Now ZengXiao Xian wants to know how many different sentences he could pick from Article A which don’t belong to any of his classmates’ articles. To simplify the problem, ZengXiao Xian wants to know how many different strings there are which are substrings of string A but are not substrings of any of the strings Bi. Of course, you will help him, won’t you?

Input
The first line contains an integer T, the number of test data.
For each test data
The first line contains an integer meaning the number of classmates.
The second line is the string A;The next n lines,the ith line input string Bi.
The length of the string A does not exceed 100,000 characters , The sum of total length of all strings Bi does not exceed 100,000, and assume all string consist only lowercase characters ‘a’ to ‘z’.

Output
For each case, print the case number and the number of substrings that ZengXiao Xian can find.

Sample Input

3
2
abab
ab
ba
1
aaa
bbb
2
aaaa
aa
aaa

Sample Output

Case 1: 3
Case 2: 3
Case 3: 1

题意:给一个字符串S和一系列字符串T1~Tn,问在S中有多少个不同子串满足它不是T1~Tn中任意一个字符串的子串。

思路:1.子串问题————后缀数组;
2.用lcp可以统计重复子串的个数。即:求出A与B重复的子串个数,以及A自身重复的子串个数,再用A的全部子串个数减去上面求出的值,就是答案。
pos[i]代表A中以位置i开头的子串里需要扣除的(重复的)子串个数!
3.代码实现
a.求出A与B重复的子串个数:对于每个A的后缀,求出与它(按排名)最近的B的后缀的LCP的长度(根据height数组的性质,可正反各扫一遍):
  1.正搜:从B的后缀出发,向右依次求它与其后相邻的A的后缀的lcp;
  2.反搜:从B的后缀出发,向左依次求它与其前相邻的A的后缀的lcp;
  3.最后找A串自身的重复子串。
  注意:这里求的是每个A的后缀与它最近的B的后缀的LCP的长度!
b.再在总数中处理A串的重复个数;
另:长度为len的串的子串个数为 (len)*(len+1)/2;

#include<iostream>
#include<stdio.h>
#include<string.h>
using namespace std;
// Capacity of every work array below; solve() also uses it as an
// "infinity" value for its running minimum.
const int maxn=3000005;

// Suffix-array workspace.
int wa[maxn];      // rank buffer used by DA()
int r[maxn];       // input symbol sequence (A, separators, B strings, terminating 0)
int wb[maxn];      // second rank/order buffer used by DA()
int cnt[maxn];     // counting-sort buckets used by DA()
int rk[maxn];      // rk[p]: rank of the suffix starting at p (filled by cal_H)
int height[maxn];  // height[i]: common-prefix length of the suffixes ranked i-1 and i
int sa[maxn];      // sa[i]: start position of the suffix with rank i

int T;             // number of test cases
int pos[maxn];     // pos[p]: amount subtracted for the suffix of A starting at p

char s[100005];    // input buffer for one string
long long ans;     // answer of the current test case

int nn;            // number of classmates (B strings) in the current test case
int mm;            // not read by the functions visible in this file section
int len=0;         // length of string A
void DA(int n,int m)
{
    int *x=wa,*y=wb;
    for(int i=0;i<m;i++) cnt[i]=0;;
    for(int i=0;i<n;i++) cnt[x[i]=r[i]]++;
    for(int i=1;i<m;i++) cnt[i]+=cnt[i-1];
    for(int i=n-1;i>=0;i--) sa[--cnt[x[i]]]=i;
    for(int j=1,p=1;p<n;j<<=1,m=p)
    {
        p=0;
        for(int i=n-j;i<n;i++)   y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j; 
        for(int i=0;i<m;i++)  cnt[i]=0;  
        for(int i=0;i<n;i++) cnt[x[y[i]]]++;
        for(int i=1;i<m;i++) cnt[i]+=cnt[i-1];
        for(int i=n-1;i>=0;i--) sa[--cnt[x[y[i]]]]=y[i];
        swap(x,y);
        x[sa[0]]=0;p=1;
        for(int i=1;i<n;i++) 
        x[sa[i]]=((y[sa[i]]==y[sa[i-1]])&&(y[sa[i]+j]==y[sa[i-1]+j]))?p-1:p++; 
    }

}
// Fills rk[] and height[] from sa[] and r[].
//   n : highest rank to process; ranks 1..n are handled and rank 0 is
//       skipped (main passes the text length without the terminating 0).
// After the call, rk[sa[i]]==i for i in 1..n, and height[rk[p]] is the
// length of the common prefix of the suffix starting at p and the suffix
// ranked immediately before it.
void cal_H(int n)
{
    for(int rankIdx=1;rankIdx<=n;++rankIdx)
        rk[sa[rankIdx]]=rankIdx;

    int matched=0;   // common-prefix length carried over from the previous start
    for(int start=0;start<n;++start)
    {
        // Moving one position to the right can shorten the match by at most one.
        if(matched) --matched;
        const int neighbour=sa[rk[start]-1];
        while(r[start+matched]==r[neighbour+matched])
            ++matched;
        height[rk[start]]=matched;
    }
}
// Computes ans from sa[], height[], len and pos[].
//   m : highest rank in sa[] / height[] (ranks 1..m are scanned)
// A suffix counts as "inside A" when its start position is below len.
// For every such suffix, pos[start] is raised to the largest overlap
// found by the three passes below, and ans becomes
// len*(len+1)/2 minus the sum of pos[0..len-1].
// pos[] is only ever raised here, so the caller must zero it beforehand.
void solve(int m)
{
    // Pass 1 (increasing rank): running minimum of height since the last
    // suffix that is not inside A.
    int run=maxn;
    for(int rank=1;rank<=m;++rank)
    {
        const int start=sa[rank];
        if(start>=len)
        {
            run=maxn;
            continue;
        }
        if(height[rank]<run) run=height[rank];
        if(pos[start]<run) pos[start]=run;
    }

    // Pass 2 (decreasing rank): same running minimum, applied to the
    // suffix on the lower-rank side of each height entry.
    run=maxn;
    for(int rank=m;rank>=1;--rank)
    {
        const int start=sa[rank-1];
        if(start>=len)
        {
            run=maxn;
            continue;
        }
        if(height[rank]<run) run=height[rank];
        if(pos[start]<run) pos[start]=run;
    }

    // Pass 3: two rank-adjacent suffixes that are both inside A; the
    // lower-ranked one takes their common-prefix length.
    for(int rank=1;rank<=m;++rank)
    {
        const int lower=sa[rank-1];
        if(sa[rank]>=len||lower>=len) continue;
        if(pos[lower]<height[rank]) pos[lower]=height[rank];
    }

    // All substrings of A minus what was marked for every start position.
    long long total=static_cast<long long>(len)*static_cast<long long>(len+1)/2;
    for(int start=0;start<len;++start)
        total-=pos[start];
    ans=total;
}
int main()
{
    scanf("%d",&T);
    for(int ii=1;ii<=T;ii++)
    {
        int mm=0;
        scanf("%d",&nn);
        scanf("%s",s);
        len=strlen(s);
        for(int i=0;s[i];i++)
            r[mm++]=s[i]-'a'+1;                         //连A,B串
        int tot=28;
        for(int i=1;i<=nn;i++)
        {
            r[mm++]=tot++;
            scanf("%s",s);
            for(int j=0;s[j];j++)
                r[mm++]=s[j]-'a'+1;         //加‘#’+i 分隔
        }
        r[mm]=0;
                DA(mm+1,tot+1);
        cal_H(mm);
    // for(int i=1;i<=mm;i++)
    // cout<<height[i]<<" ";
    // cout<<endl;
        memset(pos,0,sizeof(pos));  
        solve(mm);
            printf("Case %d: %I64d\n",ii,ans);  
    }


}

你可能感兴趣的:(poj)