后缀数组学习笔记

我用我的代码风格,改写了一下罪恶的板子,罗穗骞的代码是真的不清真
这个排序的思路依赖基数排序,可以达到\(O(n \log n)\)

我们可以比较第一位,先给第一位排序
第一位是\(2^0\)位,有了每个后缀的前\(2^0\)排序,我们可以给每个后缀的前\(2^1\)排序
给每个后缀的\(2^1\)排序后又可以给每个后缀\(2^2\)位排序……然后我们就给每个后缀排完序了
复杂度是\(O(n \log n)\),因为基数排序的每次复杂度是\(O(n)\)

// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void suffix(int len,int sc) {
    int *tmpra = tmpa,*tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) ++sum[tmpra[i] = str[i]];
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[str[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2 , sc = all) {
        int l = 0;
        for(int k = len - j + 1 ; k <= len ; ++k) tmpsa[++l] = k;
        for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
        for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
        for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
        for(int k = 1 ; k <= len ; ++k) sum[val[k]]++;
        for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
        for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
        swap(tmpra,tmpsa);
        tmpra[sa[1]] = 1;
        all = 1;
        for(int k = 2 ; k <= len ; ++k) 
            tmpra[sa[k]] = cmp(tmpsa,sa[k],sa[k - 1],j) ? all : ++all;
        sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}

然后说如何求每个后缀和排名在它前一位的后缀的最长公共前缀
可以利用这个性质
\(height[i]\)表示从\(i\)开始的后缀和从\(sa[ra[i] - 1]\)开始的后缀的最长公共前缀的长度
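\(height[i] \ge height[i - 1] - 1\)
可以这么想,设排在\(i - 1\)前一位的后缀是\(k\),如果\(height[i - 1] \ge 2\),那么去掉首字符之后,后缀\(k + 1\)一定排在后缀\(i\)的前面,并且它们的公共前缀就是\(i - 1\)和\(k\)所公共的那部分少一个字符,也就是\(height[i - 1] - 1\);而排在\(i\)前一位的后缀和\(i\)的最长公共前缀不会比这个更短(\(height[i - 1] \le 1\)时不等式显然成立)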
然后就可以\(O(n)\)的求出来了

1 求不可重叠的最长重复子串

给出一个字符串,求至少出现两次、且其中两次出现互不重叠的子串的最长长度
我们二分一个答案,可以对后缀按照排名进行分组,height大于等于答案的就分到一组里,然后判断这个组里起始位置最靠前的后缀和最靠后的后缀,它们的位置差值是否大于等于这个答案
复杂度是\(O(n \log n)\)

例题 POJ 1743

Musical Theme

Time Limit: 1000MS Memory Limit: 30000K
Total Submissions: 31853 Accepted: 10630

Description

A musical melody is represented as a sequence of N (1<=N<=20000)notes that are integers in the range 1..88, each representing a key on the piano. It is unfortunate but true that this representation of melodies ignores the notion of musical timing; but, this programming task is about notes and not timings.
Many composers structure their music around a repeating "theme", which, being a subsequence of an entire melody, is a sequence of integers in our representation. A subsequence of a melody is a theme if it:
is at least five notes long
appears (potentially transposed -- see below) again somewhere else in the piece of music
is disjoint from (i.e., non-overlapping with) at least one of its other appearance(s)
Transposed means that a constant positive or negative value is added to every note value in the theme subsequence.
Given a melody, compute the length (number of notes) of the longest theme.
One second time limit for this problem's solutions!

Input

The input contains several test cases. The first line of each test case contains the integer N. The following n integers represent the sequence of notes.
The last test case is followed by one zero.

Output

For each test case, the output file should contain a single line with a single integer that represents the length of the longest theme. If there are no themes, output 0.

Sample Input

30
25 27 30 34 39 45 52 60 69 79 69 60 52 45 39 34 30 26 22 18
82 78 74 70 66 67 64 60 65 80
0

Sample Output

5

Hint

Use scanf instead of cin to reduce the read time.

题解

这道题的意思是,作曲家喜欢转调,然后让你找出两段数,这两段数的一段是另一段加上或减去同一个数可以得到
第一次做这道题的时候,我还是个孩子……然后用的hash加二分,最慢的点也0.163过了……
然后现在学会了后缀数组,把每个值处理成差分,为了差分都是正数我们可以都加上89,留出来1作为接在数组末尾的结束符
但是要求这两段数之间至少有一个数相隔
2 1 3 2 1 3
这个时候长度3是不合法的,因为有个公共数字
所以就要求最靠后的下标减去最靠前的下标是大于二分的答案的,不能等于

代码

#include 
#include 
#include 
#include 
#include 
//#define ivorysi
#define MAXN 20005
using namespace std;
typedef long long ll;

// ra[p]: rank of the suffix starting at p; sa[r]: start of the suffix ranked r.
// Both are filled by suffix().
int ra[MAXN],sa[MAXN];
// n: length of the current sequence; num: the notes as read from input;
// str: the sequence handed to suffix() (shifted note differences, then a final 1);
// tmpa, tmpb, val, sum: scratch buffers used inside suffix();
// height[p]: common-prefix length of the suffix at p and the suffix ranked
// one place before it (computed in main).
int n,num[MAXN],str[MAXN],tmpa[MAXN],tmpb[MAXN],val[MAXN],sum[MAXN],height[MAXN];
// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void suffix(int len,int sc) {
    int *tmpra = tmpa,*tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) ++sum[tmpra[i] = str[i]];
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[str[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2 , sc = all) {
        int l = 0;
        for(int k = len - j + 1 ; k <= len ; ++k) tmpsa[++l] = k;
        for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
        for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
        for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
        for(int k = 1 ; k <= len ; ++k) sum[val[k]]++;
        for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
        for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
        swap(tmpra,tmpsa);
        tmpra[sa[1]] = 1;
        all = 1;
        for(int k = 2 ; k <= len ; ++k) 
            tmpra[sa[k]] = cmp(tmpsa,sa[k],sa[k - 1],j) ? all : ++all;
        sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
bool check(int l) {
    int st = -1 , ed;
    for(int i = 3 ; i <= n ; ++i) {
        if(height[sa[i]] >= l) {
            if(st == -1) st = i - 1;
            ed = i;
        }
        else {
            if(st == -1) continue;
            int minn = 0x7fffffff,maxx = 0;
            for(int j = st ; j <= ed ; ++j) {
                maxx = max(maxx,sa[j]);
                minn = min(minn,sa[j]);
            }
            if(maxx - minn > l) return true;
            st = -1;
        }
    }
    if(st != -1) {
        int minn = 0x7fffffff,maxx = 0;
        for(int j = st ; j <= ed ; ++j) {
            maxx = max(maxx,sa[j]);
            minn = min(minn,sa[j]);
        }
        if(maxx - minn > l) return true;
    }
    return false;
}
// Driver for POJ 1743: for every test case, turns the notes into shifted
// differences, builds the suffix and height arrays, binary-searches the
// largest length accepted by check() and prints the theme length (or 0).
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    for(;;) {
        if(scanf("%d",&n) == EOF) break;
        if(n == 0) break;
        for(int i = 1 ; i <= n ; ++i) scanf("%d",&num[i]);
        // Adjacent differences shifted by 89, with a closing 1 at position n.
        for(int i = 1 ; i < n ; ++i) {
            str[i] = num[i + 1] - num[i] + 89;
        }
        str[n] = 1;
        suffix(n,180);
        // height[i] starts from height[i - 1] - 1 and is extended symbol by symbol.
        for(int i = 1 ; i < n ; ++i) {
            int prev = sa[ra[i] - 1];
            int common = max(height[i - 1] - 1,0);
            while(i + common <= n && prev + common <= n && str[i + common] == str[prev + common]) ++common;
            height[i] = common;
        }
        int lo = 0,hi = n;
        while(lo < hi) {
            int mid = (lo + hi + 1) >> 1;
            if(check(mid)) lo = mid;
            else hi = mid - 1;
        }
        // lo counts differences; a theme of lo differences spans lo + 1 notes.
        if(lo >= 4) printf("%d\n",lo + 1);
        else puts("0");
    }
}

2 可重叠的k次重复子串

和上一个差不多,但是分组之后要求至少有一组后缀个数大于等于k

例题 POJ 3261

Milk Patterns

Time Limit: 5000MS Memory Limit: 65536K
Total Submissions: 16850 Accepted: 7437
Case Time Limit: 2000MS

Description

Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can't predict the quality of milk from one day to the next, there are some regular patterns in the daily milk quality.
To perform a rigorous study, he has invented a complex classification scheme by which each milk sample is recorded as an integer between 0 and 1,000,000 inclusive, and has recorded data from a single cow over N (1 ≤ N ≤ 20,000) days. He wishes to find the longest pattern of samples which repeats identically at least K (2 ≤ K ≤ N) times. This may include overlapping patterns -- 1 2 3 2 3 2 3 1 repeats 2 3 2 3 twice, for example.
Help Farmer John by finding the longest repeating subsequence in the sequence of samples. It is guaranteed that at least one subsequence is repeated at least K times.

Input

Line 1: Two space-separated integers: N and K
Lines 2..N+1: N integers, one per line, the quality of the milk on day i appears on the ith line.

Output

Line 1: One integer, the length of the longest pattern which occurs at least K times

Sample Input

8 2
1
2
3
2
3
2
3
1

Sample Output

4

题解

就是刚才讲的东西啦

代码

#include 
#include 
#include 
#include 
#include 
//#define ivorysi
#define MAXN 200005
using namespace std;
typedef long long ll;

// ra[p]: rank of the suffix starting at p; sa[r]: start of the suffix ranked r.
// Both are filled by suffix().
int ra[MAXN],sa[MAXN];
// n: sequence length (main() adds one for the closing symbol); k: required
// number of occurrences; num: input values, later sorted and deduplicated;
// str: compressed sequence handed to suffix(); tmpa, tmpb, val, sum: scratch
// buffers used inside suffix(); height[p]: common-prefix length of the suffix
// at p and the suffix ranked one place before it (computed in main).
int n,k,num[MAXN],str[MAXN],tmpa[MAXN],tmpb[MAXN],val[MAXN],sum[MAXN],height[MAXN];
// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void suffix(int len,int sc) {
    int *tmpra = tmpa,*tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) ++sum[tmpra[i] = str[i]];
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[str[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2 , sc = all) {
        int l = 0;
        for(int k = len - j + 1 ; k <= len ; ++k) tmpsa[++l] = k;
        for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
        for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
        for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
        for(int k = 1 ; k <= len ; ++k) sum[val[k]]++;
        for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
        for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
        swap(tmpra,tmpsa);
        tmpra[sa[1]] = 1;
        all = 1;
        for(int k = 2 ; k <= len ; ++k) 
            tmpra[sa[k]] = cmp(tmpsa,sa[k],sa[k - 1],j) ? all : ++all;
        sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
bool check(int l) {
    int st = -1 , ed;
    for(int i = 2 ; i <= n ; ++i) {
        if(height[sa[i]] >= l) {
            if(st == -1) st = i - 1;
            ed = i;
        }
        else {
            if(st == -1) continue;
            if(ed - st + 1 >= k) return true;
            st = -1;
        }
    }
    if(st != -1) {
        if(ed - st + 1 >= k) return true;
    }
    return false;
}
// Driver for POJ 3261: reads n and k and the samples, compresses the values,
// builds the suffix and height arrays, then binary-searches the largest
// length accepted by check() and prints it.
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    scanf("%d%d",&n,&k);
    for(int i = 1 ; i <= n ; ++i) {
        scanf("%d",&num[i]);
        str[i] = num[i];
    }
    // Coordinate compression: each value becomes its index among the sorted
    // distinct values plus one, which leaves 1 free for the closing symbol.
    sort(num + 1,num + n + 1);
    int distinct = unique(num + 1,num + n + 1) - num - 1;
    for(int i = 1 ; i <= n ; ++i) {
        int *hit = lower_bound(num + 1,num + distinct + 1,str[i]);
        str[i] = hit - num + 1;
    }
    n += 1;
    str[n] = 1;
    suffix(n,distinct + 1);
    // height[i] starts from height[i - 1] - 1 and is extended symbol by symbol.
    for(int i = 1 ; i < n ; ++i) {
        int prev = sa[ra[i] - 1];
        int common = max(height[i - 1] - 1,0);
        while(i + common <= n && prev + common <= n && str[i + common] == str[prev + common]) ++common;
        height[i] = common;
    }
    int lo = 0,hi = n;
    while(lo < hi) {
        int mid = (lo + hi + 1) >> 1;
        if(check(mid)) lo = mid;
        else hi = mid - 1;
    }
    printf("%d\n",lo);
}

3 不相同的子串的个数

可以知道,每一个子串都是一个后缀的前缀,我们按照排名的后缀把后缀加进去,那么这个后缀贡献的新的子串是这个后缀的长度,那么这个后缀的重复的子串就是和上一个排名的后缀的最长公共前缀,减去即可

SPOJ Distinct Substrings

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.
Example

Sample Input:

2
CCCCC
ABABA

Sample Output:

5
9

Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

题解

比较练板
注意一下这个东西不是只有大写字母

代码

#include 
#include 
#include 
#include 
#define MAXN 1005
//#define ivorysi
using namespace std;
typedef long long ll;

// T: number of test cases still to process.
int T;
// len: length of the current string (main() adds one for the closing symbol);
// str: the string as integers, handed to Suffix(); ra[p]: rank of the suffix
// starting at p; sa[r]: start of the suffix ranked r; tmpa, tmpb, sum, val:
// scratch buffers used inside Suffix(); height[p]: common-prefix length of the
// suffix at p and the suffix ranked one place before it (computed in main).
int len,str[MAXN],ra[MAXN],sa[MAXN],tmpa[MAXN],tmpb[MAXN],sum[MAXN],val[MAXN],height[MAXN];
// s: raw input line; characters are stored from index 1.
char s[MAXN];
// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void Suffix(int len,int sc) {
    int *tmpra = tmpa , *tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    for(int k = len - j + 1 ; k <= len ; ++k) tmpsa[++l] = k;
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) sum[val[k]]++;
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    swap(tmpsa,tmpra);
    tmpra[sa[1]] = 1;
    all = 1;
    for(int k = 2 ; k <= len ; ++k) {
        tmpra[sa[k]] = cmp(tmpsa,sa[k],sa[k - 1],j) ? all : ++all;
    }
    sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
// Driver for SPOJ DISUBSTR: for each test string, builds the suffix and height
// arrays and prints the number of distinct substrings, i.e. the sum over all
// real suffixes of (suffix length - common prefix with the previous suffix).
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    scanf("%d",&T);
    while(T--) {
        // Stop if the input ends early instead of reprocessing the old buffer.
        if(scanf("%s",s + 1) != 1) break;
        len = strlen(s + 1);
        for(int i = 1 ; i <= len ; ++i) {
            // Symbols index the counting-sort buckets, so they must be positive:
            // go through unsigned char (plain char may be negative) and shift by
            // one so that every symbol lands in 2..256, above the closing 1 and
            // below the 300 buckets passed to Suffix().
            str[i] = static_cast<unsigned char>(s[i]) + 1;
        }
        str[++len] = 1;
        Suffix(len,300);
        // height[i] starts from height[i - 1] - 1 and is extended symbol by symbol.
        for(int i = 1 ; i < len ; ++i) {
            int k = max(0,height[i - 1] - 1);
            while(i + k <= len && sa[ra[i] - 1] + k <= len && str[i + k] == str[sa[ra[i] - 1] + k]) {
                ++k;
            }
            height[i] = k;
        }
        // Rank 1 is the suffix made of the closing symbol alone and is skipped.
        // len counts the closing symbol, so len - sa[i] is the real suffix length.
        int ans = 0;
        for(int i = 2 ; i <= len ; ++i) {
            ans += len - sa[i] - height[sa[i]];
        }
        printf("%d\n",ans);
    }
}

4 重复次数最多的连续重复子串

我们把串取反存一下,然后再求一遍后缀数组,用来判断这个串向后延伸的大小
我们枚举一个重复的串的长度都是L
然后我们用st表维护一下区间最小值,用来判断两个位置向后延伸的长度大小

然后我们发现,如果一个串长度是L,出现了两次以上,那么它一定包含了str[1],str[L+1],str[L*2+1]...中相邻的两个
就相当于复制一个串,右移了L到旁边,然后判断两个相邻的位置L,R
L和R的最长公共前缀
L和R反向之后的最长公共前缀
然后就能找到这个串的长度是K
答案就是K/L + 1

每次判断是\(\frac{n}{L}\)
根据调和级数,这个东西是\(O(n \log n)\)

例题 SPOJ REPEATS Repeats

A string s is called an (k,l)-repeat if s is obtained by concatenating k>=1 times some seed string t with length l>=1. For example, the string
s = abaabaabaaba
is a (4,3)-repeat with t = aba as its seed string. That is, the seed string t is 3 characters long, and the whole string s is obtained by repeating t 4 times.
Write a program for the following task: Your program is given a long string u consisting of characters ‘a’ and/or ‘b’ as input. Your program must find some (k,l)-repeat that occurs as substring within u with k as large as possible. For example, the input string
u = babbabaabaabaabab
contains the underlined (4,3)-repeat s starting at position 5. Since u contains no other contiguous substring with more than 4 repeats, your program must output the maximum k.

Input

In the first line of the input contains H- the number of test cases (H <= 20). H test cases follow. First line of each test cases is n - length of the input string (n <= 50000), The next n lines contain the input string, one character (either ‘a’ or ‘b’) per line, in order.

Output

For each test cases, you should write exactly one interger k in a line - the repeat count that is maximized.

Example

Input:

1
17
b
a
b
b
a
b
a
a
b
a
a
b
a
a
b
a
b

Output:

4
since a (4, 3)-repeat is found starting at the 5th character of the input string.

题解

就是上面说的做法了

代码

#include 
#include 
#include 
#include 
#define MAXN 100005
//#define ivorysi
using namespace std;
typedef long long ll;

// T: number of test cases still to process.
int T;
// len: length of the current string (main() adds one for the closing symbol).
// The _L arrays belong to the string as read, the _Z arrays to its reversed
// copy (main() fills str_Z from str_L back to front):
//   ra_*: rank of the suffix starting at each position; sa_*: start of the
//   suffix at each rank; height_*: common-prefix length with the suffix ranked
//   one place before; st_*: tables whose level 0 is height_*[sa_*[i]].
// tmpa, tmpb, sum, val: scratch buffers used inside Suffix().
// log2[i]: filled in main() from log2[i / 2] + 1.
// NOTE(review): `log2` is also the name of a C math library function; if a math
// header is among the includes this declaration may clash - confirm.
int len,ra_L[MAXN],sa_L[MAXN],ra_Z[MAXN],sa_Z[MAXN],
    tmpa[MAXN],tmpb[MAXN],sum[MAXN],val[MAXN],
    height_L[MAXN],height_Z[MAXN],str_L[MAXN],str_Z[MAXN],
    st_L[MAXN][25],st_Z[MAXN][25],log2[MAXN];
// s: buffer for one input token; main() reads into s + 1 and uses s[1].
char s[5];
// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void Suffix(int *str,int *sa,int *ra,int len,int sc) {
    int *tmpra = tmpa , *tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    for(int k = len - j + 1 ; k <= len ; ++k) tmpsa[++l] = k;
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) sum[val[k]]++;
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    swap(tmpsa,tmpra);
    tmpra[sa[1]] = 1;
    all = 1;
    for(int k = 2 ; k <= len ; ++k) {
        tmpra[sa[k]] = cmp(tmpsa,sa[k],sa[k - 1],j) ? all : ++all;
    }
    sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
// Fills h[1..len-1] (len is the global length): h[p] is the number of leading
// symbols shared by the suffix at p and the suffix ranked one place before it.
// Each entry starts from h[p - 1] - 1 and is extended by direct comparison.
//   str - symbols, 1-based;  sa / ra - suffix array and rank array of str
void calc(int *str,int *sa,int *ra,int *h) {
    for(int pos = 1 ; pos < len ; ++pos) {
        const int prev = sa[ra[pos] - 1];   // suffix ranked just before pos
        int matched = h[pos - 1] - 1;
        if(matched < 0) matched = 0;
        for(;;) {
            if(pos + matched > len || prev + matched > len) break;
            if(str[pos + matched] != str[prev + matched]) break;
            ++matched;
        }
        h[pos] = matched;
    }
}
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    log2[1] = 0;
    log2[2] = 1;
    for(int i = 3 ; i <= 50000 ; ++i) {
    log2[i] = log2[i / 2] + 1;
    }
    scanf("%d",&T);
    while(T--) {
    scanf("%d",&len);
    for(int i = 1 ; i <= len ; ++i) {
        scanf("%s",s + 1);
        str_L[i] = s[1] - 'a' + 2;
    }
    for(int i = len ; i >= 1 ; --i) {
        str_Z[len - i + 1] = str_L[i];
    }
    ++len;
    str_L[len] = str_Z[len] = 1;
    Suffix(str_L,sa_L,ra_L,len,27);
    Suffix(str_Z,sa_Z,ra_Z,len,27);
    calc(str_L,sa_L,ra_L,height_L);
    calc(str_Z,sa_Z,ra_Z,height_Z);
    for(int i = 1 ; i <= len ; ++i) {
        st_L[i][0] = height_L[sa_L[i]];
        st_Z[i][0] = height_Z[sa_Z[i]];
    }
    for(int j = 1 ; j <= 17 ; ++j) {
        for(int i = 1; i <= len ; ++i) {
        int l = min(i + (1< (len - 1) / i) break;
        int s = (j - 1) * i + 1,t = j * i + 1;
        if(ra_L[s] > ra_L[t]) swap(s,t);
        int z = log2[ra_L[t] - ra_L[s]];
        
        int l = min(st_L[ra_L[s] + 1][z],st_L[ra_L[t] - (1< ra_Z[t]) swap(s,t);
        z = log2[ra_Z[t] - ra_Z[s]];
        l += min(st_Z[ra_Z[s] + 1][z],st_Z[ra_Z[t] - (1<

例题 POJ 3693 Maximum repetition substring

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.
Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.
The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa

Sample Output

Case 1: ababab
Case 2: aa

题解

这个找最大的出现次数可以用上面的算法
但是找这个串在哪呢
可以这么想,我们找到的重复串,我们肯定会遇到第一个所在的位置,我们直接判断这两个位置往后能判断多少个,那么我们直接去找前L个,也就是枚举起始位置
然后这个重复串后边的位置会因为答案不优而没去前L个
那么这样复杂度还是可以保证的

代码

#include 
#include 
#include 
#include 
#define MAXN 100005
//#define ivorysi
using namespace std;
typedef long long ll;

// T: declared but not referenced by the code of this program.
int T;
// len: length of the current string (main() adds one for the closing symbol);
// ra[p]: rank of the suffix starting at p; sa[r]: start of the suffix ranked r;
// st[i][j]: minimum of height[sa[.]] over the 2^j ranks starting at i (built
// by Get_RMQ); str: the string as integers; height[p]: common-prefix length of
// the suffix at p and the suffix ranked one place before it.
int len,ra[MAXN],sa[MAXN],st[MAXN][21],str[MAXN],height[MAXN];
// val, sum, tmpa, tmpb: scratch buffers used inside Suffix();
// log2[i]: filled in main() from log2[i / 2] + 1.
// NOTE(review): `log2` is also the name of a C math library function; if a math
// header is among the includes this declaration may clash - confirm.
int val[MAXN],sum[MAXN],tmpa[MAXN],tmpb[MAXN],log2[MAXN];
// s: raw input line; characters are stored from index 1.
char s[MAXN];
// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void Suffix(int len,int sc) {
    int *tmpra = tmpa,*tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    for(int k = len - j + 1; k <= len ; ++k) tmpsa[++l] = k;
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) ++sum[val[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    swap(tmpra,tmpsa);
    tmpra[sa[1]] = 1;
    all = 1;
    for(int z = 2 ; z <= len ; ++z) 
        tmpra[sa[z]] = cmp(tmpsa,sa[z],sa[z - 1],j) ? all : ++all;
    sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
void Get_height(int len) {
    for(int i = 1 ; i < len ; ++i) {
    int k = max( 0 , height[i - 1] - 1);
    while(i + k <= len && sa[ra[i] - 1] + k <= len && str[i + k] == str[sa[ra[i] - 1] + k]) {
        ++k;
    }
    height[i] = k;
    }
}
// Builds the range-minimum table over ranks: st[r][0] = height[sa[r]] and
// st[r][lvl] is the minimum of the 2^lvl level-0 entries starting at rank r.
// Only windows that fit inside 1..len are filled.
void Get_RMQ(int len) {
    for(int r = 1 ; r <= len ; ++r) st[r][0] = height[sa[r]];
    for(int lvl = 1 ; lvl <= 20 ; ++lvl) {
        const int half = 1 << (lvl - 1);    // width of the two windows being merged
        const int span = half * 2;          // width of the window being produced
        for(int r = 1 ; r + span - 1 <= len ; ++r) {
            st[r][lvl] = min(st[r][lvl - 1],st[r + half][lvl - 1]);
        }
    }
}
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    log2[1] = 0;
    log2[2] = 1;
    for(int i = 3 ; i <= 100000 ; ++i) {
    log2[i] = log2[i / 2] + 1;
    }
    int cnt = 0;
    while(1) {
    scanf("%s",s + 1);
    if(s[1] == '#') break;
    len = strlen(s + 1);
    for(int i = 1 ; i <= len ; ++i) str[i] = s[i] - 'a' + 2;
    str[++len] = 1;
    Suffix(len,27);
    Get_height(len);
    Get_RMQ(len);
    int ans = 1 , al = 1,ar = 1;
    for(int i = 1 ; i < len ; ++i) if(str[i] < str[al]) al = ar = i;
    for(int i = 1 ; i <= len ; ++i) {
        for(int j = 1 ; j <= len ; ++j) {
        int s = (j - 1) * i + 1, t = j * i + 1;
        if(t > len) break;
        if(ra[s] > ra[t]) swap(s,t);
        int z = log2[ra[t] - ra[s]];
        int l = min(st[ra[s] + 1][z],st[ra[t] - (1 << z) + 1][z]);
        if(l / i + 2 < ans) continue;
        int t1 = (j - 1) * i + l;
        for(int k = 1 ; k <= i ; ++k) {
            if(s - k < 1 || t - k < 1) break;
            if(str[s - k] != str[t - k]) break;
            int t0 = min(s - k ,t - k);
            int now = (t1 - t0 + 1) / i + 1;
            if(now > ans) {
            ans = (t1 - t0 + 1) / i + 1;
            al = t0; ar = t0 + now * i - 1;
            }
            else if(now == ans) {
            if(ra[al] > ra[t0]) al = t0,ar = t0 + now * i - 1;
            }
        }
        }
    }
    ++cnt;
    printf("Case %d: ",cnt);
    for(int i = al ; i <= ar ; ++i) putchar('a' + str[i] - 2);
    putchar('\n');
    }
}

5 最长公共子串

把两个串接在一起,那么两个后缀的最长公共前缀就是一个最长公共子串
那么这两个公共子串可能在一个串里
我们只需要特判这个最长公共前缀不是在一个串里就可以用这个更新了

例题 POJ 2774

Long Long Message

Time Limit: 4000MS Memory Limit: 131072K
Total Submissions: 32478 Accepted: 13115
Case Time Limit: 1000MS

Description

The little cat is majoring in physics in the capital of Byterland. A piece of sad news comes to him these days: his mother is getting ill. Being worried about spending so much on railway tickets (Byterland is such a big country, and he has to spend 16 shours on train to his hometown), he decided only to send SMS with his mother.
The little cat lives in an unrich family, so he frequently comes to the mobile service center, to check how much money he has spent on SMS. Yesterday, the computer of service center was broken, and printed two very long messages. The brilliant little cat soon found out:

  1. All characters in messages are lowercase Latin letters, without punctuations and spaces.
  2. All SMS has been appended to each other – (i+1)-th SMS comes directly after the i-th one – that is why those two messages are quite long.
  3. His own SMS has been appended together, but possibly a great many redundancy characters appear leftwards and rightwards due to the broken computer.
    E.g: if his SMS is “motheriloveyou”, either long message printed by that machine, would possibly be one of “hahamotheriloveyou”, “motheriloveyoureally”, “motheriloveyouornot”, “bbbmotheriloveyouaaa”, etc.
  4. For these broken issues, the little cat has printed his original text twice (so there appears two very long messages). Even though the original text remains the same in two printed messages, the redundancy characters on both sides would be possibly different.
    You are given those two very long messages, and you have to output the length of the longest possible original text written by the little cat.
    Background:
    The SMS in Byterland mobile service are charging in dollars-per-byte. That is why the little cat is worrying about how long could the longest original text be.
    Why ask you to write a program? There are four resions:
  5. The little cat is so busy these days with physics lessons;
  6. The little cat wants to keep what he said to his mother seceret;
  7. POJ is such a great Online Judge;
  8. The little cat wants to earn some money from POJ, and try to persuade his mother to see the doctor :(

Input

Two strings with lowercase letters on two of the input lines individually. Number of characters in each one will never exceed 100000.

Output

A single line with a single integer number – what is the maximum length of the original text written by the little cat.

Sample Input

yeshowmuchiloveyoumydearmotherreallyicannotbelieveit
yeaphowmuchiloveyoumydearmother

Sample Output

27

题解

就是刚才那个东西

代码

#include 
#include 
#include 
#include 
#define MAXN 300005
//#define ivorysi
using namespace std;
typedef long long ll;

// T: declared but not referenced by the code of this program.
int T;
// len: total length of the joined string; ra[p]: rank of the suffix starting
// at p; sa[r]: start of the suffix ranked r; str: the joined string as
// integers; height[p]: common-prefix length of the suffix at p and the suffix
// ranked one place before it.
int len,ra[MAXN],sa[MAXN],str[MAXN],height[MAXN];
// val, sum, tmpa, tmpb: scratch buffers used inside Suffix();
// log2: declared but not referenced by the code of this program.
int val[MAXN],sum[MAXN],tmpa[MAXN],tmpb[MAXN],log2[MAXN];
// s: raw input line; characters are stored from index 1.
char s[MAXN];
// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void Suffix(int len,int sc) {
    int *tmpra = tmpa,*tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    for(int k = len - j + 1; k <= len ; ++k) tmpsa[++l] = k;
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) ++sum[val[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    swap(tmpra,tmpsa);
    tmpra[sa[1]] = 1;
    all = 1;
    for(int z = 2 ; z <= len ; ++z) 
        tmpra[sa[z]] = cmp(tmpsa,sa[z],sa[z - 1],j) ? all : ++all;
    sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
void Get_height(int len) {
    for(int i = 1 ; i < len ; ++i) {
    int k = max( 0 , height[i - 1] - 1);
    while(i + k <= len && sa[ra[i] - 1] + k <= len && str[i + k] == str[sa[ra[i] - 1] + k]) {
        ++k;
    }
    height[i] = k;
    }
}
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    scanf("%s",s + 1);
    int s1 = strlen(s + 1);
    for(int i = 1 ; i <= s1 ; ++i) {
    str[i] = s[i] - 'a' + 3;
    }
    scanf("%s",s + 1);
    int s2 = strlen(s + 1);
    for(int i = 1 ; i <= s2 ; ++i) {
    str[i + s1 + 1 ] = s[i] - 'a' + 3;
    }
    str[s1 + 1] = 2;
    str[s1 + s2 + 2] = 1;
    len = s1 + s2 + 2;
    Suffix(len,28);
    Get_height(len);
    int ans = 0;
    for(int i = 2 ; i <= len ; ++i) {
    int s = sa[i] , t = sa[i - 1];
    if(s > t) swap(s,t);
    if(s <= s1 && t > s1 && t <= s2 + s1 + 1) {
        ans = max(ans,height[sa[i]]);
    }
    }
    printf("%d\n",ans);
}

例题 Ural 1517 Freedom of Choice

Background

Before Albanian people could bear with the freedom of speech (this story is fully described in the problem "Freedom of speech"), another freedom - the freedom of choice - came down on them. In the near future, the inhabitants will have to face the first democratic Presidential election in the history of their country.
Outstanding Albanian politicians liberal Mohammed Tahir-ogly and his old rival conservative Ahmed Kasym-bey declared their intention to compete for the high post.

Problem

According to democratic traditions, both candidates entertain with digging dirt upon each other to the cheers of their voters' approval. When occasion offers, each candidate makes an election speech, which is devoted to blaming his opponent for corruption, disrespect for the elders and terrorism affiliation. As a result the speeches of Mohammed and Ahmed have become nearly the same, and now it does not matter for the voters for whom to vote.
The third candidate, a chairman of Albanian socialist party comrade Ktulhu wants to make use of this situation. He has been lazy to write his own election speech, but noticed, that some fragments of the speeches of Mr. Tahir-ogly and Mr. Kasym-bey are completely identical. Then Mr. Ktulhu decided to take the longest identical fragment and use it as his election speech.

Input

The first line contains the integer number N (1 ≤ N ≤ 100000). The second line contains the speech of Mr. Tahir-ogly. The third line contains the speech of Mr. Kasym-bey. Each speech consists of N capital latin letters.

Output

You should output the speech of Mr. Ktulhu. If the problem has several solutions, you should output any of them.

Example

input

28
VOTEFORTHEGREATALBANIAFORYOU
CHOOSETHEGREATALBANIANFUTURE

output

THEGREATALBANIA

题解

随便输出一组就随便输出咯

代码

#include 
#include 
#include 
#include 
#define MAXN 300005
//#define ivorysi
using namespace std;
typedef long long ll;

// n: length of each of the two speeches, read from input.
int n;
// len: total length of the joined string; ra[p]: rank of the suffix starting
// at p; sa[r]: start of the suffix ranked r; str: the joined string as
// integers; height[p]: common-prefix length of the suffix at p and the suffix
// ranked one place before it.
int len,ra[MAXN],sa[MAXN],str[MAXN],height[MAXN];
// val, sum, tmpa, tmpb: scratch buffers used inside Suffix();
// log2: declared but not referenced by the code of this program.
int val[MAXN],sum[MAXN],tmpa[MAXN],tmpb[MAXN],log2[MAXN];
// s: raw input line; characters are stored from index 1.
char s[MAXN];
// Reports whether positions s and t carry the same doubled key in arr:
// equal entries at the positions themselves and equal entries l places later.
// The second lookup is only made when the first pair matches; neither lookup
// is range-checked.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
void Suffix(int len,int sc) {
    int *tmpra = tmpa,*tmpsa = tmpb;
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    int all = 0;
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    for(int k = len - j + 1; k <= len ; ++k) tmpsa[++l] = k;
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) ++sum[val[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    swap(tmpra,tmpsa);
    tmpra[sa[1]] = 1;
    all = 1;
    for(int z = 2 ; z <= len ; ++z) 
        tmpra[sa[z]] = cmp(tmpsa,sa[z],sa[z - 1],j) ? all : ++all;
    sc = all;
    }
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
void Get_height(int len) {
    for(int i = 1 ; i < len ; ++i) {
    int k = max( 0 , height[i - 1] - 1);
    while(i + k <= len && sa[ra[i] - 1] + k <= len && str[i + k] == str[sa[ra[i] - 1] + k]) {
        ++k;
    }
    height[i] = k;
    }
}
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    scanf("%d",&n);
    scanf("%s",s + 1);
    for(int i = 1 ; i <= n ; ++i) {
    str[i] = s[i] - 'A' + 3;
    }
    scanf("%s",s + 1);
    for(int i = 1 ; i <= n ; ++i) {
    str[i + n + 1 ] = s[i] - 'A' + 3;
    }
    str[n + 1] = 2;
    str[2 * n + 2] = 1;
    len = 2 * n + 2;
    Suffix(len,28);
    Get_height(len);
    int ans = 0,tmp = 0;
    for(int i = 2 ; i <= len ; ++i) {
    int s = sa[i] , t = sa[i - 1];
    if(s > t) swap(s,t);
    if(s <= n && t > n && t <= 2 * n + 1) {
        if(ans < height[sa[i]]) {
        ans = height[sa[i]];
        tmp = sa[i];
        }
    }
    }
    for(int i = 1 ; i <= ans ; ++i) putchar('A' + str[i + tmp - 1] - 3);
    putchar('\n');
}

6 长度不小于K的公共子串

我们把这两个串拼在一起,中间用一个没出现过的字符隔开,然后求一遍后缀数组
然后按排名对后缀进行分组,height 大于等于 k 的相邻后缀分到同一组
在每组里,每遇到一个B的后缀,就统计它和前面A的后缀会产生多少个公共子串
然后对A做一遍同样的操作
这个可以用单调栈维护,维护的方法是栈底到栈顶height值递增,然后弹出的时候维护一下这个height值有多少个
如果遇到不用加进去的后缀就统计答案

POJ 3415 Common Substrings

Time Limit: 5000MS Memory Limit: 65536K
Total Submissions: 12038 Accepted: 4072

Description

A substring of a string T is defined as:
T(i, k) = T_i T_{i+1} ... T_{i+k-1}, 1 ≤ i ≤ i+k-1 ≤ |T|.
Given two strings A, B and one integer K, we define S, a set of triples (i, j, k):
S = {(i, j, k) | k≥K, A(i, k)=B(j, k)}.
You are to give the value of |S| for specific A, B and K.

Input

The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.
1 ≤ |A|, |B| ≤ 10^5
1 ≤ K ≤ min{|A|, |B|}
Characters of A and B are all Latin letters.

Output

For each case, output an integer |S|.

Sample Input

2
aababaa
abaabaa
1
xx
xx
0

Sample Output

22
5

题解

就是上面说的题

代码

#include 
#include 
#include 
#include 
// Capacity of every global buffer below.
#define MAXN 300005
// Uncomment to make main() read its input from f1.in.
//#define ivorysi
using namespace std;
typedef long long ll;

// Minimum length K of the common substrings to count.
int k;
// len: length of the joined text in str[1..len]; s1 / s2: lengths of A and B.
// sa / ra: suffix array and its inverse; height: LCP with the preceding suffix
// in sa, indexed by start position.
int len,s1,s2,ra[MAXN],sa[MAXN],str[MAXN],height[MAXN];
// val, sum, tmpa, tmpb: scratch buffers of Suffix().
// stk / top / siz: monotonic stack used by process() (rank and merged count per entry).
// The unused log2[] array that used to live here was dropped because it
// redeclared the C library function log2 at global scope.
int val[MAXN],sum[MAXN],tmpa[MAXN],tmpb[MAXN],stk[MAXN],top,siz[MAXN];
// ans: result of the current test case; value[i]: contribution of stack entry i;
// sumval: sum of value[1..top].
ll ans,value[MAXN],sumval;
// Input buffer; every string is read into s[1..].
char s[MAXN];
// Tells whether positions s and t carry the same key pair in arr: the key at
// the position itself and the key l entries further on.  The second pair is
// only read when the first pair already matches.
bool cmp(int *arr,int s,int t,int l) {
    const bool sameFirst = arr[s] == arr[t];
    if(!sameFirst) return false;
    return arr[s + l] == arr[t + l];
}
// Builds the suffix array sa[1..len] and the rank array ra[1..len] of
// str[1..len] by prefix doubling; every round is a counting sort.
//   len - number of symbols in str (1-based)
//   sc  - upper bound on the symbol values in str; symbols index sum[1..sc]
// sum, val, tmpa and tmpb are used as scratch space.
// NOTE(review): cmp() reads the old rank at position + j; presumably this stays
// inside 1..len because callers end str with a unique smallest symbol - confirm.
void Suffix(int len,int sc) {
    // tmpra holds the rank of every position for the prefix length sorted so
    // far; tmpsa receives the positions ordered by their second key.
    int *tmpra = tmpa,*tmpsa = tmpb;
    // Initial round: counting sort of all positions by their first symbol.
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    // all = number of distinct ranks; the loop stops once every suffix has its own rank.
    int all = 0;
    // j is the prefix length already sorted; each pass extends it to 2 * j.
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    // Positions whose second half lies beyond len are placed first ...
    for(int k = len - j + 1; k <= len ; ++k) tmpsa[++l] = k;
    // ... followed by the others in the order of their second half (sa order shifted back by j).
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    // val[k]: first-key rank of the k-th position in second-key order.
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    // Counting sort by first key; filling from the back keeps the second-key order among ties.
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) ++sum[val[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    // From here tmpsa points at the previous ranks and tmpra is rewritten with the new ones.
    swap(tmpra,tmpsa);
    tmpra[sa[1]] = 1;
    all = 1;
    // Neighbours in sa share a rank only when cmp() finds both old ranks equal.
    for(int z = 2 ; z <= len ; ++z) 
        tmpra[sa[z]] = cmp(tmpsa,sa[z],sa[z - 1],j) ? all : ++all;
    // The next round sorts rank values, so the counting range becomes all.
    sc = all;
    }
    // Invert sa to obtain the rank of every start position.
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
// Fills height[p] for every start position p in 1..len, where height[p] is the
// length of the longest common prefix of the suffix starting at p and the
// suffix that precedes it in sa (sa[ra[p] - 1]).  height is indexed by start
// position, so callers look it up as height[sa[rank]].
// The suffix of rank 1 has no predecessor: its entry is set to 0 explicitly, so
// a value left over from a previous test case cannot leak into height[sa[1]]
// and sa[0] is never read.
// Requires sa, ra and str to be filled for 1..len.
void Get_height(int len) {
    int k = 0;  // matched length carried over from the previous position
    for(int i = 1 ; i <= len ; ++i) {
        if(ra[i] == 1) {
            height[i] = 0;
            k = 0;
            continue;
        }
        // Dropping the first character loses at most one matched symbol.
        if(k > 0) --k;
        const int prev = sa[ra[i] - 1];
        while(i + k <= len && prev + k <= len && str[i + k] == str[prev + k]) ++k;
        height[i] = k;
    }
}
// Maps a character to its symbol code: 'a'..'z' become 3..28; every other
// character yields c - 'A' + 29, so 'A'..'Z' become 29..54.
int change_ch(char c) {
    const bool lower = !(c < 'a' || c > 'z');
    return lower ? c - 'a' + 3 : c - 'A' + 29;
}
// Scans the ranks st+1..ed of one group and adds to ans, for every suffix that
// starts inside [l, r], the current stack total sumval.
// Each stack entry stands for earlier suffixes of the group that start outside
// [l, r]: siz is how many were merged into it and value is
// siz * (height - k + 1) for the height stored at that rank.  Entries with a
// larger height than the incoming one are popped and merged into it.
void process(int st,int ed,int l,int r) {
    top = 0;
    sumval = 0;
    for(int pos = st + 1 ; pos <= ed ; ++pos) {
        const int h = height[sa[pos]];
        // Starts at 1 for the predecessor sa[pos - 1]; popped entries join it.
        int merged = 1;
        for( ; top >= 1 && h < height[sa[stk[top]]] ; --top) {
            merged += siz[top];
            sumval -= value[top];
        }
        // A predecessor that starts inside [l, r] must not be counted.
        const bool prevInside = sa[pos - 1] >= l && sa[pos - 1] <= r;
        if(prevInside) --merged;
        if(merged != 0) {
            ++top;
            stk[top] = pos;
            siz[top] = merged;
            value[top] = 1LL * merged * (h - k + 1);
            sumval += value[top];
        }
        if(sa[pos] >= l && sa[pos] <= r) ans += sumval;
    }
}
// For every test block: reads K, A and B, stores them in str as  A, 2, B, 1,
// builds the suffix array, splits the ranks into maximal groups whose height is
// at least K and lets process() count the pairs in both directions.
// Input ends with K = 0; reading also stops on EOF or malformed input so that a
// missing terminator cannot cause an endless loop.
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    while(scanf("%d",&k) == 1 && k != 0) {
        if(scanf("%s",s + 1) != 1) break;
        s1 = strlen(s + 1);
        for(int i = 1 ; i <= s1 ; ++i) {
            str[i] = change_ch(s[i]);
        }
        if(scanf("%s",s + 1) != 1) break;
        s2 = strlen(s + 1);
        for(int i = 1 ; i <= s2 ; ++i) {
            str[i + s1 + 1] = change_ch(s[i]);
        }
        // Separator and terminator, both smaller than every letter code.
        str[s1 + 1] = 2;
        str[s2 + s1 + 2] = 1;
        len = s1 + s2 + 2;
        Suffix(len,70);
        Get_height(len);
        ans = 0;
        int st = -1,ed = 0;
        // i == len + 1 is a terminator step that closes a group still open at the end.
        for(int i = 1 ; i <= len + 1 ; ++i) {
            if(i <= len && height[sa[i]] >= k) {
                if(st == -1) st = i - 1;
                ed = i;
                continue;
            }
            if(st != -1) {
                // Once with the range of A, once with the range after A.
                process(st,ed,1,s1);
                process(st,ed,s1 + 1,s1 + s2 + 1);
                st = -1;
            }
        }
        printf("%lld\n",ans);
    }
}

7 不小于k个字符串的最长子串

把这n个字符串连起来变成一个,然后后缀排序
然后二分答案,按 height 对后缀分组;如果某一组里的后缀来自至少 k 个不同的字符串,那么这个答案就是合法的

例题 POJ 3294 Life Forms

Time Limit: 5000MS Memory Limit: 65536K
Total Submissions: 16725 Accepted: 4927

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.
The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that in the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.
Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?

题解

对于统计字符串在二分答案分组的时候顺便记录一下起始位置就行

代码

#include 
#include 
#include 
#include 
// Capacity of the large global buffers below.
#define MAXN 300005
// Uncomment to make main() read its input from f1.in.
//#define ivorysi
using namespace std;
typedef long long ll;

// n: number of input strings; k: number of distinct strings a group must
// cover (main sets it to n / 2 + 1).
int n,k;
// len: length of the joined text in str[1..len]; strl[i]: length of string i.
// sa / ra: suffix array and its inverse; height: LCP with the preceding suffix
// in sa, indexed by start position.
int len,strl[MAXN],ra[MAXN],sa[MAXN],str[MAXN],height[MAXN];
// val, sum, tmpa, tmpb: scratch buffers of Suffix().
// stt[1..tot]: start positions recorded by check(); id[p]: index of the input
// string that position p belongs to (0 where main did not assign one).
int val[MAXN],sum[MAXN],tmpa[MAXN],tmpb[MAXN],stt[MAXN],tot,id[MAXN];
// Input buffer; every string is read into s[1..].
char s[MAXN];
// Per-group marker used by check(): used[i] is set once string i was seen.
bool used[105];
// Tells whether positions s and t carry the same key pair in arr: the key at
// the position itself and the key l entries further on.  The second pair is
// only read when the first pair already matches.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
// Builds the suffix array sa[1..len] and the rank array ra[1..len] of
// str[1..len] by prefix doubling; every round is a counting sort.
//   len - number of symbols in str (1-based)
//   sc  - upper bound on the symbol values in str; symbols index sum[1..sc]
// sum, val, tmpa and tmpb are used as scratch space.
// NOTE(review): cmp() reads the old rank at position + j; presumably this stays
// inside 1..len because callers end str with a unique smallest symbol - confirm.
void Suffix(int len,int sc) {
    // tmpra holds the rank of every position for the prefix length sorted so
    // far; tmpsa receives the positions ordered by their second key.
    int *tmpra = tmpa,*tmpsa = tmpb;
    // Initial round: counting sort of all positions by their first symbol.
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    // all = number of distinct ranks; the loop stops once every suffix has its own rank.
    int all = 0;
    // j is the prefix length already sorted; each pass extends it to 2 * j.
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    // Positions whose second half lies beyond len are placed first ...
    for(int k = len - j + 1; k <= len ; ++k) tmpsa[++l] = k;
    // ... followed by the others in the order of their second half (sa order shifted back by j).
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    // val[k]: first-key rank of the k-th position in second-key order.
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    // Counting sort by first key; filling from the back keeps the second-key order among ties.
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) ++sum[val[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    // From here tmpsa points at the previous ranks and tmpra is rewritten with the new ones.
    swap(tmpra,tmpsa);
    tmpra[sa[1]] = 1;
    all = 1;
    // Neighbours in sa share a rank only when cmp() finds both old ranks equal.
    for(int z = 2 ; z <= len ; ++z) 
        tmpra[sa[z]] = cmp(tmpsa,sa[z],sa[z - 1],j) ? all : ++all;
    // The next round sorts rank values, so the counting range becomes all.
    sc = all;
    }
    // Invert sa to obtain the rank of every start position.
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
// Fills height[p] for every start position p in 1..len, where height[p] is the
// length of the longest common prefix of the suffix starting at p and the
// suffix that precedes it in sa (sa[ra[p] - 1]).  height is indexed by start
// position, so callers look it up as height[sa[rank]].
// The suffix of rank 1 has no predecessor: its entry is set to 0 explicitly, so
// a value left over from a previous test case cannot leak into height[sa[1]]
// and sa[0] is never read.
// Requires sa, ra and str to be filled for 1..len.
void Get_height(int len) {
    int k = 0;  // matched length carried over from the previous position
    for(int i = 1 ; i <= len ; ++i) {
        if(ra[i] == 1) {
            height[i] = 0;
            k = 0;
            continue;
        }
        // Dropping the first character loses at most one matched symbol.
        if(k > 0) --k;
        const int prev = sa[ra[i] - 1];
        while(i + k <= len && prev + k <= len && str[i + k] == str[prev + k]) ++k;
        height[i] = k;
    }
}
bool check(int z) {
    tot = 0;
    int st = -1,ed = 0;
    for(int i = 1 ; i <= len ; ++i) {
    if(height[sa[i]] >= z) {
        if(st == -1) st = i - 1;
        ed = i;
    }
    else {
        if(st != -1) {
        memset(used,0,sizeof(used));
        int num = 0;
        for(int j = st ; j <= ed ; ++j) {
            if(!used[id[sa[j]]]) {
            used[id[sa[j]]] = 1;
            ++num;
            }
        }
        if(num >= k) {
            stt[++tot] = sa[st];
        }
        }
        st = -1;
    }
    }
    if(st != -1) {
    memset(used,0,sizeof(used));
    int num = 0;
    for(int j = st ; j <= ed ; ++j) {
        if(!used[id[sa[j]]]) {
        used[id[sa[j]]] = 1;
        ++num;
        }
    }
    if(num >= k) {
        stt[++tot] = sa[st];
    }
    }
    return tot > 0;
}
// For every test case: joins the n strings in str (letters are mapped to
// n + 2 .. n + 27, string i is followed by separator i + 1, the last separator
// is replaced by the terminator 1), builds the suffix array and binary-searches
// the longest length for which check() finds a group covering more than half of
// the strings.  All such substrings are printed in suffix-array order, or "?"
// when none exists; test cases are separated by an empty line.
// Input ends with n = 0; reading also stops on EOF or malformed input.
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    int cnt = 0;
    while(scanf("%d",&n) == 1 && n != 0) {
        ++cnt;
        if(cnt != 1) puts("");
        if(n == 1) {
            // A single string is shared by "more than half" on its own, but a lone
            // suffix never forms a group in check(), so answer directly.
            if(scanf("%s",s + 1) != 1) break;
            puts(s + 1);
            continue;
        }
        len = 0;
        int l = 0 , r = 0;
        memset(id,0,sizeof(id));
        memset(str,0,sizeof(str));
        bool complete = true;
        for(int i = 1 ; i <= n ; ++i) {
            if(scanf("%s",s + 1) != 1) {
                complete = false;
                break;
            }
            strl[i] = strlen(s + 1);
            for(int j = 1 ; j <= strl[i] ; ++j) {
                str[len + j] = s[j] - 'a' + n + 2;
                id[len + j] = i;
            }
            len += strl[i];
            r = max(strl[i],r);
            str[++len] = i + 1;
        }
        if(!complete) break;
        str[len] = 1;
        Suffix(len,300);
        Get_height(len);
        k = n / 2 + 1;
        // Largest length in [0, r] accepted by check(); 0 means no answer.
        while(l < r) {
            int mid = (l + r + 1) >> 1;
            if(check(mid)) {
                l = mid;
            }
            else {
                r = mid - 1;
            }
        }
        if(l == 0) {
            puts("?");
        }
        else {
            // Run check once more so that stt / tot describe the final length.
            check(l);
            for(int i = 1 ; i <= tot ; ++i) {
                for(int j = 1 ; j <= l ; ++j) {
                    putchar('a' + str[stt[i] + j - 1] - n - 2);
                }
                putchar('\n');
            }
        }
    }
}

8 每个字符串至少出现两次不重叠最长子串

做法也是分组,二分答案,分组后判断每组里面是不是n个字符串的后缀都出现了两次以上,并且看看n个字符串每个字符串后缀的出现位置的差值是不是超过了子串长度,为了看看有没有重叠

例题 SPOJ PHRASES Relevant Phrases of Annihilation

You are the King of Byteland. Your agents have just intercepted a batch of encrypted enemy messages concerning the date of the planned attack on your island. You immedietaly send for the Bytelandian Cryptographer, but he is currently busy eating popcorn and claims that he may only decrypt the most important part of the text (since the rest would be a waste of his time). You decide to select the fragment of the text which the enemy has strongly emphasised, evidently regarding it as the most important. So, you are looking for a fragment of text which appears in all the messages disjointly at least twice. Since you are not overfond of the cryptographer, try to make this fragment as long as possible.

Input

The first line of input contains a single positive integer t<=10, the number of test cases. t test cases follow. Each test case begins with integer n (n<=10), the number of messages. The next n lines contain the messages, consisting only of between 2 and 10000 characters 'a'-'z', possibly with some additional trailing white space which should be ignored.

Output

For each test case output the length of longest string which appears disjointly at least twice in all of the messages.

Example

Input:

1
4
abbabba
dabddkababa
bacaba
baba

Output:

2
(in the example above, the longest substring which fulfills the requirements is 'ba')

题解

都在上面说了

代码

#include 
#include 
#include 
#include 
// Capacity of the large global buffers below.
#define MAXN 300005
// Uncomment to make main() read its input from f1.in.
//#define ivorysi
using namespace std;
typedef long long ll;

// T: number of test cases still to process; n: number of messages in the current one.
int T,n;
// len: length of the joined text in str[1..len].
// sa / ra: suffix array and its inverse; height: LCP with the preceding suffix
// in sa, indexed by start position.
int len,ra[MAXN],sa[MAXN],str[MAXN],height[MAXN];
// val, sum, tmpa, tmpb: scratch buffers of Suffix().
// id[p]: index of the message that position p belongs to (0 where main did not
// assign one).  stt and tot are not referenced by this program's functions.
int val[MAXN],sum[MAXN],tmpa[MAXN],tmpb[MAXN],stt[MAXN],tot,id[MAXN];
// Input buffer; every message is read into s[1..].
char s[MAXN];
// Scratch of check_range(): per message, how many suffixes were counted
// (capped at 2) and the largest / smallest start position seen in the group.
int used[105],maxx[105],minn[105];
// Tells whether positions s and t carry the same key pair in arr: the key at
// the position itself and the key l entries further on.  The second pair is
// only read when the first pair already matches.
bool cmp(int *arr,int s,int t,int l) {
    const bool sameFirst = arr[s] == arr[t];
    if(!sameFirst) return false;
    return arr[s + l] == arr[t + l];
}
// Builds the suffix array sa[1..len] and the rank array ra[1..len] of
// str[1..len] by prefix doubling; every round is a counting sort.
//   len - number of symbols in str (1-based)
//   sc  - upper bound on the symbol values in str; symbols index sum[1..sc]
// sum, val, tmpa and tmpb are used as scratch space.
// NOTE(review): cmp() reads the old rank at position + j; presumably this stays
// inside 1..len because callers end str with a unique smallest symbol - confirm.
void Suffix(int len,int sc) {
    // tmpra holds the rank of every position for the prefix length sorted so
    // far; tmpsa receives the positions ordered by their second key.
    int *tmpra = tmpa,*tmpsa = tmpb;
    // Initial round: counting sort of all positions by their first symbol.
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    // all = number of distinct ranks; the loop stops once every suffix has its own rank.
    int all = 0;
    // j is the prefix length already sorted; each pass extends it to 2 * j.
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    // Positions whose second half lies beyond len are placed first ...
    for(int k = len - j + 1; k <= len ; ++k) tmpsa[++l] = k;
    // ... followed by the others in the order of their second half (sa order shifted back by j).
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    // val[k]: first-key rank of the k-th position in second-key order.
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    // Counting sort by first key; filling from the back keeps the second-key order among ties.
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) ++sum[val[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    // From here tmpsa points at the previous ranks and tmpra is rewritten with the new ones.
    swap(tmpra,tmpsa);
    tmpra[sa[1]] = 1;
    all = 1;
    // Neighbours in sa share a rank only when cmp() finds both old ranks equal.
    for(int z = 2 ; z <= len ; ++z) 
        tmpra[sa[z]] = cmp(tmpsa,sa[z],sa[z - 1],j) ? all : ++all;
    // The next round sorts rank values, so the counting range becomes all.
    sc = all;
    }
    // Invert sa to obtain the rank of every start position.
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
// Fills height[p] for every start position p in 1..len, where height[p] is the
// length of the longest common prefix of the suffix starting at p and the
// suffix that precedes it in sa (sa[ra[p] - 1]).  height is indexed by start
// position, so callers look it up as height[sa[rank]].
// The suffix of rank 1 has no predecessor: its entry is set to 0 explicitly, so
// a value left over from a previous test case cannot leak into height[sa[1]]
// and sa[0] is never read.
// Requires sa, ra and str to be filled for 1..len.
void Get_height(int len) {
    int k = 0;  // matched length carried over from the previous position
    for(int i = 1 ; i <= len ; ++i) {
        if(ra[i] == 1) {
            height[i] = 0;
            k = 0;
            continue;
        }
        // Dropping the first character loses at most one matched symbol.
        if(k > 0) --k;
        const int prev = sa[ra[i] - 1];
        while(i + k <= len && prev + k <= len && str[i + k] == str[prev + k]) ++k;
        height[i] = k;
    }
}
bool check_range(int st,int ed,int l) {
    memset(used,0,sizeof(used));
    int num = 0;
    for(int i = st ; i <= ed ; ++i) {
    if(used[id[sa[i]]] < 2) {
        ++num;
        ++used[id[sa[i]]];
    }
    }
    if(num < 2 * n) return false;
    for(int i = 1 ; i <= n ; ++i) {
    minn[i] = 1000000;
    maxx[i] = 0;
    }
    for(int i = st ; i <= ed ; ++i) {
    int k = id[sa[i]];
    minn[k] = min(sa[i],minn[k]);
    maxx[k] = max(sa[i],maxx[k]);
    }
    for(int i = 1 ; i <= n ; ++i) {
    if(maxx[i] - minn[i] < l) return false;
    }
    return true;
}
bool check(int k) {
    int st = -1,ed = 0;
    for(int i = 1 ; i <= len ; ++i) {
    if(height[sa[i]] >= k) {
        if(st == -1) st = i - 1;
        ed = i;
    }
    else {
        if(st != -1) { if(check_range(st,ed,k)) return true;}
        st = -1;
    }
    }
    if(st != -1) { if(check_range(st,ed,k)) return true;}
    return false;
}
// For each of the T test cases: joins the n messages in str (letters are mapped
// to n + 2 .. n + 27, message i is followed by separator i + 1, the last
// separator is replaced by the terminator 1), builds the suffix array and
// binary-searches the largest length accepted by check(), which is printed.
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    scanf("%d",&T);
    while(T--) {
        memset(str,0,sizeof(str));
        memset(id,0,sizeof(id));
        scanf("%d",&n);
        len = 0;
        int lo = 0,hi = 0;
        for(int msg = 1 ; msg <= n ; ++msg) {
            scanf("%s",s + 1);
            const int cur = strlen(s + 1);
            for(int c = 1 ; c <= cur ; ++c) {
                ++len;
                str[len] = s[c] - 'a' + n + 2;
                id[len] = msg;
            }
            if(cur > hi) hi = cur;
            str[++len] = msg + 1;
        }
        str[len] = 1;
        Suffix(len,100);
        Get_height(len);
        // Largest length in [0, hi] for which check() succeeds.
        while(lo < hi) {
            const int mid = (lo + hi + 1) >> 1;
            if(check(mid)) lo = mid;
            else hi = mid - 1;
        }
        printf("%d\n",lo);
    }
}

不知道什么分类的一道题

POJ 3581 Sequence

Time Limit: 5000MS Memory Limit: 65536K
Total Submissions: 7632 Accepted: 1729
Case Time Limit: 2000MS

Description

Given a sequence, {A1, A2, ..., An} which is guaranteed A1 > A2, ..., An, you are to cut it into three sub-sequences and reverse them separately to form a new one which is the smallest possible sequence in alphabet order.
The alphabet order is defined as follows: for two sequence {A1, A2, ..., An} and {B1, B2, ..., Bn}, we say {A1, A2, ..., An} is smaller than {B1, B2, ..., Bn} if and only if there exists such i ( 1 ≤ i ≤ n) so that we have Ai < Bi and Aj = Bj for each j < i.

Input

The first line contains n. (n ≤ 200000)
The following n lines contain the sequence.

Output

output n lines which is the smallest possible sequence obtained.

Sample Input

5
10
1
2
3
4

Sample Output

1
10
2
4
3

Hint

{10, 1, 2, 3, 4} -> {10, 1 | 2 | 3, 4} -> {1, 10, 2, 4, 3}

题解

看样子就是把数字反过来以后排个序,然而还有一些要注意的地方
第一次排序可以在[3,n]里选排名最靠前的一个,因为第一个数是最大的那个
但是还需要第二次排序:把第一段输出之后,将剩下的序列复制一份接在自己后面,再做一次后缀排序
比如说这组数据

9
8 4 -1 5 0 5 0 2 3

反序后是

3 2 0 5 0 5 -1 4 8

我们第一次分了

-1 4 8

下一次找到的是

0 5

然后我们复制一遍就是

3 2 0 5 0 5 3 2 0 5 0 5

我们就可以找到

0 5 0 5

代码

#include 
#include 
#include 
#include 
// Capacity of the large global buffers below.
#define MAXN 300005
// Uncomment to make main() read its input from f1.in.
//#define ivorysi
using namespace std;
typedef long long ll;

// n: length of the input sequence.  T is not referenced by this program's functions.
int T,n;
// len is not referenced by main (Suffix/Get_height take their own len parameter).
// sa / ra: suffix array and its inverse; str: the sequence as compressed
// symbols; height: written only by Get_height().
// num[1..tot]: the distinct input values in ascending order after main sorts them.
int len,ra[MAXN],sa[MAXN],str[MAXN],height[MAXN],num[MAXN],tot;
// val, sum, tmpa, tmpb: scratch buffers of Suffix().  ans is not referenced by
// this program's functions.
int val[MAXN],sum[MAXN],tmpa[MAXN],tmpb[MAXN],ans[5];
// Not referenced by this program's functions.
bool used[MAXN];
// Tells whether positions s and t carry the same key pair in arr: the key at
// the position itself and the key l entries further on.  The second pair is
// only read when the first pair already matches.
bool cmp(int *arr,int s,int t,int l) {
    if(arr[s] != arr[t]) return false;
    return arr[s + l] == arr[t + l];
}
// Builds the suffix array sa[1..len] and the rank array ra[1..len] of
// str[1..len] by prefix doubling; every round is a counting sort.
//   len - number of symbols in str (1-based)
//   sc  - upper bound on the symbol values in str; symbols index sum[1..sc]
// sum, val, tmpa and tmpb are used as scratch space.
// NOTE(review): cmp() reads the old rank at position + j; presumably this stays
// inside 1..len because callers end str with a unique smallest symbol - confirm.
void Suffix(int len,int sc) {
    // tmpra holds the rank of every position for the prefix length sorted so
    // far; tmpsa receives the positions ordered by their second key.
    int *tmpra = tmpa,*tmpsa = tmpb;
    // Initial round: counting sort of all positions by their first symbol.
    for(int i = 1 ; i <= sc ; ++i) sum[i] = 0;
    for(int i = 1 ; i <= len ; ++i) sum[tmpra[i] = str[i]]++;
    for(int i = 1 ; i <= sc ; ++i) sum[i] += sum[i - 1];
    for(int i = len ; i >= 1 ; --i) sa[sum[tmpra[i]]--] = i;
    // all = number of distinct ranks; the loop stops once every suffix has its own rank.
    int all = 0;
    // j is the prefix length already sorted; each pass extends it to 2 * j.
    for(int j = 1 ; all < len ; j *= 2) {
    int l = 0;
    // Positions whose second half lies beyond len are placed first ...
    for(int k = len - j + 1; k <= len ; ++k) tmpsa[++l] = k;
    // ... followed by the others in the order of their second half (sa order shifted back by j).
    for(int k = 1 ; k <= len ; ++k) if(sa[k] > j) tmpsa[++l] = sa[k] - j;
    // val[k]: first-key rank of the k-th position in second-key order.
    for(int k = 1 ; k <= len ; ++k) val[k] = tmpra[tmpsa[k]];
    // Counting sort by first key; filling from the back keeps the second-key order among ties.
    for(int k = 1 ; k <= sc ; ++k) sum[k] = 0;
    for(int k = 1 ; k <= len ; ++k) ++sum[val[k]];
    for(int k = 1 ; k <= sc ; ++k) sum[k] += sum[k - 1];
    for(int k = len ; k >= 1 ; --k) sa[sum[val[k]]--] = tmpsa[k];
    // From here tmpsa points at the previous ranks and tmpra is rewritten with the new ones.
    swap(tmpra,tmpsa);
    tmpra[sa[1]] = 1;
    all = 1;
    // Neighbours in sa share a rank only when cmp() finds both old ranks equal.
    for(int z = 2 ; z <= len ; ++z) 
        tmpra[sa[z]] = cmp(tmpsa,sa[z],sa[z - 1],j) ? all : ++all;
    // The next round sorts rank values, so the counting range becomes all.
    sc = all;
    }
    // Invert sa to obtain the rank of every start position.
    for(int i = 1 ; i <= len ; ++i) ra[sa[i]] = i;
}
// Fills height[p] for every start position p in 1..len, where height[p] is the
// length of the longest common prefix of the suffix starting at p and the
// suffix that precedes it in sa (sa[ra[p] - 1]).  height is indexed by start
// position, so callers look it up as height[sa[rank]].
// The suffix of rank 1 has no predecessor: its entry is set to 0 explicitly, so
// no value left over from an earlier run survives and sa[0] is never read.
// Requires sa, ra and str to be filled for 1..len.
void Get_height(int len) {
    int k = 0;  // matched length carried over from the previous position
    for(int i = 1 ; i <= len ; ++i) {
        if(ra[i] == 1) {
            height[i] = 0;
            k = 0;
            continue;
        }
        // Dropping the first character loses at most one matched symbol.
        if(k > 0) --k;
        const int prev = sa[ra[i] - 1];
        while(i + k <= len && prev + k <= len && str[i + k] == str[prev + k]) ++k;
        height[i] = k;
    }
}
// Reads the sequence, replaces every value by its index among the sorted
// distinct values plus one (so symbols start at 2), reverses it and appends the
// terminator 1.  The first cut is the suffix of smallest rank among positions
// 3..n of the reversed sequence; it is printed first.  The remaining prefix is
// then written twice in a row, re-sorted, and the suffix of smallest rank among
// positions 2..(prefix length) gives the second cut.
int main() {
#ifdef ivorysi
    freopen("f1.in","r",stdin);
#endif
    scanf("%d",&n);
    for(int i = 1 ; i <= n ; ++i) {
        scanf("%d",&num[i]);
        str[i] = num[i];
    }
    // Coordinate compression: num[1..tot] becomes the sorted distinct values.
    sort(num + 1,num + n + 1);
    tot = unique(num + 1,num + n + 1) - num - 1;
    for(int i = 1 ; i <= n ; ++i)
        str[i] = lower_bound(num + 1,num + tot + 1,str[i]) - num + 1;
    // Reverse str[1..n] in place.
    for(int a = 1,b = n ; a < b ; ++a,--b) swap(str[a],str[b]);
    str[n + 1] = 1;
    Suffix(n + 1,tot + 1);
    int cut = 3;
    for(int i = 4 ; i <= n ; ++i)
        if(ra[i] < ra[cut]) cut = i;
    for(int i = cut ; i <= n ; ++i) printf("%d\n",num[str[i] - 1]);
    // Duplicate the remaining prefix str[1..rest] right behind itself.
    const int rest = cut - 1;
    for(int i = 1 ; i <= rest ; ++i) str[rest + i] = str[i];
    str[2 * rest + 1] = 1;
    Suffix(2 * rest + 1,tot + 1);
    int split = 2;
    for(int i = 3 ; i <= rest ; ++i)
        if(ra[i] < ra[split]) split = i;
    for(int i = split ; i <= rest ; ++i) printf("%d\n",num[str[i] - 1]);
    for(int i = 1 ; i < split ; ++i) printf("%d\n",num[str[i] - 1]);
}

转载于:https://www.cnblogs.com/ivorysi/p/9058153.html

你可能感兴趣的:(后缀数组学习笔记)