【codechef】Common Strings(后缀数组)

You are given two strings A and B. Find the number of distinct strings which appear in both A and B. A string s is said to appear in S iff s is a substring (appears contiguously) of S.

Input

  • The first line of the input contains an integer T denoting the number of test cases. The description of T test cases follows.
  • Each test case consists of two lines.
  • The first line contains two space separated integers n1 and n2 denoting the lengths of A and B.
  • The second line contains two space separated strings A and B.

Output

  • For each test case, output a single number denoting the number of distinct strings appearing in both A and B.

Constraints

  • 1 ≤ T ≤ 10^4
  • 1 ≤ n1, n2 ≤ 10^5
  • Sum of n1 + n2 over all test cases ≤ 10^5
  • A is a string consisting of n1 lowercase characters ('a'-'z').
  • B is a string consisting of n2 lowercase characters ('a'-'z').

Example

Input:
2
3 5
aad zaacd
4 4
abcd lmno
Output:
3
0

Explanation

Example case 1. The three strings are "a", "d", "aa".

Example case 2. There are no strings that appear in both A and B.

https://www.codechef.com/IOPC2015/problems/IOPC15G/

kuangbin大神的代码。。还没看懂先瞻仰一下。。

#include <stdio.h>
#include <string.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <queue>
#include <set>
#include <map>
#include <string>
#include <math.h>
#include <stdlib.h>
#include <time.h>
using namespace std;

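/*
*Suffix array
*Prefix-doubling algorithm, O(n*logn)
*The sequence to sort has length n and is stored in positions 0..n-1, with a 0 appended at position n.
*Call it as da(str, sa, rank, height, n, m): pass n itself (da() adds 1 internally to cover the
*appended 0), and m must be greater than every value in str.
*Example:
*n   = 8;
*num[]   = { 1, 1, 2, 1, 1, 1, 1, 2, $ }; note: the last element of num is 0, all others are greater than 0
*rank[]  = { 4, 6, 8, 1, 2, 3, 5, 7, 0 }; rank[0..n-1] are valid, rank[n] is always 0 and carries no information
*sa[]    = { 8, 3, 4, 5, 0, 6, 1, 7, 2 }; sa[1..n] are valid, sa[0] is always n and carries no information
*height[]= { 0, 0, 3, 2, 3, 1, 2, 0, 1 }; height[2..n] are valid
*
*/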
const int MAXN=200010;
int t1[MAXN],t2[MAXN],c[MAXN];//scratch buffers used while building the suffix array; they need no initialization
//The sequence to sort is stored in s[0]..s[n-1] (length n, counting the trailing 0), and every value is less than m.
//Every s[i] except s[n-1] is greater than 0, and s[n-1] = 0.
//When the function returns, the result is stored in the sa array.
//Returns true when positions a and b carry the same key pair in r:
//equal first keys (r[a], r[b]) and equal second keys l positions further on.
//The second pair is only read when the first pair matches.
bool cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b]) {
        return false;
    }
    return r[a + l] == r[b + l];
}
void da(int str[],int sa[],int rank[],int height[],int n,int m)
{
    n++;
    int i, j, p, *x = t1, *y = t2;
    //第一轮基数排序,如果s的最大值很大,可改为快速排序
    for(i = 0;i < m;i++)c[i] = 0;
    for(i = 0;i < n;i++)c[x[i] = str[i]]++;
    for(i = 1;i < m;i++)c[i] += c[i-1];
    for(i = n-1;i >= 0;i--)sa[--c[x[i]]] = i;
    for(j = 1;j <= n; j <<= 1)
    {
        p = 0;
        //直接利用sa数组排序第二关键字
        for(i = n-j; i < n; i++)y[p++] = i;//后面的j个数第二关键字为空的最小
        for(i = 0; i < n; i++)if(sa[i] >= j)y[p++] = sa[i] - j;
        //这样数组y保存的就是按照第二关键字排序的结果
        //基数排序第一关键字
        for(i = 0; i < m; i++)c[i] = 0;
        for(i = 0; i < n; i++)c[x[y[i]]]++;
        for(i = 1; i < m;i++)c[i] += c[i-1];
        for(i = n-1; i >= 0;i--)sa[--c[x[y[i]]]] = y[i];
        //根据sa和x数组计算新的x数组
        swap(x,y);
        p = 1; x[sa[0]] = 0;
        for(i = 1;i < n;i++)
            x[sa[i]] = cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p >= n)break;
        m = p;//下次基数排序的最大值
    }
    int k = 0;
    n--;
	for(i = 0;i <= n;i++)rank[sa[i]] = i;
    for(i = 0;i < n;i++)
    {
        if(k)k--;
        j = sa[rank[i]-1];
        while(str[i+k] == str[j+k])k++;
        height[rank[i]] = k;
    }
}
//Output arrays filled by da(): the rank of every suffix and the height array.
//NOTE(review): with `using namespace std;` in effect, an unqualified use of `rank`
//outside da() can be ambiguous with std::rank (<type_traits>, C++11 and later);
//writing ::rank avoids that.
int rank[MAXN],height[MAXN];

//r: integer-encoded sequence handed to da(); sa: the suffix array da() produces.
int r[MAXN],sa[MAXN];
//Input buffers for the two strings read by main().
char str1[MAXN],str2[MAXN];
//Returns true when positions i and j lie on opposite sides of index n,
//i.e. one is strictly below n and the other strictly above it.
//A position equal to n never qualifies. The parameter m is not used.
bool check(int i,int j,int n,int m){
    if (i == n || j == n) {
        return false;
    }
    const bool i_before = i < n;
    const bool j_before = j < n;
    return i_before != j_before;
}

int main()
{
    int T;
	int n,m;
	scanf("%d",&T);
	while(T--){
		scanf("%d%d",&n,&m);
		scanf("%s%s",str1,str2);
		for(int i = 0;i < n;i++)
			r[i] = str1[i]-'a'+1;
		r[n] = 27;
		for(int i = 0;i < m;i++)
			r[n+1+i] = str2[i]-'a'+1;
		r[n+m+1] = 0;
		da(r,sa,rank,height,n+m+1,28);
		long long ans = 0;
		int tmp = 0;
		for(int i = 2;i <= n+m+1;i++){
			tmp = min(tmp,height[i]);
			if(check(sa[i],sa[i-1],n,m)){
				ans += height[i]-tmp;
				tmp = height[i];
			}
		}
		cout<<ans<<endl;
	}
    return 0;
}


你可能感兴趣的:(【codechef】Common Strings(后缀数组))