Substring fzu2075

找出串中恰好出现n次的字典序最小的子串。用SAM的话很好做,只要沿最小字符边遍历,找到|right|为n的点即可;但是由于题目没有给出字符集,用128的话SAM会超内存,这也是SAM的缺点之一(依赖字符集)。后缀数组的做法:求出height数组后,从左到右扫描sa中长度为n的连续子段,假设当前为i ~ i+n-1,检查lcp(sa[i], sa[i+n-1]) > max(height[i], height[i+n])(其中height[i] = lcp(sa[i-1], sa[i]))。如果为真,则可以确定lcp(sa[i], sa[i+n-1])对应的公共前缀恰好出现了n次;因为sa是按照字典序排列的,所以在第一个满足条件的子段中找到其恰好出现n次的最短前缀(长度为max(height[i], height[i+n])+1)就是答案。


#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <queue>
#include <algorithm>
#include <vector>
#include <cstring>
#include <stack>
#include <cctype>
#include <utility>   
#include <map>
#include <string>  
#include <climits> 
#include <set>
#include <string>    
#include <sstream>
#include <utility>   
#include <ctime>

using std::priority_queue;
using std::vector;
using std::swap;
using std::stack;
using std::sort;
using std::max;
using std::min;
using std::pair;
using std::map;
using std::string;
using std::cin;
using std::cout;
using std::set;
using std::queue;
using std::string;
using std::istringstream;
using std::make_pair;
using std::getline;
using std::greater;
using std::endl;
using std::multimap;
using std::deque;

// Shorthand type names.
typedef long long LL;
typedef unsigned long long ULL;
typedef std::pair<int, int> PAIR;
typedef std::multimap<int, int> MMAP;

// Capacity of the per-position arrays in SA (text, suffix array, ranks, ...).
const int MAXN = 100010;
// Number of rows in SA's sparse table.
const int MAXH = 19;

// The remaining constants are not referenced by the code visible in this file.
const int MAXM = 5010;
const int MAXE = 10010;
const int HSIZE = 13131;
const int SIGMA_SIZE = 26;
const int INFI = (INT_MAX - 1) / 2;
const int MOD = 123456791;
const ULL BASE = 31;
const LL LIM = 10000000;
const int INV = -10000;

// Required number of occurrences, read once per test case in main().
int N;

struct SA
{
	char S[MAXN];
	int sa[MAXN], t1[MAXN], t2[MAXN], cnt[MAXN], len, M;
	void init(int tl, int tm = 128) //tl为原串长度, tm为字符集个数
	{
		len = tl;  
		M = tm;
		int *p1 = t1;
		int *p2 = t2;
		for(int i = 0; i < M; ++i)	cnt[i] = 0;
		for(int i = 0; i <= len; ++i) ++cnt[p1[i] = S[i]];  
		for(int i = 1; i < M; ++i) cnt[i] += cnt[i-1];
		for(int i = len; i >= 0; --i) sa[--cnt[p1[i]]] = i;
		int temp = 1;
		for(int k = 1; temp <= len; k <<= 1)
		{
			temp = 0;
			for(int i = len-k+1; i <= len; ++i) p2[temp++] = i;
			for(int i = 0; i <= len;++i)
				if(sa[i] >= k)
					p2[temp++] = sa[i]-k;

			for(int i = 0; i < M; ++i) cnt[i] = 0;
			for(int i = 0; i <= len; ++i) ++cnt[p1[p2[i]]];
			for(int i = 1; i < M; ++i)	cnt[i] += cnt[i-1];
			for(int i = len; i >= 0; --i) sa[--cnt[p1[p2[i]]]] = p2[i];
			swap(p1, p2);
			temp = 1;
			p1[sa[0]] = 0;
			for(int i = 1; i <= len; ++i)
				p1[sa[i]] = p2[sa[i-1]] == p2[sa[i]] && p2[sa[i-1]+k] == p2[sa[i]+k]? temp-1: temp++;
			M = temp;
		}
	}
	int rank[MAXN], height[MAXN];
	void getHeight()
	{
		int k = 0;
		for(int i = 0; i <= len; ++i)
			rank[sa[i]] = i;
		for(int i = 0; i < len; ++i)
		{
			if(k) --k;
			int j = sa[rank[i]-1];
			while(S[i+k] == S[j+k]) ++k;
			height[rank[i]] = k;
		}
	}
	int Log[MAXN];
	int table[MAXH][MAXN];
	void initLog()
	{
		Log[0] = -1;
		for(int i = 1; i < MAXN; ++i)
			Log[i] = (i&(i-1))? Log[i-1]: Log[i-1]+1;
	}
	void initRMQ()
	{
		for(int i = 1; i <= len; ++i)
			table[0][i] = height[i];
		for(int i = 1; (1 << i) <= len; ++i)
			for(int j = 1; j+(1 << i)-1 <= len; ++j)
				table[i][j] = min(table[i-1][j], table[i-1][j+(1 << (i-1))]);
	}
	int lcp(int a, int b)
	{
		a = rank[a];
		b = rank[b];
		if(a > b) swap(a, b);
		++a;
		int temp = Log[b-a+1];
		return min(table[temp][a], table[temp][b-(1 << temp)+1]);
	}
};

// Single namespace-scope instance used by main(). Having static storage
// duration, its arrays are zero-initialised before main() runs.
SA sa;


int main()
{
	sa.initLog();
	while(~scanf("%d", &N))
	{
		scanf("%s", sa.S);
		sa.init(strlen(sa.S));
		sa.getHeight();
		sa.initRMQ();
		bool flag(false);
		for(int i = 0; i+N-1 <= sa.len; ++i)
		{
			int temp = N == 1? sa.len-sa.sa[i]: sa.lcp(sa.sa[i], sa.sa[i+N-1]); //n为1需要特判
			if(temp > sa.height[i] && (i+N > sa.len || temp > sa.height[i+N]))
			{
				temp = max(sa.height[i], i+N <= sa.len? sa.height[i+N]: 0)+1; //字典序最小
				for(int j = 0; j < temp; ++j)  
					putchar(sa.S[sa.sa[i]+j]);
				putchar('\n');
				flag = true;
				break;
			}
		}
		if(!flag)
			printf("impossible\n");
	}
	return 0;
}


你可能感兴趣的:(Substring fzu2075)