[SPOJ]1812 LCS2 后缀自动机

LCS2 - Longest Common Substring II

#suffix-array-8

A string is a finite sequence of characters over a non-empty finite set Σ.

In this problem, Σ is the set of lowercase letters.

A substring, also called a factor, is a consecutive sequence of characters that occurs at least once in a string.

Now your task is a bit harder, for some given strings, find the length of the longest common substring of them.

Here common substring means a substring of two or more strings.

Input

The input contains at most 10 lines, each line consists of no more than 100000 lowercase letters, representing a string.

Output

The length of the longest common substring. If such string doesn't exist, print "0" instead.

Example

Input:
alsdfkjfjkdsal
fdjskalajfkdsla
aaaajfaaaa

Output:
2

Notice: new testcases added

  这道题一开始觉得时间复杂度很玄学...

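  首先说常规做法. 随便拿一个串建成后缀自动机, 然后把其他串依次放到上面跑, 失配就跳par. 对每个串, 记录每个被走到的点在这次匹配中能达到的最长匹配长度, 并把它沿par链向上传递给祖先(祖先表示的是它的后缀, 同样被匹配到了). 对多个串而言, 每个点在各次匹配中得到的最长长度取min, 才是所有串在这个点上的公共最长长度; 最后对所有点取max就是答案. 为什么失配跳par? 因为par表示的是当前点所表示串的后缀, 并且它的right集合是恰好真包含当前点right集合的那个(不是更小, 而是更大), 所以跳到par相当于只丢掉尽量少的前缀再继续尝试匹配.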

  一开始觉得暴力跳par时间复杂度玄学, 于是A了之后自己根据补全AC自动机yy了一个补全后缀自动机, 有点难写不过竟然还A了. 本来想自己出大数据与我之前的比较优越性, 没想到被碾压... 本来以为是随机数据par树很矮, 后来分析了一下可以用势能证明复杂度... 均摊是O(n)的... 因为每次走一步转移就多一个长度, 但是跳一次par至少减少一个长度, 串长为n, 均摊下来也就是O(n)的.

  一般写法的:

#include <cstdio>
#include <cstring>
using namespace std;
// Capacity of every per-state array and of the input buffer.
const int maxn = 2e5 + 5;
// Length of the string the automaton is built from; Radix_sort() reads it.
int n;
// Text currently being inserted or matched; match() reads it up to the NUL.
char ss[maxn];

/*
 * Suffix automaton of one reference string, used to find the longest
 * substring shared with every string later passed through match().
 *
 * Per-state data (states are numbered from 1; 0 means "no state"):
 *   c[s][k] - transition of s on letter k (0 when absent)
 *   par[s]  - suffix link of s
 *   len[s]  - length of the longest string represented by s
 *   mx[s]   - longest match ending in s during the current match() call
 *   mn[s]   - minimum of those per-call maxima over all match() calls so far
 *   sa[]    - states ordered by non-decreasing len (filled by Radix_sort)
 *   cnt[]   - counting-sort buckets indexed by len
 */
struct Suffix_Automaton {
	int root, last, tot;
	int sa[maxn], cnt[maxn];
	int c[maxn][26], par[maxn], mx[maxn], mn[maxn], len[maxn];

	/* Creates the root state and marks every mn[] slot as "unbounded". */
	inline void init() {
		tot = 1;
		root = tot;
		last = tot;
		memset(mn, 0x3f, sizeof(mn));
	}

	/* Appends letter idx (0..25) to the string the automaton represents. */
	inline void insert(int idx) {
		const int cur = ++ tot;
		int walk = last;
		last = cur;
		len[cur] = len[walk] + 1;
		mn[cur] = len[cur];
		// Give every suffix state that lacks this letter an edge to the new state.
		for (; walk && !c[walk][idx]; walk = par[walk]) c[walk][idx] = cur;
		if (!walk) {
			par[cur] = root;
			return;
		}
		const int hit = c[walk][idx];
		if (len[hit] == len[walk] + 1) {
			par[cur] = hit;
			return;
		}
		// The existing target is too long: split off a clone with the right length.
		const int clone = ++ tot;
		len[clone] = len[walk] + 1;
		mn[clone] = len[clone];
		par[clone] = par[hit];
		memcpy(c[clone], c[hit], sizeof(c[hit]));
		par[hit] = clone;
		par[cur] = clone;
		for (; c[walk][idx] == hit; walk = par[walk]) c[walk][idx] = clone;
	}

	/*
	 * Counting sort of all states by len into sa[1..tot].
	 * Relies on the global n as the largest len, and on cnt[] still being
	 * all zero, so it is meant to be called once after the last insert().
	 */
	inline void Radix_sort() {
		int i;
		for (i = 1; i <= tot; ++ i) ++ cnt[len[i]];
		for (i = 1; i <= n; ++ i) cnt[i] += cnt[i - 1];
		for (i = 1; i <= tot; ++ i) {
			const int slot = cnt[len[i]] --;
			sa[slot] = i;
		}
	}

	/*
	 * Runs the string in ss through the automaton and folds the result
	 * into mn[]: afterwards mn[s] is no larger than the longest match
	 * this string achieved in state s.
	 */
	inline void match() {
		int state = root, matched = 0;
		for (const char *ch = ss; *ch; ++ ch) {
			const int idx = *ch - 'a';
			// On a mismatch fall back along suffix links, shrinking the match.
			while (state && !c[state][idx]) {
				state = par[state];
				matched = len[state];
			}
			if (!state) {
				state = root;
				continue;
			}
			state = c[state][idx];
			++ matched;
			if (mx[state] < matched) mx[state] = matched;
		}
		// Longest states first, so a state's suffix-link parent is handled after it.
		for (int i = tot; i > 1; -- i) {
			const int nw = sa[i];
			if (mx[nw] < mn[nw]) mn[nw] = mx[nw];
			// A matched state implies its parent was matched in full.
			if (mx[nw]) mx[par[nw]] = len[par[nw]];
			mx[nw] = 0;
		}
	}

	/* Returns the largest mn[] over all non-root states. */
	inline int solve() {
		int best = 0;
		for (int i = tot; i >= 2; -- i)
			if (mn[i] > best) best = mn[i];
		return best;
	}
}sam;
/*
 * Builds the automaton from the first whitespace-delimited token on stdin,
 * matches every following token against it, and prints the result of
 * sam.solve().
 */
int main() {
	sam.init();
	scanf("%s", ss);
	n = strlen(ss);
	for (const char *ch = ss; *ch; ++ ch) sam.insert(*ch - 'a');
	sam.Radix_sort();
	// Keep matching until scanf reports end of input.
	for (;;) {
		if (scanf("%s", ss) == EOF) break;
		sam.match();
	}
	printf("%d\n", sam.solve());
	return 0;
}
  自己yy的补全后缀自动机.

#include <cstdio>
#include <cstring>
#include <queue>
using namespace std;
// Capacity of every per-state array and of the input buffer.
const int maxn = 2e5 + 5;
// Length of the string the automaton is built from; Radix_sort() reads it.
int n;
// Text currently being inserted or matched; match() reads it up to the NUL.
char ss[maxn];

/*
 * Suffix automaton whose transition table is "completed" the way an
 * Aho-Corasick automaton is: after bfs(), every state has an edge for
 * every letter, so match() never has to walk suffix links.
 *
 * Per-state data (states are numbered from 1; 0 means "no state"):
 *   c[s][k]  - transition of s on letter k
 *   mp[s][k] - true when c[s][k] is an original automaton edge
 *   g[s][k]  - match length after taking c[s][k] when the edge was
 *              filled in by bfs() (len of the fallback state + 1)
 *   par[s]   - suffix link of s
 *   len[s]   - length of the longest string represented by s
 *   in[s]    - number of not-yet-processed original edges entering s
 *   mx[s]    - longest match ending in s during the current match() call
 *   mn[s]    - minimum of those per-call maxima over all match() calls
 *   sa[]     - states ordered by non-decreasing len (filled by Radix_sort)
 *   cnt[]    - counting-sort buckets indexed by len
 */
struct Suffix_Automaton {
	bool mp[maxn][26];
	int root, last, tot;
	int sa[maxn], cnt[maxn];
	int c[maxn][26], par[maxn], mx[maxn], mn[maxn], len[maxn], in[maxn], g[maxn][26];

	/* Creates the root state and marks every mn[] slot as "unbounded". */
	inline void init() {
		tot = 0;
		root = last = ++ tot;
		memset(mn, 0x3f, sizeof(mn));
	}

	/* Appends letter idx (0..25) to the string the automaton represents. */
	inline void insert(int idx) {
		int p = last, np = last = ++ tot;
		len[np] = len[p] + 1, mn[np] = len[np];
		// Give every suffix state that lacks this letter an edge to the new state.
		while (p && !c[p][idx]) c[p][idx] = np, p = par[p];
		if (!p) par[np] = root;
		else {
			int q = c[p][idx];
			if (len[q] == len[p] + 1) par[np] = q;
			else {
				// The existing target is too long: split off a clone.
				int nq = ++ tot;
				len[nq] = len[p] + 1, mn[nq] = len[nq], par[nq] = par[q];
				memcpy(c[nq], c[q], sizeof(c[q]));
				par[np] = par[q] = nq;
				while (c[p][idx] == q) c[p][idx] = nq, p = par[p];
			}
		}
	}

	/*
	 * Counting sort of all states by len into sa[1..tot].
	 * Relies on the global n as the largest len, and on cnt[] still being
	 * all zero, so it is meant to be called once after the last insert().
	 */
	inline void Radix_sort() {
		for (int i = 1; i <= tot; ++ i) cnt[len[i]] ++;
		for (int i = 1; i <= n; ++ i) cnt[i] += cnt[i - 1];
		for (int i = 1; i <= tot; ++ i) sa[cnt[len[i]] --] = i;
	}

	// Work list of states whose incoming original edges have all been processed.
	std::queue<int> q;

	/*
	 * Fills in every missing transition. States are processed in
	 * topological order of the original edges; a missing edge of u is
	 * copied from par[u], together with the match length it yields.
	 * Call once, after all insert() calls and before the first match().
	 */
	inline void bfs() {
		for (int i = 1; i <= tot; ++ i)
			for (int j = 0; j < 26; ++ j)
				if (c[i][j]) ++ in[c[i][j]], mp[i][j] = true, g[i][j] = len[i] + 1;
		for (int i = 0; i < 26; ++ i) {
			const int to = c[root][i];
			// A letter absent at the root loops back to the root (match length 0).
			if (!to) {c[root][i] = root; continue;}
			if (!(--in[to])) q.push(to);
		}
		while (!q.empty()) {
			int u = q.front(); q.pop();
			for (int i = 0; i < 26; ++ i) {
				int &v = c[u][i];
				// par[u] left the queue earlier, so its row is already complete.
				if (!v)  {v = c[par[u]][i]; g[u][i] = g[par[u]][i]; continue;}
				if (!(--in[v])) q.push(v);
			}
		}
	}

	/*
	 * Runs the string in ss through the completed automaton and folds the
	 * result into mn[]: afterwards mn[s] is no larger than the longest
	 * match this string achieved in state s.
	 */
	inline void match() {
		int p = root, ml = 0;
		for (int i = 0; ss[i]; ++ i) {
			int idx = ss[i] - 'a', ori = p;
			p = c[p][idx];
			if (p != root) {
				// Original edge: the match grows by one. Filled-in edge:
				// the match restarts from the fallback state's length + 1.
				if (mp[ori][idx]) ml ++;
				else ml = g[ori][idx];
				if (ml > mx[p]) mx[p] = ml;
			} else ml = 0;
		}
		// Longest states first, so a state's suffix-link parent is handled after it.
		for (int i = tot; i > 1; -- i) {
			int nw = sa[i];
			if (mn[nw] > mx[nw]) mn[nw] = mx[nw];
			// A matched state implies its parent was matched in full.
			if (mx[nw]) mx[par[nw]] = len[par[nw]];
			mx[nw] = 0;
		}
	}

	/* Returns the largest mn[] over all non-root states. */
	inline int solve() {
		int ret = 0;
		for (int i = 2; i <= tot; ++ i)
			if (ret < mn[i]) ret = mn[i];
		return ret;
	}
}sam;
/*
 * Builds the completed automaton from the first whitespace-delimited token
 * on stdin, matches every following token against it, and prints the
 * result of sam.solve().
 */
int main() {
	sam.init();
	scanf("%s", ss);
	n = strlen(ss);
	for (const char *ch = ss; *ch; ++ ch) sam.insert(*ch - 'a');
	sam.Radix_sort();
	sam.bfs();
	// Keep matching until scanf reports end of input.
	while (true) {
		const int got = scanf("%s", ss);
		if (got == EOF) break;
		sam.match();
	}
	printf("%d\n", sam.solve());
	return 0;
}



你可能感兴趣的:(字符串,后缀自动机)