A string is a finite sequence of characters over a non-empty finite set Σ.
In this problem, Σ is the set of lowercase letters.
A substring, also called a factor, is a consecutive sequence of characters that occurs at least once in a string.
Now your task is a bit harder: for some given strings, find the length of the longest common substring of them.
Here common substring means a substring of two or more strings.
The input contains at most 10 lines, each line consists of no more than 100000 lowercase letters, representing a string.
Output the length of the longest common substring. If no such string exists, print "0" instead.
Input:
alsdfkjfjkdsal
fdjskalajfkdsla
aaaajfaaaa
Output:
2
Notice: new testcases added
这道题一开始觉得时间复杂度很玄学...
首先说常规做法. 随便拿一个串做成后缀自动机, 然后把其他串往上面跑. 如果失配就跳par. 每个走过的点记录当前这个串跑的时候这个点被走到时的最长匹配长度. 多个串的公共子串的话, 显然每个点要对每个串跑出来的最长长度取min, 才是所有串都能跑到这里的公共最长长度. 为什么失配跳par? 因为par代表的串是当前点的后缀, 并且par的right集合是包含当前点right集合的集合中最小的那个(即刚刚大于你的那个), 所以跳par只会丢掉尽量少的已匹配后缀.
一开始觉得暴力跳par时间复杂度玄学, 于是A了之后自己根据补全AC自动机yy了一个补全后缀自动机, 有点难写不过竟然还A了. 本来想自己出大数据与我之前的比较优越性, 没想到被碾压... 本来以为是随机数据par树很矮, 后来分析了一下可以用势能证明复杂度... 均摊是O(n)的... 因为每次走一步转移就多一个长度, 但是跳一次par至少减少一个长度, 串长为n, 均摊下来也就是O(n)的.
一般写法的:
#include <cstdio>
#include <cstring>
using namespace std;
// Capacity shared by every table below: the automaton's per-state arrays and the
// input buffer.
const int maxn = 200000 + 5;

// Length of the string the automaton is built from. Radix_sort() reads it as the
// largest state length it has to bucket.
int n;

// Buffer holding the string currently being processed; match() takes its text from here.
char ss[maxn];

/**
 * Suffix automaton of one string, used to find the longest substring that is common to
 * that string and to every string later run through match().
 *
 * Index 0 is the "no state" sentinel; real states are numbered 1..tot and root == 1.
 *   c[s][x]     transition of state s on letter x (0 = absent)
 *   par[s]      suffix link of s (0 for the root)
 *   len[s]      length of the longest string represented by s
 *   mx[s]       per-match scratch: longest match that ended in s for the current string
 *   mn[s]       minimum of len[s] and of mx[s] over every string matched so far
 *   sa[1..tot]  states in non-decreasing order of len; cnt[] is the counting-sort scratch
 *
 * init() does not clear the tables: the object relies on zero-initialised storage, which
 * the global instance `sam` provides.
 */
struct Suffix_Automaton {
    int root, last, tot;
    int sa[maxn], cnt[maxn];
    int c[maxn][26], par[maxn], mx[maxn], mn[maxn], len[maxn];

    /** Creates the root state and sets every mn[] entry to a large "unset" value. */
    inline void init() {
        tot = 1;
        root = 1;
        last = 1;
        memset(mn, 0x3f, sizeof(mn));
    }

    /** Extends the automaton by one letter; idx is the letter's 0-based alphabet index. */
    inline void insert(int idx) {
        int walk = last;
        const int cur = ++tot;
        last = cur;
        len[cur] = len[walk] + 1;
        mn[cur] = len[cur];

        // Add the new transition along the suffix-link chain until one already exists.
        for (; walk && !c[walk][idx]; walk = par[walk]) c[walk][idx] = cur;

        if (!walk) {
            par[cur] = root;
            return;
        }

        const int target = c[walk][idx];
        if (len[target] == len[walk] + 1) {
            par[cur] = target;
            return;
        }

        // target also represents longer strings: split off a clone of the right length.
        const int clone = ++tot;
        len[clone] = len[walk] + 1;
        mn[clone] = len[clone];
        par[clone] = par[target];
        memcpy(c[clone], c[target], sizeof(c[target]));
        par[target] = clone;
        par[cur] = clone;
        // The sentinel row c[0] is all zero, so this stops on its own when walk hits 0.
        for (; c[walk][idx] == target; walk = par[walk]) c[walk][idx] = clone;
    }

    /** Counting sort of the states by len into sa[1..tot]; uses the global n as max len. */
    inline void Radix_sort() {
        for (int s = 1; s <= tot; ++s) ++cnt[len[s]];
        for (int l = 1; l <= n; ++l) cnt[l] += cnt[l - 1];
        for (int s = 1; s <= tot; ++s) {
            const int slot = cnt[len[s]]--;
            sa[slot] = s;
        }
    }

    /**
     * Runs the string in the global buffer ss through the automaton and folds the result
     * into mn[]. Requires Radix_sort() to have been called.
     */
    inline void match() {
        int state = root;
        int matched = 0;
        for (const char *ch = ss; *ch; ++ch) {
            const int idx = *ch - 'a';
            // On a mismatch fall back along suffix links, shrinking the match to fit.
            while (state && !c[state][idx]) {
                state = par[state];
                matched = len[state];
            }
            if (state) {
                state = c[state][idx];
                ++matched;
                if (mx[state] < matched) mx[state] = matched;
            } else {
                // Letter is absent from the automaton: restart from the root.
                state = root;
            }
        }

        // Longest states first: a state that was reached implies each suffix-link
        // ancestor was reached at its full length. mx[] is reset for the next string.
        for (int k = tot; k > 1; --k) {
            const int s = sa[k];
            if (mx[s] < mn[s]) mn[s] = mx[s];
            if (mx[s]) mx[par[s]] = len[par[s]];
            mx[s] = 0;
        }
    }

    /** Returns the largest mn[] over all non-root states (0 if there are none). */
    inline int solve() {
        int best = 0;
        for (int s = 2; s <= tot; ++s) best = best < mn[s] ? mn[s] : best;
        return best;
    }
} sam;
/**
 * Builds the automaton from the first whitespace-delimited string on stdin, runs every
 * remaining string through it, and prints the length of the longest substring common to
 * all of them. Prints "0" when no string can be read.
 */
int main() {
    // A bare "%s" writes past ss on an over-long token; cap each read at the buffer
    // capacity minus the terminating NUL.
    // NOTE(review): the automaton needs roughly two states per letter of the first
    // string, so that string must additionally respect the problem's 100000 limit.
    char fmt[32];
    snprintf(fmt, sizeof(fmt), "%%%lus", static_cast<unsigned long>(sizeof(ss) - 1));

    sam.init();
    if (scanf(fmt, ss) != 1) {
        // No input at all: there is no common substring.
        printf("0\n");
        return 0;
    }
    n = static_cast<int>(strlen(ss));
    for (int i = 0; i < n; ++i) sam.insert(ss[i] - 'a');
    sam.Radix_sort();

    while (scanf(fmt, ss) == 1) sam.match();
    printf("%d\n", sam.solve());
    return 0;
}
自己yy的补全后缀自动机.
#include <cstdio>
#include <cstring>
#include <queue>
using namespace std;
// Capacity shared by every table below: the automaton's per-state arrays and the
// input buffer.
const int maxn = 200000 + 5;

// Length of the string the automaton is built from. Radix_sort() reads it as the
// largest state length it has to bucket.
int n;

// Buffer holding the string currently being processed; match() takes its text from here.
char ss[maxn];

/**
 * Suffix automaton with its transition table "completed" the way an Aho-Corasick
 * automaton is: after bfs(), every missing transition points directly at the state the
 * suffix-link fallback would reach, so match() never loops on a mismatch.
 *
 * Index 0 is the "no state" sentinel; real states are numbered 1..tot and root == 1.
 *   c[s][x]     transition of s on letter x (0 = absent until bfs() fills it in)
 *   mp[s][x]    true when c[s][x] is an original transition, not a filled-in one
 *   g[s][x]     match length after taking a filled-in transition (len of the ancestor
 *               that owns the original transition, plus one; 0 if it leads to the root)
 *   in[s]       number of original transitions entering s (bfs() scratch)
 *   par[s]      suffix link of s (0 for the root)
 *   len[s]      length of the longest string represented by s
 *   mx[s]       per-match scratch: longest match that ended in s for the current string
 *   mn[s]       minimum of len[s] and of mx[s] over every string matched so far
 *   sa[1..tot]  states in non-decreasing order of len; cnt[] is the counting-sort scratch
 *
 * init() does not clear the tables: the object relies on zero-initialised storage, which
 * the global instance `sam` provides.
 */
struct Suffix_Automaton {
    bool mp[maxn][26];
    int root, last, tot;
    int sa[maxn], cnt[maxn];
    int c[maxn][26], par[maxn], mx[maxn], mn[maxn], len[maxn], in[maxn], g[maxn][26];

    /** Creates the root state and sets every mn[] entry to a large "unset" value. */
    inline void init() {
        tot = 0;
        root = last = ++tot;
        std::memset(mn, 0x3f, sizeof(mn));
    }

    /** Extends the automaton by one letter; idx is the letter's 0-based alphabet index. */
    inline void insert(int idx) {
        int p = last;
        const int np = ++tot;
        last = np;
        len[np] = len[p] + 1;
        mn[np] = len[np];

        // Add the new transition along the suffix-link chain until one already exists.
        for (; p && !c[p][idx]; p = par[p]) c[p][idx] = np;

        if (!p) {
            par[np] = root;
            return;
        }

        const int q0 = c[p][idx];
        if (len[q0] == len[p] + 1) {
            par[np] = q0;
            return;
        }

        // q0 also represents longer strings: split off a clone of the right length.
        const int nq = ++tot;
        len[nq] = len[p] + 1;
        mn[nq] = len[nq];
        par[nq] = par[q0];
        std::memcpy(c[nq], c[q0], sizeof(c[q0]));
        par[np] = par[q0] = nq;
        // The sentinel row c[0] is all zero, so this stops on its own when p hits 0.
        for (; c[p][idx] == q0; p = par[p]) c[p][idx] = nq;
    }

    /** Counting sort of the states by len into sa[1..tot]; uses the global n as max len. */
    inline void Radix_sort() {
        for (int s = 1; s <= tot; ++s) ++cnt[len[s]];
        for (int l = 1; l <= n; ++l) cnt[l] += cnt[l - 1];
        for (int s = 1; s <= tot; ++s) sa[cnt[len[s]]--] = s;
    }

    // Work list for bfs(). The element type must be spelled out: a bare `queue q;`
    // does not compile.
    std::queue<int> q;

    /**
     * Fills in every missing transition. States are visited in topological order of the
     * original transitions (Kahn's algorithm with a FIFO queue), so a state's suffix
     * link has already been completed by the time the state copies from it.
     * Must be called once, after the last insert() and before the first match().
     */
    inline void bfs() {
        for (int s = 1; s <= tot; ++s) {
            for (int j = 0; j < 26; ++j) {
                if (!c[s][j]) continue;
                ++in[c[s][j]];
                mp[s][j] = true;
                g[s][j] = len[s] + 1;
            }
        }

        // Seed from the root. A letter absent at the root loops back to the root with
        // g == 0; test for it first so the sentinel counter in[0] is never touched.
        for (int j = 0; j < 26; ++j) {
            const int v = c[root][j];
            if (!v) {
                c[root][j] = root;
                continue;
            }
            if (--in[v] == 0) q.push(v);
        }

        while (!q.empty()) {
            const int u = q.front();
            q.pop();
            for (int j = 0; j < 26; ++j) {
                int &v = c[u][j];
                if (!v) {
                    // Inherit the suffix link's (already completed) transition.
                    v = c[par[u]][j];
                    g[u][j] = g[par[u]][j];
                } else if (--in[v] == 0) {
                    q.push(v);
                }
            }
        }
    }

    /**
     * Runs the string in the global buffer ss through the completed automaton and folds
     * the result into mn[]. Requires Radix_sort() and bfs() to have been called.
     */
    inline void match() {
        int state = root;
        int matched = 0;
        for (const char *ch = ss; *ch; ++ch) {
            const int idx = *ch - 'a';
            const int from = state;
            state = c[from][idx];
            if (state == root) {
                // Only filled-in transitions lead to the root: nothing matches here.
                matched = 0;
                continue;
            }
            if (mp[from][idx]) ++matched;       // original edge: match grows by one
            else matched = g[from][idx];        // fallback edge: length is precomputed
            if (matched > mx[state]) mx[state] = matched;
        }

        // Longest states first: a state that was reached implies each suffix-link
        // ancestor was reached at its full length. mx[] is reset for the next string.
        for (int k = tot; k > 1; --k) {
            const int s = sa[k];
            if (mn[s] > mx[s]) mn[s] = mx[s];
            if (mx[s]) mx[par[s]] = len[par[s]];
            mx[s] = 0;
        }
    }

    /** Returns the largest mn[] over all non-root states (0 if there are none). */
    inline int solve() {
        int best = 0;
        for (int s = 2; s <= tot; ++s)
            if (best < mn[s]) best = mn[s];
        return best;
    }
} sam;
/**
 * Builds the completed automaton from the first whitespace-delimited string on stdin,
 * runs every remaining string through it, and prints the length of the longest substring
 * common to all of them. Prints "0" when no string can be read.
 */
int main() {
    // A bare "%s" writes past ss on an over-long token; cap each read at the buffer
    // capacity minus the terminating NUL.
    // NOTE(review): the automaton needs roughly two states per letter of the first
    // string, so that string must additionally respect the problem's 100000 limit.
    char fmt[32];
    snprintf(fmt, sizeof(fmt), "%%%lus", static_cast<unsigned long>(sizeof(ss) - 1));

    sam.init();
    if (scanf(fmt, ss) != 1) {
        // No input at all: there is no common substring.
        printf("0\n");
        return 0;
    }
    n = static_cast<int>(strlen(ss));
    for (int i = 0; i < n; ++i) sam.insert(ss[i] - 'a');
    sam.Radix_sort();
    sam.bfs();

    while (scanf(fmt, ss) == 1) sam.match();
    printf("%d\n", sam.solve());
    return 0;
}