不可重叠最长重复子串
poj1743 Musical Theme
/*********************************\
* @prob: poj1743 Musical Theme *
* @auth: Wang Junji *
* @stat: Accepted. *
* @date: June. 15th, 2012 *
* @memo: 后缀数组 *
\*********************************/
#include
#include
#include
#include
#include
// Capacity of every working array declared through the arr typedef.
const int maxN = 20010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers used by get_sa.
// sa: suffix array; r: symbol sequence being indexed.
// rank, height: filled by get_height.
arr wa, wb, ws, wv, sa, r, rank, height;
// Length of the sequence currently stored in r (set in main).
int n;
// Lowers a to b when a is not already smaller; returns a reference to a.
inline int& gmin(int& a, const int& b)
{
    if (!(a < b)) a = b;
    return a;
}
// Raises a to b when a is not already larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
}
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols; each value is used as an index into ws, so it must lie in [0, m).
//   sa : output; sa[i] receives the start index of the i-th smallest suffix.
//   n  : number of symbols to sort (main passes its length + 1 so that the
//        trailing 0 terminator is part of the text).
//   m  : exclusive upper bound on the symbol values.
// Uses the file-level scratch arrays wa, wb, ws and wv.
inline void get_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
// Counting sort of all suffixes by their first symbol; x holds the current keys.
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
// Each round doubles the compared prefix length j; p counts the distinct
// ranks produced, and the loop stops once every suffix has its own rank.
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
// y lists the suffix starts ordered by second key (the key of position i + j).
// Suffixes whose second half lies past the end come first.
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
// Counting sort of y by first key, filled from the back to keep ties in y order.
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
// y now holds the old keys; x receives the new ranks (equal key pairs share a rank).
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
}
return;
}
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i].  The comparison loop has no bounds check; it stops
// at the first differing symbol, so it relies on the caller's terminator.
inline void get_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
    return;
}
// Decides whether the interval sequence r contains two equal runs of k
// intervals whose underlying note ranges do not overlap.
//
// Suffixes are scanned in sorted order and split into groups wherever
// height[i] < k; inside a group every pair shares a prefix of length >= k.
// A run of k intervals covers k + 1 notes, so two runs are disjoint only if
// their start positions differ by MORE than k.  The previous `>= k` test
// accepted two themes sharing one note (e.g. 9 ascending notes gave 5, not 0).
//
// Returns true when some group holds two starts more than k apart.
inline bool check(int k)
{
    int min_sa = sa[1], max_sa = sa[1];
    for (int i = 2; i < n + 1; ++i)
    {
        if (height[i] < k)
        {
            // Group boundary: judge the finished group, then start a new one.
            if (max_sa - min_sa > k) return 1;
            max_sa = min_sa = sa[i];
        }
        else gmax(max_sa, sa[i]), gmin(min_sa, sa[i]);
    }
    return max_sa - min_sa > k;
}
// Reads test cases until a 0 (or end of input), and for each prints the
// length of the longest theme that reappears (possibly transposed) without
// overlapping, or 0 when no theme of at least 5 notes exists.
int main()
{
    freopen("Musical_Theme.in", "r", stdin);
    freopen("Musical_Theme.out", "w", stdout);
    // scanf returns EOF (-1) at end of input, which is "true"; require an
    // actual conversion and a positive count so the loop always terminates.
    while (scanf("%d", &n) == 1 && n > 0)
    {
        for (int i = 0; i < n; ++i) scanf("%d", r + i);
        // Replace notes by consecutive differences so that transposed themes
        // compare equal; +100 keeps every symbol positive, below the 200 passed to get_sa.
        for (int i = 0; i < n - 1; ++i) r[i] -= r[i + 1] - 100;
        // n now counts the differences; r[n] is the terminator.
        r[--n] = 0;
        get_sa(r, sa, n + 1, 200);
        get_height(r, sa, n);
        // Binary search on the number of matching differences (at least 4,
        // i.e. 5 notes); a hit on Mid differences means Mid + 1 notes.
        int L = 4, R = n + 1, res = 0;
        while (L < R)
        {
            int Mid = (L + R) >> 1;
            if (check(Mid)) res = L = Mid + 1;
            else R = Mid;
        }
        printf("%d\n", res);
    }
    return 0;
}
/*
不可重叠最长重复子串问题。
二分答案k,把排序的后缀分成height值不小于k的若干组,若存在一组中的最大sa值和最小sa值不小于k,那么此k成立,否则不成立。
注意最后height数组的取值是1~n而不是0~n-1。
*/
可重叠的K次最长重复子串
poj3261 Milk Patterns
/**********************************\
* @prob: poj3261 Milk_Patterns *
* @auth: Wang Junji *
* @stat: Accepted. *
* @date: June. 15th, 2012 *
* @memo: 后缀数组 *
\**********************************/
#include
#include
#include
#include
#include
// Capacity of every working array declared through the arr typedef.
const int maxN = 100010;
typedef int arr[maxN];
// wa, wb, wv, ws: scratch buffers used by get_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by get_height.
// tab: sorted copy of the input values, used in main to compress them.
arr wa, wb, wv, ws, r, rank, sa, height, tab;
// n: sequence length; K: required number of occurrences (both read in main).
int n, K;
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols; each value is used as an index into ws, so it must lie in [0, m).
//   sa : output; sa[i] receives the start index of the i-th smallest suffix.
//   n  : number of symbols to sort (main passes its length + 1 so that the
//        trailing 0 terminator is part of the text).
//   m  : exclusive upper bound on the symbol values.
// Uses the file-level scratch arrays wa, wb, ws and wv.
inline void get_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
// Counting sort of all suffixes by their first symbol; x holds the current keys.
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
// Each round doubles the compared prefix length j; p counts the distinct
// ranks produced, and the loop stops once every suffix has its own rank.
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
// y lists the suffix starts ordered by second key (the key of position i + j).
// Suffixes whose second half lies past the end come first.
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
// Counting sort of y by first key, filled from the back to keep ties in y order.
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
// y now holds the old keys; x receives the new ranks (equal key pairs share a rank).
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
}
return;
}
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i].  The comparison loop has no bounds check; it stops
// at the first differing symbol, so it relies on the caller's terminator.
inline void get_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
    return;
}
// Returns true when some substring of length k occurs at least K times
// (occurrences may overlap).  Sorted suffixes are cut into groups wherever
// height[i] < k; a group of at least K suffixes is a witness.
inline bool check(int k)
{
    int group_begin = 1;
    for (int i = 2; i <= n; ++i)
    {
        if (height[i] >= k) continue;
        // Suffixes group_begin .. i - 1 form one finished group.
        if (i - group_begin >= K) return true;
        group_begin = i;
    }
    return n + 1 - group_begin >= K;
}
int main()
{
freopen("Milk_Patterns.in", "r", stdin);
freopen("Milk_Patterns.out", "w", stdout);
scanf("%d%d", &n, &K);
for (int i = 0; i < n; ++i) scanf("%d", r + i), tab[i] = r[i];
std::sort(tab, tab + n);
int cnt = std::unique(tab, tab + n) - tab;
for (int i = 0; i < n; ++i)
r[i] = std::lower_bound(tab, tab + cnt, r[i]) - tab + 1;
r[n] = 0;
get_sa(r, sa, n + 1, cnt + 1);
get_height(r, sa, n);
int L = 1, R = n + 1, res = 0;
while (L < R)
{
int Mid = (L + R) >> 1;
check(Mid) ? (res = Mid, L = Mid + 1) : (R = Mid);
}
printf("%d\n", res);
return 0;
}
/*
可重叠的K次最长重复子串。
二分答案k,把排序后的后缀分成height值不小于k的若干组,若存在一组的元素个数不少于K,那么此k成立,否则不成立。
(注意k和K代表的含义不同。)
*/
不相同的子串的个数
spoj694 Distinct Substrings
spoj705 New Distinct Substrings
/*****************************\
* @prob: spoj694 & spoj705 *
* @auth: Wang Junji *
* @stat: Accepted. *
* @date: June. 16th, 2012 *
* @memo: 后缀数组 *
\*****************************/
#include
#include
#include
#include
#include
// Capacity of every working array and of the input buffer.
const int maxN = 50010;
typedef int arr[maxN];
// Input string for the current test case.
char str[maxN];
// wa, wb, ws, wv: scratch buffers used by calc_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by calc_height.
arr wa, wb, ws, wv, r, rank, sa, height;
// n: length of str; T: number of test cases still to process.
int n, T;
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
} /* cmp */
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols; each value is used as an index into ws, so it must lie in [0, m).
//   sa : output; sa[i] receives the start index of the i-th smallest suffix.
//   n  : number of symbols to sort (main passes its length + 1 so that the
//        trailing 0 terminator is part of the text).
//   m  : exclusive upper bound on the symbol values.
// Uses the file-level scratch arrays wa, wb, ws and wv.
inline void calc_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
// Counting sort of all suffixes by their first symbol; x holds the current keys.
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
// Each round doubles the compared prefix length j; p counts the distinct
// ranks produced, and the loop stops once every suffix has its own rank.
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
// y lists the suffix starts ordered by second key (the key of position i + j).
// Suffixes whose second half lies past the end come first.
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
// Counting sort of y by first key, filled from the back to keep ties in y order.
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
// y now holds the old keys; x receives the new ranks (equal key pairs share a rank).
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
} /* for */
return;
} /* calc_sa */
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i].  The comparison loop has no bounds check; it stops
// at the first differing symbol, so it relies on the caller's terminator.
inline void calc_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    } /* for */
    return;
} /* calc_height */
// For each of T input strings, prints the number of distinct substrings.
int main()
{
    freopen("substr.in", "r", stdin);
    freopen("substr.out", "w", stdout);
    scanf("%d", &T);
    while (T--)
    {
        scanf("%s", str);
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i];
        r[n] = 0;
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);
        // The suffix sa[i] has n - sa[i] prefixes; the first height[i] of
        // them already appeared as prefixes of the preceding sorted suffix.
        int ans = 0;
        for (int i = 1; i <= n; ++i)
        {
            const int fresh = n - sa[i] - height[i];
            ans += fresh;
        }
        printf("%d\n", ans);
    } /* while */
    return 0;
} /* main */
/*
由于原串的子串一定是某个后缀的前缀,那么原问题等价于求出所有后缀中不相同的前缀个数。
原串的每个后缀i贡献出n - i个前缀,那么若按照字典序,则每个后缀sa[i]贡献出n - sa[i] - height[i]个与前面不同的前缀出来,所以只需要将这些值累加即可。
*/
最长回文子串
ural1297 Palindrome
/******************************\
* @prob: ural1297 Palindrome *
* @auth: Wang Junji *
* @stat: Accepted. *
* @date: June. 16th, 2012 *
* @memo: 后缀数组 *
\******************************/
#include
#include
#include
#include
#include
// Capacity of every working array and of the text buffer.
const int maxN = 2010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers used by calc_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by calc_height.
arr wa, wb, ws, wv, r, rank, sa, height;
// f: sparse table over height[] built by rmq_init (f[q][i] covers 2^q entries from i).
// n: length of the combined text; pos: length of the original input string.
int f[20][maxN], n, pos;
// Input string, followed in main by a separator and its reversed copy.
char str[maxN];
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
} /* cmp */
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols; each value is used as an index into ws, so it must lie in [0, m).
//   sa : output; sa[i] receives the start index of the i-th smallest suffix.
//   n  : number of symbols to sort (main passes its length + 1 so that the
//        trailing 0 terminator is part of the text).
//   m  : exclusive upper bound on the symbol values.
// Uses the file-level scratch arrays wa, wb, ws and wv.
inline void calc_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
// Counting sort of all suffixes by their first symbol; x holds the current keys.
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
// Each round doubles the compared prefix length j; p counts the distinct
// ranks produced, and the loop stops once every suffix has its own rank.
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
// y lists the suffix starts ordered by second key (the key of position i + j).
// Suffixes whose second half lies past the end come first.
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
// Counting sort of y by first key, filled from the back to keep ties in y order.
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
// y now holds the old keys; x receives the new ranks (equal key pairs share a rank).
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
} /* for */
return;
} /* calc_sa */
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i], where a match is never extended across a 0 symbol
// (main encodes both the separator and the terminator as 0).
inline void calc_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched] && r[start + matched])
            ++matched;
        height[rank[start]] = matched;
    } /* for */
    return;
} /* calc_height */
// Builds the sparse table f over height[1..n]: f[q][i] is the minimum of the
// 2^q entries starting at index i.
inline void rmq_init()
{
    for (int i = 1; i <= n; ++i) f[0][i] = height[i];
    for (int q = 0, span = 1; span < n; ++q, span <<= 1)
        for (int i = 1; i + span <= n + 1; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + span]);
} /* rmq_init */
inline int LCP(int a, int b)
{
a = rank[a], b = rank[b];
if (a > b) std::swap(a, b); ++a; int q = 0;
while (1 << q < b - a + 2) ++q; --q;
return std::min(f[q][a], f[q][b - (1 << q) + 1]);
} /* LCP */
// Reads one string and prints its longest palindromic substring (the
// leftmost one on ties).
int main()
{
    freopen("Palindrome.in", "r", stdin);
    freopen("Palindrome.out", "w", stdout);
    scanf("%s", str);
    pos = strlen(str);
    // Text layout: original, one ' ' separator, reversed original.
    str[pos] = ' ';
    strncpy(str + pos + 1, str, pos);
    n = strlen(str);
    std::reverse(str + pos + 1, str + n);
    for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
    r[n] = 0;
    calc_sa(r, sa, n + 1, 128);
    calc_height(r, sa, n);
    rmq_init();
    int ans = 0, res = 0;
    for (int i = 0; i < pos; ++i)
    {
        // Odd length: centre on character i; its mirror sits at n - i - 1.
        const int odd = (LCP(i, n - i - 1) << 1) - 1;
        if (odd > ans)
        {
            ans = odd;
            res = i - (odd >> 1);
        }
        // Even length: centre between characters i - 1 and i.
        const int even = LCP(i, n - i) << 1;
        if (even > ans)
        {
            ans = even;
            res = i - (even >> 1);
        }
    } /* for */
    for (int i = res; i < res + ans; ++i) putchar(str[i]);
    printf("\n");
    return 0;
} /* main */
/*
最长回文串。
将原串和反转过后的串连接起来,中间用一个未出现过的字符隔开,于是原问题就变成了求这个新字符串的某两个后缀的最长公共前缀。
枚举中心位置,分奇偶讨论回文串的长度,取出最长的解即可。
*/
连续重复子串
poj2406 Power Strings
/*********************************\
* @prob: poj2406 Power_Strings *
* @auth: Wang Junji *
* @stat: Accepted. *
* @date: June. 15th, 2012 *
* @memo: 暴力匹配 *
\*********************************/
#include
#include
// Capacity of the input buffer.
const int maxN = 1000010;
// str: current input string; n: its length; ans: repetition count printed by main.
char str[maxN]; int n, ans;
// Returns true when str[0..n) is unchanged by a shift of len positions,
// i.e. every character equals the one len places before it.
inline bool check(int len)
{
    for (int i = len; i < n; ++i)
        if (str[i - len] != str[i]) return false;
    return true;
}
// For every input string up to the "." line, prints the largest e such that
// the string is some block repeated e times.
int main()
{
    freopen("Power_Strings.in", "r", stdin);
    freopen("Power_Strings.out", "w", stdout);
    while (scanf("%s", str) != EOF && strcmp(str, ".") != 0)
    {
        n = strlen(str);
        // Try block lengths in increasing order; the first divisor of n that
        // is a period gives the largest repetition count.
        for (int block = 1; block <= n; ++block)
        {
            if (n % block != 0 || !check(block)) continue;
            ans = n / block;
            break;
        }
        printf("%d\n", ans);
    }
    return 0;
}
重复次数最多的连续重复子串
poj3693 Maximum repetition substring
/************************************************\
* @prob: poj3693 Maximum repetition substring *
* @auth: Wang Junji * @stat: Accepted. *
* @date: June. 15th, 2012 * @memo: 后缀数组 *
\************************************************/
#include
#include
#include
#include
#include
// Capacity of every working array and of the input buffer.
const int maxN = 100010;
typedef int arr[maxN];
// wa, wb, wv, ws: scratch buffers used by get_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by get_height.
// tab: candidate period lengths collected in main.
arr wa, wb, wv, ws, r, rank, height, sa, tab;
// f: sparse table over height[] built by rmq_init; n: length of str;
// top: number of entries in tab.  str: current input string.
int f[20][maxN], n, top; char str[maxN];
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols; each value is used as an index into ws, so it must lie in [0, m).
//   sa : output; sa[i] receives the start index of the i-th smallest suffix.
//   n  : number of symbols to sort (main passes its length + 1 so that the
//        trailing 0 terminator is part of the text).
//   m  : exclusive upper bound on the symbol values.
// Uses the file-level scratch arrays wa, wb, ws and wv.
inline void get_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
// Counting sort of all suffixes by their first symbol; x holds the current keys.
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
// Each round doubles the compared prefix length j; p counts the distinct
// ranks produced, and the loop stops once every suffix has its own rank.
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
// y lists the suffix starts ordered by second key (the key of position i + j).
// Suffixes whose second half lies past the end come first.
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
// Counting sort of y by first key, filled from the back to keep ties in y order.
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
// y now holds the old keys; x receives the new ranks (equal key pairs share a rank).
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
}
return;
}
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i].  The comparison loop has no bounds check; it stops
// at the first differing symbol, so it relies on the caller's terminator.
inline void get_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
    return;
}
// Builds the sparse table f over height[1..n]: f[q][i] is the minimum of the
// 2^q entries starting at index i.
inline void rmq_init()
{
    for (int i = 1; i <= n; ++i) f[0][i] = height[i];
    for (int q = 0, span = 1; span < n; ++q, span <<= 1)
        for (int i = 1; i + span <= n + 1; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + span]);
    return;
}
inline int LCP(int a, int b)
{
a = rank[a], b = rank[b];
if (a > b) std::swap(a, b); ++a;
int q = 0; for (; 1 << q < b - a + 2; ++q); --q;
return std::min(f[q][a], f[q][b - (1 << q) + 1]);
}
// For every input string up to the "#" line, prints the substring that is a
// block repeated the maximum number of times (lexicographically smallest on
// ties), prefixed by "Case k: ".
//
// Phase 1 enumerates the block length len and the anchors 0, len, 2*len, ...
// to find the best repetition count and every len that reaches it.
// Phase 2 walks the suffixes in sorted order and takes the first start whose
// repetition count for a recorded len equals the best count.
int main()
{
    freopen("substr.in", "r", stdin);
    freopen("substr.out", "w", stdout);
    int Case = 0;
    while (scanf("%s", str) != EOF && strcmp(str, "#"))
    {
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i] - 'a' + 1;
        r[n] = 0;
        get_sa(r, sa, n + 1, 27);
        get_height(r, sa, n);
        rmq_init();
        // The candidate list must start empty for every test case; otherwise
        // lengths recorded for an earlier string leak into this one.
        top = 0;
        int _cnt = 1, _pos = 0, _len = n;
        for (int len = 1; len < n; ++len)
            for (int i = 0; i + len < n; i += len)
            {
                int K = LCP(i, i + len), cnt = K / len + 1;
                // A repetition may start up to len - 1 places before the
                // anchor.  Moving the start back by (len - K % len) gains one
                // more block exactly when the match from there reaches the
                // anchor, i.e. has length at least len - K % len.
                int rem = K % len, pos = i - (len - rem);
                if (rem && pos > -1 && LCP(pos, pos + len) >= len - rem) ++cnt;
                if (cnt > _cnt) _cnt = cnt, top = 0;
                // Record each length once (len only grows, so comparing with
                // the last entry suffices); this keeps top below n.
                if (cnt == _cnt && (top == 0 || tab[top - 1] != len))
                    tab[top++] = len;
            }
        bool flag = 0;
        for (int i = 1; i < n + 1 && !flag; ++i)
        {
            int ths = sa[i];
            for (int j = 0; j < top; ++j)
            {
                // A second block must fit inside the string; otherwise the
                // count is 1 and rank[] must not be read past the text.
                int reps = 1;
                if (ths + tab[j] < n) reps = LCP(ths, ths + tab[j]) / tab[j] + 1;
                if (reps == _cnt)
                {
                    _pos = ths, _len = tab[j]; flag = 1;
                    break;
                }
            }
        }
        printf("Case %d: ", ++Case);
        for (int i = _pos; i < _pos + _cnt * _len; ++i) putchar(str[i]);
        printf("\n");
    }
    return 0;
}
/*
重复次数最多的连续重复子串。
枚举长度len(即重复字串的循环节),然后求出长度为len的子串最多能出现几次。
设长度为len的子串在原串中出现了cnt次,那么这个长度为len * cnt的子串中一定包含了str[0], str[len], str[len * 2], ...中的cnt个,所以只需要看str[i]和str[i + len]往前和往后各能匹配多远。记能够匹配的总长度为K,那么cnt = K / len + 1,若K不能被len整除,则还需要看str[i - len + K % len]和str[i + K % len]能匹配多远,若能够匹配的长度不小于k,那么令此时的cnt加1。
要保证字典序,需要将所有重复了cnt次的可能的循环节长度全部记录下来。然后按后缀数组的顺序从头开始枚举起始位置,并且对于每一个起始位置都枚举一遍所有可能的循环节长度,第一次找到的符合要求的解即为最终的解。
*/
最长公共子串
poj2774 Long Long Message
/*************************************\
* @prob: poj2774 Long Long Message *
* @auth: Wang Junji *
* @stat: Accepted. *
* @date: June. 15th, 2012 *
* @memo: 后缀数组 *
\*************************************/
#include
#include
#include
#include
#include
// Capacity of every working array and of the text buffer.
const int maxN = 200010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers used by get_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by get_height.
arr wa, wb, ws, wv, r, rank, sa, height;
// n: length of the combined text; pos: length of the first string (index of
// the separator).  str: first string, separator, second string.
int n, pos; char str[maxN];
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols; each value is used as an index into ws, so it must lie in [0, m).
//   sa : output; sa[i] receives the start index of the i-th smallest suffix.
//   n  : number of symbols to sort (main passes its length + 1, so the slot
//        after the text is sorted as well).
//   m  : exclusive upper bound on the symbol values.
// Uses the file-level scratch arrays wa, wb, ws and wv.
inline void get_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
// Counting sort of all suffixes by their first symbol; x holds the current keys.
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
// Each round doubles the compared prefix length j; p counts the distinct
// ranks produced, and the loop stops once every suffix has its own rank.
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
// y lists the suffix starts ordered by second key (the key of position i + j).
// Suffixes whose second half lies past the end come first.
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
// Counting sort of y by first key, filled from the back to keep ties in y order.
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
// y now holds the old keys; x receives the new ranks (equal key pairs share a rank).
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
}
return;
}
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i].  The comparison loop has no bounds check; it stops
// at the first differing symbol, so it relies on the caller's terminator.
inline void get_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
    return;
}
// Raises a to b when a is not already larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
}
// Reads two strings and prints the length of their longest common substring.
int main()
{
    freopen("message.in", "r", stdin);
    freopen("message.out", "w", stdout);
    // Text layout: first string, one ' ' separator at index pos, second string.
    scanf("%s", str);
    pos = strlen(str);
    strcat(str, " ");
    scanf("%s", str + pos + 1);
    n = strlen(str);
    for (int i = 0; i < n; ++i) r[i] = str[i];
    // get_sa sorts n + 1 symbols and get_height stops only at a mismatch, so
    // the terminator must be written explicitly rather than left to the
    // zero-initialisation of the global array.
    r[n] = 0;
    get_sa(r, sa, n + 1, 128);
    get_height(r, sa, n);
    // The answer is the largest height between two neighbouring sorted
    // suffixes that start on opposite sides of the separator.
    int ans = 0;
    for (int i = 1; i < n + 1; ++i)
        if ((sa[i] < pos && sa[i - 1] > pos) ||
            (sa[i] > pos && sa[i - 1] < pos))
            gmax(ans, height[i]);
    printf("%d\n", ans);
    return 0;
}
/*
最长公共子串。
把两个字符串连接在一起,中间用一个特殊字符隔开(比任何字符都小)。
根据height数组来找,若相邻的两个后缀分别属于两个字符串(即一个在特殊字符前一个在特殊字符后),那么取所有满足此条件的最大height值。
*/
长度不小于K的公共子串的个数
poj3415 Common Substrings
/*************************************\
* @prob: poj3415 Common Substrings *
* @auth: Wang Junji *
* @stat: Time Limit Exceeded. *
* @date: June. 16th, 2012 *
* @memo: 后缀数组 *
\*************************************/
#include
#include
#include
#include
#include
// Capacity of every working array and of the text buffer.
const int maxN = 200010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers used by get_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by get_height.
// sta: stack used by main while scanning the sorted suffixes.
arr wa, wb, ws, wv, r, rank, sa, height, sta;
// str: first string, separator, second string.  f: sparse table over
// height[] built by rmq_init.  n: length of the combined text; pos: index of
// the separator; K: minimum substring length read from the input.
char str[maxN]; int f[20][maxN], n, pos, K;
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
//   r  : input symbols; each value is used as an index into ws, so it must lie in [0, m).
//   sa : output; sa[i] receives the start index of the i-th smallest suffix.
//   n  : number of symbols to sort (main passes its length + 1 so that the
//        trailing 0 terminator is part of the text).
//   m  : exclusive upper bound on the symbol values.
// Uses the file-level scratch arrays wa, wb, ws and wv.
inline void get_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
// Counting sort of all suffixes by their first symbol; x holds the current keys.
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
// Each round doubles the compared prefix length j; p counts the distinct
// ranks produced, and the loop stops once every suffix has its own rank.
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
// y lists the suffix starts ordered by second key (the key of position i + j).
// Suffixes whose second half lies past the end come first.
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
// Counting sort of y by first key, filled from the back to keep ties in y order.
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
// y now holds the old keys; x receives the new ranks (equal key pairs share a rank).
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
}
return;
}
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i].  The comparison loop has no bounds check; it stops
// at the first differing symbol, so it relies on the caller's terminator.
inline void get_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched]) ++matched;
        height[rank[start]] = matched;
    }
    return;
}
// Builds the sparse table f over height[1..n]: f[q][i] is the minimum of the
// 2^q entries starting at index i.
inline void rmq_init()
{
    for (int i = 1; i <= n; ++i) f[0][i] = height[i];
    for (int q = 0, span = 1; span < n; ++q, span <<= 1)
        for (int i = 1; i + span <= n + 1; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + span]);
    return;
}
inline int LCP(int a, int b)
{
a = rank[a], b = rank[b];
if (a > b) std::swap(a, b); ++a;
int q = 0; while (1 << q < b - a + 2) ++q; --q;
return std::min(f[q][a], f[q][b - (1 << q) + 1]);
}
int main()
{
freopen("common_substr.in", "r", stdin);
freopen("common_substr.out", "w", stdout);
while (scanf("%d", &K) != EOF && K)
{
scanf("%s", str); pos = strlen(str); strcat(str, " ");
scanf("%s", str + pos + 1); n = strlen(str);
for (int i = 0; i < n; ++i) r[i] = str[i];
r[n] = 0;
get_sa(r, sa, n + 1, 128);
get_height(r, sa, n);
rmq_init();
int top = 0, ans = 0;
for (int i = 1; i < n + 1; ++i)
{
if (height[i] < K) top = 0;
if (sa[i] > pos)
for (int j = 0; j < top; ++j)
ans += LCP(sta[j], sa[i]) - K + 1;
if (sa[i] < pos) sta[top++] = sa[i];
}
top = 0;
for (int i = 1; i < n + 1; ++i)
{
if (height[i] < K) top = 0;
if (sa[i] < pos)
for (int j = 0; j < top; ++j)
ans += LCP(sta[j], sa[i]) - K + 1;
if (sa[i] > pos) sta[top++] = sa[i];
}
printf("%d\n", ans);
}
return 0;
}
/*
长度不小于K的公共子串的个数。
首先按照height值不小于K的原则分组,然后在每一组当中统计每组中后缀之间的最长公共前缀之和。扫描一遍,每遇到一个B就统计与之前的A的后缀能产生多少个长度不小于K的公共子串。
*/
每个字符串至少出现两次且互不重叠的最长子串
spoj220 Relevant Phrases of Annihilation
/**********************************************************\
* @prob: spoj220 Relevant Phrases of Annihilation *
* @auth: Wang Junji * @stat: Time Limit Exceeded. *
* @date: June. 16th, 2012 * @memo: 后缀数组 *
\**********************************************************/
#include
#include
#include
#include
#include
// maxN: capacity of the working arrays and the text buffer;
// maxM: capacity of the per-string tables.
const int maxN = 100010, maxM = 20;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers used by calc_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by calc_height.
arr wa, wb, ws, wv, r, rank, sa, height;
// pos[i] / len[i]: start offset and length of the i-th string inside str.
// n: index of the last text symbol; N: number of strings; T: test cases left.
int pos[maxM], len[maxM], n, N, T;
// All strings of the current test case, each followed by a ' ' separator.
char str[maxN];
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
} /* cmp */
inline void calc_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
} /* for */
return;
} /* calc_sa */
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i], where a match is never extended across a 0 symbol
// (main encodes the separators as 0).
inline void calc_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched] && r[start + matched])
            ++matched;
        height[rank[start]] = matched;
    } /* for */
    return;
} /* calc_height */
// Index of the string that owns text position x: the last entry of
// pos[0..N) that is not greater than x.
inline int plc(const int& x)
{
    const int* after = std::upper_bound(pos, pos + N, x);
    return after - pos - 1;
} /* plc */
// Raises a to b when a is not already larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
} /* gmax */
// Lowers a to b when a is not already smaller; returns a reference to a.
inline int& gmin(int& a, const int& b)
{
    if (!(a < b)) a = b;
    return a;
} /* gmin */
// Helper: marks the first count strings as "no occurrence seen yet".
// The sentinels make max - min negative, so an unseen string fails the test
// in group_is_witness().
static inline void reset_ranges(int* min_sa, int* max_sa, int count)
{
    for (int j = 0; j < count; ++j)
    {
        min_sa[j] = 0x3f3f3f3f;
        max_sa[j] = -1;
    }
} /* reset_ranges */
// Helper: true when each of the first count strings has two recorded
// occurrences whose start positions are at least k apart.
static inline bool group_is_witness(const int* min_sa, const int* max_sa,
                                    int count, int k)
{
    for (int j = 0; j < count; ++j)
        if (max_sa[j] - min_sa[j] < k) return false;
    return true;
} /* group_is_witness */
// Returns true when some substring of length k occurs at least twice without
// overlap inside every one of the N strings.
//
// Sorted suffixes are cut into groups wherever height[i] < k.  For each
// group the smallest and largest start position per owning string are
// tracked.  The tables hold one entry per string (maxM) and only the N
// entries in use are reset; the earlier declaration sized max_sa by maxN,
// so every group boundary cleared 400 KB.
inline bool check(int k)
{
    static int min_sa[maxM], max_sa[maxM];
    reset_ranges(min_sa, max_sa, N);
    int owner = plc(sa[1]);
    min_sa[owner] = max_sa[owner] = sa[1];
    for (int i = 2; i < n + 1; ++i)
    {
        if (height[i] < k)
        {
            // Group boundary: judge the finished group, then start a new
            // one that contains only sa[i].
            if (group_is_witness(min_sa, max_sa, N, k)) return 1;
            reset_ranges(min_sa, max_sa, N);
            owner = plc(sa[i]);
            min_sa[owner] = max_sa[owner] = sa[i];
            continue;
        } /* if */
        owner = plc(sa[i]);
        gmin(min_sa[owner], sa[i]);
        gmax(max_sa[owner], sa[i]);
    } /* for */
    return group_is_witness(min_sa, max_sa, N, k);
} /* check */
// For each of T test cases reads N strings and prints the length of the
// longest substring that occurs at least twice, without overlap, in every
// one of them.
int main()
{
    freopen("phrases.in", "r", stdin);
    freopen("phrases.out", "w", stdout);
    scanf("%d", &T);
    while (T--)
    {
        scanf("%d", &N); int pst = 0, max_len = 0;
        for (int i = 0; i < N; ++i)
        {
            pos[i] = pst;
            scanf("%s", str + pst);
            len[i] = strlen(str + pst);
            gmax(max_len, len[i]);
            // Replace the string's NUL by a separator and continue after it.
            str[pst + len[i]] = ' ';
            pst += len[i] + 1;
        } /* for */
        if (pst == 0)
        {
            // No strings were read; there is nothing to index.
            printf("0\n");
            continue;
        } /* if */
        // pst is the exact number of characters written for THIS test case.
        // Measuring with strlen(str) would run into leftovers of an earlier,
        // longer test case.  The last separator (index pst - 1) serves as the
        // terminator of the text.
        n = pst - 1;
        str[n] = 0;
        for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
        // The terminator must be written for every test case; r[n] may still
        // hold a symbol of the previous one.
        r[n] = 0;
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);
        int L = 0, R = max_len + 1, res = 0;
        while (L < R)
        {
            int Mid = (L + R) >> 1;
            if (check(Mid)) res = Mid, L = Mid + 1;
            else R = Mid;
        } /* while */
        printf("%d\n", res);
    } /* while */
    return 0;
} /* main */
/*
每个字符串至少出现两次且不重叠的最长子串。
二分答案K,按height值分组,然后判断每一组中的字符串是否在每一个串中都出现过,并且在每一个串中出现的最大位置和最小位置之差不小于K。
*/
出现在大于一半的字符串中的最长子串
poj3294 Life Forms
/************************************\
* @prob: poj3294 Life_Forms *
* @auth: Wang Junji *
* @stat: Accepted. *
* @date: June. 16th, 2012 *
* @memo: 后缀数组 *
\************************************/
#include
#include
#include
#include
#include
#include
using std::upper_bound;
// maxN: capacity of the working arrays and the combined text;
// maxM: capacity of the per-string tables; maxL: capacity of one input string.
const int maxN = 120010, maxM = 110, maxL = 1010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers used by calc_sa.
// r: symbol sequence; sa: suffix array; rank, height: filled by calc_height.
arr wa, wb, ws, wv, r, rank, sa, height;
// len[i] / pos[i]: length and start offset of the i-th string inside str.
// n: length of the combined text; N: number of strings.
int len[maxM], pos[maxM], n, N;
// tmp_str: the strings as read; str: all of them joined by ' ' separators.
char tmp_str[maxM][maxL], str[maxN];
// One bit per input string: set when the current group of sorted suffixes
// contains a suffix starting inside that string.  std::bitset needs its size
// as a template argument; maxM matches the other per-string tables.
std::bitset<maxM> marked;
// True when the key pairs (r[a], r[a + len]) and (r[b], r[b + len]) are equal.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}
inline void calc_sa(int* r, int* sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
for (int j = 1, p = 1; p < n; j <<= 1, m = p)
{
p = 0;
for (int i = n - j; i < n; ++i) y[p++] = i;
for (int i = 0; i < n; ++i)
if (sa[i] - j > -1) y[p++] = sa[i] - j;
for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
for (int i = 0; i < m; ++i) ws[i] = 0;
for (int i = 0; i < n; ++i) ++ws[wv[i]];
for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
std::swap(x, y); x[sa[0]] = 0; p = 1;
for (int i = 1; i < n; ++i)
x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
}
return;
}
// Fills rank[] and height[] from the suffix array sa[0..n] of r.
// height[i] (i in 1..n) is the length of the common prefix of the suffixes
// sa[i - 1] and sa[i], where a match is never extended across a 0 symbol
// (main encodes the separators and the terminator as 0).
inline void calc_height(int* r, int* sa, int n)
{
    for (int idx = 1; idx <= n; ++idx) rank[sa[idx]] = idx;
    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        const int prev = sa[rank[start] - 1];
        // Dropping the first symbol of the previous match keeps the rest valid.
        if (matched > 0) --matched;
        while (r[start + matched] == r[prev + matched] && r[start + matched])
            ++matched;
        height[rank[start]] = matched;
    }
    return;
}
// Raises a to b when a is not already larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
}
// Returns true when some group of sorted suffixes with pairwise common
// prefix >= k contains suffixes from more than half of the N strings.
// Groups are cut wherever height[i] < k; `marked` records which strings own
// a suffix of the current group.
inline bool check(int k)
{
    marked.reset();
    int owner = upper_bound(pos, pos + N, sa[1]) - pos - 1;
    marked.set(owner);
    for (int i = 2; i <= n; ++i)
    {
        if (height[i] < k)
        {
            // Group boundary: judge the finished group, then start afresh.
            if (marked.count() > N >> 1) return true;
            marked.reset();
        }
        owner = upper_bound(pos, pos + N, sa[i]) - pos - 1;
        marked.set(owner);
    }
    return marked.count() > N >> 1;
}
// For every test case (N strings; terminated by N == 0) prints all longest
// substrings shared by more than half of the strings, in sorted order, or
// "?" when there is none.  Each test case is followed by a blank line.
int main()
{
    freopen("Life_Forms.in", "r", stdin);
    freopen("Life_Forms.out", "w", stdout);
    while (scanf("%d", &N) != EOF && N)
    {
        if (N == 1)
        {
            // A single string is its own answer.
            scanf("%s", str);
            puts(str);
            printf("\n");
            continue;
        }
        int max_len = 0;
        for (int i = 0; i < N; ++i)
        {
            scanf("%s", tmp_str[i]);
            len[i] = strlen(tmp_str[i]);
            gmax(max_len, len[i]);
        }
        // Join the strings with single ' ' separators; pos[i] is where the
        // i-th string starts.  The last strcpy leaves the text NUL-terminated.
        int write_at = 0;
        for (int i = 0; i < N; ++i)
        {
            pos[i] = write_at;
            strcpy(str + write_at, tmp_str[i]);
            write_at += len[i];
            if (i + 1 < N) str[write_at++] = ' ';
        }
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
        r[n] = 0;
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);
        int lo = 0, hi = max_len + 1, res = 0;
        while (lo < hi)
        {
            const int mid = (lo + hi) >> 1;
            if (check(mid))
            {
                res = mid;
                lo = mid + 1;
            }
            else hi = mid;
        }
        if (!res)
        {
            printf("?\n\n");
            continue;
        }
        // Repeat the grouping of check(res); print one representative for
        // every group that covers more than half of the strings.  The index
        // n + 1 acts as the boundary that closes the last group.
        marked.reset();
        marked.set(upper_bound(pos, pos + N, sa[1]) - pos - 1);
        for (int i = 2; i <= n + 1; ++i)
        {
            const bool past_end = i > n;
            if (past_end || height[i] < res)
            {
                if (marked.count() > N >> 1)
                {
                    const int from = sa[i - 1];
                    for (int j = from; j < from + res; ++j)
                        putchar(str[j]);
                    printf("\n");
                }
                if (!past_end) marked.reset();
            }
            if (!past_end)
                marked.set(upper_bound(pos, pos + N, sa[i]) - pos - 1);
        }
        printf("\n");
    }
    return 0;
}
/*
出现在大于一半的字符串中的最长子串。
先把所有字符串连接起来,中间用一个没有出现过的字符连接。二分答案的长度k,将后缀分成height值不小于k的若干组,然后看是否至少存在一组中的后缀在大于一半的字符串中出现过,若是,则此k成立,否则不成立。输出时按照字典序(sa的顺序)扫描一遍,将所有符合条件的部分都输出即可。
*/