题意:给一个字符串,求最长的出现至少K次的子串,子串可以重叠。
难度:*
和后缀数组的经典模型一样:二分答案长度x,在H数组上扫描,若存在连续K-1个H值都不小于x(即同一组内至少有K个后缀),则长度x可行。
CODE:
#include
#include
#include
using namespace std;
const int MAX_N = 20005;
int n, K, a[MAX_N];

// Reads n and K, then n integers into a[0..n-1].  Every value is stored
// incremented by one and a[n] is set to 0, so 0 appears only at the end
// (presumably as the terminator the suffix-array code relies on).
void init()
{
    scanf("%d%d", &n, &K);
    for (int i = 0; i < n; ++i) {
        scanf("%d", &a[i]);
        ++a[i];
    }
    a[n] = 0;
}
int ws[1000005], wv[MAX_N], wa[MAX_N], wb[MAX_N];
int sa[MAX_N], H[MAX_N], R[MAX_N];
void da(int *a, int *sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1) {
int p = 0;
for (int i = n - k; i < n; i ++) y[p ++] = i;
for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[wv[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
swap(x, y);
p = 1; x[sa[0]] = 0;
for (int i = 1; i < n; i ++)
x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
// Fills R with the inverse of sa for ranks 1..n (R[sa[r]] == r) and sets
// H[R[i]] to the length of the common prefix of suffix i and the suffix that
// precedes it in sa.  The matched length is carried over from the previous
// position minus one.  The extension loop has no explicit bound; it stops
// only on a mismatch (presumably guaranteed by the terminator in a[]).
void calc()
{
    for (int r = 1; r <= n; ++r) R[sa[r]] = r;

    int len = 0;
    for (int i = 0; i < n; ++i) {
        if (len > 0) --len;
        const int prev = sa[R[i] - 1];
        while (a[i + len] == a[prev + len]) ++len;
        H[R[i]] = len;
    }
}
// Returns true when H[2..n] contains a run of consecutive entries >= x that
// brings the running counter (which starts at 1) up to K; false otherwise.
bool check(int x)
{
    int run = 1;
    for (int i = 2; i <= n; ++i) {
        run = (H[i] >= x) ? run + 1 : 1;
        if (run >= K) return true;
    }
    return false;
}
// Builds the suffix array and H array, then binary-searches over lengths
// 0..n using check() and prints the upper bound left when the search ends.
void doit()
{
    da(a, sa, n + 1, 1000002);
    calc();

    int lo = 0;
    int hi = n;
    while (lo <= hi) {
        // lo and hi are both non-negative inside the loop.
        const int mid = (lo + hi) / 2;
        if (check(mid)) {
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    }
    printf("%d\n", hi);
}
// Entry point: reads the input with init(), then solves and prints with doit().
int main()
{
// Input redirection to a local file, left disabled.
//freopen("bzoj1717.in", "r", stdin);
init();
doit();
return 0;
}
题意:给定一个字符串,每次从当前字符串的头或者尾取一个字符,放入新字符串的尾部,求字典序最小的新字符串。
难度:**
首先,每次比较头尾字符、然后取较小的那个加入新串——这个朴素的贪心是不完整的。为什么?因为当头尾两个字符相同时,就不知道该取哪一个了。如果发现相同就继续向内逐个比较,那么单次比较最坏是O(n),总的最坏时间复杂度为O(n^2),非常不优美。其实向内逐个比较字符,本质上比的是后缀的大小,而这个顺序正好可以用后缀数组求出来。把字符串翻转,用经典方法(中间加一个分隔符)把原串和翻转串拼接起来,因为取尾部的字符时实际上比较的是从后往前读的“后缀”。然后求Rank数组,用两个指针扫一遍,O(n)就解决了。
CODE:
#include
#include
#include
using namespace std;
const int MAX_N = 60005;
int n, a[MAX_N], l1;
char s[30005];

// Reads the character count and then that many characters, and builds in a[]
// the sequence  text + 1 + reversed(text) + 0.  On return l1 is the original
// length and n is the length of the combined sequence without the trailing 0
// (2 * l1 + 1).
//
// The characters are read with the " %c" directive, which skips any
// whitespace before each character.  The earlier "%c" + getchar() pairing
// assumed exactly one '\n' after every token, so CRLF line endings or
// trailing blanks ended up stored as part of the text.
// NOTE(review): s has room for 30005 characters; the count read from the
// input is not checked against that.
void init()
{
    scanf("%d", &n);
    l1 = n;
    for (int i = 0; i < n; i++) scanf(" %c", &s[i]);

    for (int i = 0; i < n; i++) a[i] = (int)s[i];
    a[n] = 1;                                       // separator between the two copies
    for (int i = 0; i < n; i++) a[n + n - i] = a[i]; // reversed copy in a[n+1..2n]
    n = n * 2 + 1;
    a[n] = 0;                                       // terminator
}
int ws[MAX_N], wv[MAX_N], wa[MAX_N], wb[MAX_N];
int sa[MAX_N], H[MAX_N], R[MAX_N];
void da(int *a, int *sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1) {
int p = 0;
for (int i = n - k; i < n; i ++) y[p ++] = i;
for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[wv[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
swap(x, y);
p = 1; x[sa[0]] = 0;
for (int i = 1; i < n; i ++)
x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
// Fills R with the inverse of sa for ranks 1..n (R[sa[r]] == r) and sets
// H[R[i]] to the length of the common prefix of suffix i and the suffix that
// precedes it in sa.  The matched length is carried over from the previous
// position minus one.  The extension loop has no explicit bound; it stops
// only on a mismatch (presumably guaranteed by the terminator in a[]).
void calc()
{
    for (int r = 1; r <= n; ++r) R[sa[r]] = r;

    int len = 0;
    for (int i = 0; i < n; ++i) {
        if (len > 0) --len;
        const int prev = sa[R[i] - 1];
        while (a[i + len] == a[prev + len]) ++len;
        H[R[i]] = len;
    }
}
void doit()
{
da(a, sa, n + 1, 128);
calc();
//for (int i = 1; i <= n; i ++) printf("%d ", sa[i]); printf("\n");
//for (int i = 0; i < n; i ++) printf("%d ", R[i]); printf("\n");
int l = 0, r = l1 - 1, cnt = 0;
while (l <= r) {
int x = l, y = n - r - 1;
//printf("%d %d\n", x, y);
if (cnt == 80) printf("\n"), cnt = 0;
if (R[x] < R[y]) printf("%c", s[l]), l ++, cnt ++;
else printf("%c", s[r]), r --, cnt ++;
}
printf("\n");
}
// Entry point: redirects stdin to the file "bzoj1692.in", then reads the
// input with init() and solves/prints with doit().
int main()
{
// The return value of freopen is not checked.
freopen("bzoj1692.in", "r", stdin);
init();
doit();
return 0;
}
题意:给定一个01串,对每个出现次数大于1的子串,按子串的字典序输出其出现次数。
难度:**~***
首先根据后缀数组的一些经典应用,我们可以知道每个后缀对子串个数的贡献是n-H[i]-sa[i],而且顺序就是字典序,我们枚举每个子串暴力的用H数组前后求匹配:L为向前最多到哪,R为向后最多到哪,答案是R - L。其实还有更简单的Trie树做法。
CODE:
#include
#include
#include
using namespace std;
const int MAX_N = 3005;
int n, a[MAX_N];
char s[MAX_N];

// Reads the length n and the digit string s, then stores every character in
// a[] as (digit value + 1).  a[n] is set to 0 afterwards.
void init()
{
    scanf("%d%s", &n, s);
    for (int i = 0; i < n; ++i) {
        a[i] = (s[i] - '0') + 1;
    }
    a[n] = 0;
}
int ws[MAX_N], wv[MAX_N], wa[MAX_N], wb[MAX_N];
int sa[MAX_N], H[MAX_N], R[MAX_N];
void da(int *a, int *sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1) {
int p = 0;
for (int i = n - k; i < n; i ++) y[p ++] = i;
for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[wv[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
swap(x, y);
p = 1; x[sa[0]] = 0;
for (int i = 1; i < n; i ++)
x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
// Fills R with the inverse of sa for ranks 1..n (R[sa[r]] == r) and sets
// H[R[i]] to the length of the common prefix of suffix i and the suffix that
// precedes it in sa.  The matched length is carried over from the previous
// position minus one.  The extension loop has no explicit bound; it stops
// only on a mismatch (presumably guaranteed by the terminator in a[]).
void calc()
{
    for (int r = 1; r <= n; ++r) R[sa[r]] = r;

    int len = 0;
    for (int i = 0; i < n; ++i) {
        if (len > 0) --len;
        const int prev = sa[R[i] - 1];
        while (a[i + len] == a[prev + len]) ++len;
        H[R[i]] = len;
    }
}
// Prints, one per line, the occurrence count of every substring that occurs
// more than once.  Substrings are visited rank by rank and, within a rank,
// by increasing length starting at H[i] + 1 (the prefixes of suffix sa[i]
// that were not already prefixes of the previous suffix).
//
// Changes from the earlier version, none of which alter the output:
//  * the backward scan was removed: j always exceeds H[i], so that loop
//    never ran and the left boundary was always i;
//  * r <= n is tested before H[r] is read;
//  * the length loop stops at the first length with a single occurrence,
//    because the set of matching suffixes can only shrink as j grows.
void doit()
{
    da(a, sa, n + 1, 4);
    calc();
    for (int i = 1; i <= n; i++) {
        for (int j = H[i] + 1; sa[i] + j - 1 < n; j++) {
            // Ranks i..r-1 are exactly the suffixes that start with the
            // length-j prefix of suffix sa[i].
            int r = i + 1;
            while (r <= n && H[r] >= j) r++;
            const int occurrences = r - i;
            if (occurrences <= 1) break;
            printf("%d\n", occurrences);
        }
    }
}
// Entry point: redirects stdin to the file "bzoj2251.in", then reads the
// input with init() and solves/prints with doit().
int main()
{
// The return value of freopen is not checked.
freopen("bzoj2251.in", "r", stdin);
init();
doit();
return 0;
}
题意:给定一个字符串,求有多少个子串出现至少两次,子串不能重叠。
难度:**
如果已经会了后缀数组的一些模型题和上一道题,这道题就变水了,在上一题的基础上,向前向后匹配的时候再多加一个看两个子串是否有重叠的判定就可以了。
CODE:
#include
#include
#include
using namespace std;
const int MAX_N = 10005;
int n, a[MAX_N];
char s[MAX_N];

// Converts the NUL-terminated string already stored in s into a[]: n becomes
// its length, every character is stored as its offset from 'a' plus one, and
// a[n] is set to 0.
void init()
{
    n = strlen(s);
    for (int i = 0; i < n; ++i) {
        a[i] = (s[i] - 'a') + 1;
    }
    a[n] = 0;
}
int ws[MAX_N], wv[MAX_N], wa[MAX_N], wb[MAX_N];
int sa[MAX_N], H[MAX_N], R[MAX_N];
void da(int *a, int *sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1) {
int p = 0;
for (int i = n - k; i < n; i ++) y[p ++] = i;
for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[wv[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
swap(x, y);
p = 1; x[sa[0]] = 0;
for (int i = 1; i < n; i ++)
x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
// Fills R with the inverse of sa for ranks 1..n (R[sa[r]] == r) and sets
// H[R[i]] to the length of the common prefix of suffix i and the suffix that
// precedes it in sa.  The matched length is carried over from the previous
// position minus one.  The extension loop has no explicit bound; it stops
// only on a mismatch (presumably guaranteed by the terminator in a[]).
void calc()
{
    for (int r = 1; r <= n; ++r) R[sa[r]] = r;

    int len = 0;
    for (int i = 0; i < n; ++i) {
        if (len > 0) --len;
        const int prev = sa[R[i] - 1];
        while (a[i + len] == a[prev + len]) ++len;
        H[R[i]] = len;
    }
}
// Prints the number of distinct substrings that have at least two
// occurrences which do not overlap.
//
// Every distinct substring is visited once, as the length-j prefix
// (j > H[i]) of the first suffix in sa order that starts with it.  Its
// occurrences are the suffixes at ranks i..r-1 with H[i+1..r-1] >= j, and two
// of them are disjoint exactly when the largest and smallest start positions
// in that group differ by at least j.
//
// Bug fixed: the earlier version only measured distances from sa[i] itself
// (its backward scan never ran, because j > H[i]).  That misses groups whose
// first-ranked member lies between two others.  Example: in "aabaabaaabaab"
// the substring "aabaa" starts at 0, 3 and 7; the rank order is 3, 7, 0, both
// |3-7| and |3-0| are below 5, yet the occurrences at 0 and 7 are disjoint.
void doit()
{
    da(a, sa, n + 1, 28);
    calc();
    int cnt = 0;
    for (int i = 1; i <= n; i++) {
        for (int j = H[i] + 1; sa[i] + j - 1 < n; j++) {
            int first = sa[i];   // smallest start position in the group
            int last = sa[i];    // largest start position in the group
            // Bound check comes first so H is never read past rank n (it may
            // hold values from an earlier, longer test case).
            for (int r = i + 1; r <= n && H[r] >= j; r++) {
                if (sa[r] < first) first = sa[r];
                if (sa[r] > last) last = sa[r];
            }
            // A longer prefix has a subset of these occurrences and needs an
            // even larger gap, so the first failure ends this rank.
            if (last - first < j) break;
            cnt++;
        }
    }
    printf("%d\n", cnt);
}
// Entry point: processes one word per test case until end of input or until
// a word starting with '#' is read.
int main()
{
    // Input redirection to a local file, left disabled:
    // freopen("hdu3518.in", "r", stdin);
    for (;;) {
        if (scanf("%s", s) == EOF) break;
        if (s[0] == '#') break;
        init();
        doit();
    }
    return 0;
}
题意:给两个字符串,求它们的最长公共子串(Longest Common Substring)的长度。
难度:*(模板题)
CODE:
#include
#include
#include
using namespace std;
const int MAX_N = 200005;
char s[100005];
int n, l1, l2, a[MAX_N];

// Reads two whitespace-delimited words and lays them out in a[] as
//   first word, the value 1, second word, the value 0.
// l1 and l2 receive the two lengths; n ends up as l1 + 1 + l2 (the index of
// the trailing 0).
void init()
{
    n = 0;
    for (int word = 0; word < 2; ++word) {
        scanf("%s", s);
        const int len = strlen(s);
        if (word == 0) {
            l1 = len;
        } else {
            l2 = len;
        }
        for (int i = 0; i < len; ++i) a[n++] = s[i];
        if (word == 0) a[n++] = 1;   // separator after the first word
    }
    a[n] = 0;
}
int ws[MAX_N], wv[MAX_N], wa[MAX_N], wb[MAX_N];
int sa[MAX_N], H[MAX_N], R[MAX_N];
void da(int *a, int *sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1) {
int p = 0;
for (int i = n - k; i < n; i ++) y[p ++] = i;
for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[wv[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
swap(x, y);
p = 1; x[sa[0]] = 0;
for (int i = 1; i < n; i ++)
x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
// Fills R with the inverse of sa for ranks 1..n (R[sa[r]] == r) and sets
// H[R[i]] to the length of the common prefix of suffix i and the suffix that
// precedes it in sa.  The matched length is carried over from the previous
// position minus one.  The extension loop has no explicit bound; it stops
// only on a mismatch (presumably guaranteed by the terminator in a[]).
void calc()
{
    for (int r = 1; r <= n; ++r) R[sa[r]] = r;

    int len = 0;
    for (int i = 0; i < n; ++i) {
        if (len > 0) --len;
        const int prev = sa[R[i] - 1];
        while (a[i + len] == a[prev + len]) ++len;
        H[R[i]] = len;
    }
}
void doit()
{
da(a, sa, n + 1, 128);
calc();
int ans = 0;
for (int i = 2; i <= n; i ++) {
if (H[i] > ans) {
int t1 = sa[i - 1] > l1, t2 = sa[i] > l1;
if (t1 ^ t2 == 1) ans = H[i];
}
}
printf("%d\n", ans);
}
// Entry point: redirects stdin to the file "codevs3160.in", then reads the
// input with init() and solves/prints with doit().
int main()
{
// The return value of freopen is not checked.
freopen("codevs3160.in", "r", stdin);
init();
doit();
return 0;
}
题意:给定一个字符串,Ti表示以第i个字符为开始的后缀,求所有len(Ti)+len(Tj)-2*LCP(Ti,Tj)之和,其中1<=i<j<=n。
难度:***~****
首先这个式子可以拆成两部分计算,一部分是两个后缀长度之和,一部分是LCP长度之和。第一部分很简单,手推一推就能发现每个长度的后缀都计算了n-1次,所以第一部分的答案为(n-1)*(n*(n+1)/2),注意计算过程中需要强制类型转换为long long。而第二部分和之前的一个模型类似,需要用到单调栈。两个后缀的LCP在H数组里其实就是一段区间的最小值,而反过来H数组每段区间的最小值就对应着两个后缀的LCP,我们计算总和,不关心都是谁的,所以从后往前扫描,用单调栈维护一个H值递增的位置序列,用f[i]表示H数组中以i为左端点的所有区间[i,r](i<=r<=n)的最小值之和。每次计算f[i]时,先把栈中H值大于H[i]的位置弹出,则f[i]=f[st[top]]+H[i]*(st[top]-i),然后把f[i]计入总和就行。
CODE:
#include
#include
#include
using namespace std;
const int MAX_N = 500005;
typedef long long ll;
char s[MAX_N];
int a[MAX_N], n;

// Reads one whitespace-delimited word into s, sets n to its length and
// copies the character codes into a[0..n-1]; a[n] is set to 0.
void init()
{
    scanf("%s", s);
    n = strlen(s);
    for (int i = 0; i < n; ++i) {
        a[i] = s[i];
    }
    a[n] = 0;
}
int ws[MAX_N], wv[MAX_N], wa[MAX_N], wb[MAX_N];
int sa[MAX_N], H[MAX_N], R[MAX_N];
void da(int *a, int *sa, int n, int m)
{
int *x = wa, *y = wb;
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1) {
int p = 0;
for (int i = n - k; i < n; i ++) y[p ++] = i;
for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
for (int i = 0; i < m; i ++) ws[i] = 0;
for (int i = 0; i < n; i ++) ws[wv[i]] ++;
for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
swap(x, y);
p = 1; x[sa[0]] = 0;
for (int i = 1; i < n; i ++)
x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
// Fills R with the inverse of sa for ranks 1..n (R[sa[r]] == r) and sets
// H[R[i]] to the length of the common prefix of suffix i and the suffix that
// precedes it in sa.  The matched length is carried over from the previous
// position minus one.  The extension loop has no explicit bound; it stops
// only on a mismatch (presumably guaranteed by the terminator in a[]).
void calc()
{
    for (int r = 1; r <= n; ++r) R[sa[r]] = r;

    int len = 0;
    for (int i = 0; i < n; ++i) {
        if (len > 0) --len;
        const int prev = sa[R[i] - 1];
        while (a[i + len] == a[prev + len]) ++len;
        H[R[i]] = len;
    }
}
ll ans = 0, f[MAX_N];
int st[MAX_N];
void doit()
{
da(a, sa, n + 1, 128);
calc();
ans = (ll)(n - 1) * (ll)((ll)n * (ll)(n + 1) / 2);
ll tmp = 0;
int top = 0;
st[++ top] = n + 1;
for (int i = n; i >= 2; i --) {
while (top && H[st[top]] > H[i]) top --;
int x = st[top];
f[i] = 1ll * H[i] * (x - i) + f[x];
tmp += f[i];
st[++ top] = i;
}
ans -= 2ll * tmp;
printf("%lld\n", ans);
}
// Entry point: redirects stdin to the file "bzoj3238.in", then reads the
// input with init() and solves/prints with doit().
int main()
{
// The return value of freopen is not checked.
freopen("bzoj3238.in", "r", stdin);
init();
doit();
return 0;
}