是时候整理字符串模板了
区间dp+hash
原题地址
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
// Not referenced by the code shown in this program.
#define maxn 100200
// Input string; main stores it 1-based (characters start at str[1]).
char str[1001];
// Memo table for solve(l, r); main fills it with -1 to mean "not computed yet".
int dp[1001][1001];
// Prefix hashes: strhash[i] is the polynomial hash of str[1..i] modulo M.
long long strhash[1001];
// Modulus used by every hash computation.
long long M = 1000000007;
// p[i] = base^i mod M, filled by main.
long long p[1001];
// Multiplier of the polynomial hash.
long long base = 1e6 + 7;
// Returns the hash of str[l..r] (1-based, inclusive), reduced into [0, M),
// computed from the prefix hashes in strhash[] and the powers in p[].
long long getHash(int l, int r)
{
    const int width = r - l + 1;
    // Scale the prefix that ends just before l so it lines up with strhash[r].
    const long long shifted_prefix = (strhash[l - 1] * p[width]) % M;
    // Adding M keeps the difference non-negative before the final reduction.
    return (strhash[r] - shifted_prefix + M) % M;
}
// Maps a character to the integer fed into the hash:
// 'a'..'z' -> 1..26, 'A'..'Z' -> 27..52, anything else -> ch - '0' + 54.
int getKey(char ch)
{
    const bool is_lower = ch >= 'a' && ch <= 'z';
    const bool is_upper = ch >= 'A' && ch <= 'Z';
    if (is_lower) return ch - 'a' + 1;
    return is_upper ? ch - 'A' + 27 : ch - '0' + 54;
}
// Memoised interval DP over str[l..r] (1-based, inclusive).
// For each leading block length i it skips every consecutive copy of
// str[l..l+i-1], then adds the cost of that block to the cost of whatever
// remains, and keeps the minimum. A single character costs 1.
// Results are cached in dp[l][r]; -1 marks an entry that is not computed yet.
int solve(int l, int r)
{
    if (dp[l][r] != -1) return dp[l][r];
    if (l == r)
    {
        // This used to be `dp[l][r] == 1;`, a comparison with no effect,
        // so the base case was never stored in the memo table.
        dp[l][r] = 1;
        return 1;
    }
    int ans = 99999999;
    // i must stay below r-l+1: with i == r-l+1 the call solve(l, l+i-1)
    // would recurse on the very same interval.
    for (int i = 1; i <= r - l; ++i)
    {
        const long long rep = getHash(l, l + i - 1);
        // Advance j past every consecutive block whose hash equals the first one.
        int j = l;
        while (j <= r - i + 1 && rep == getHash(j, j + i - 1)) j += i;
        int cost = solve(l, l + i - 1);
        if (j <= r) cost += solve(j, r);
        ans = min(ans, cost);
    }
    dp[l][r] = ans;
    return ans;
}
int main()
{
int len;
scanf_s("%d", &len);
cin >> str + 1;
p[0] = 1;
for (int i = 1; i <= len; ++i) p[i] = p[i - 1] * base % M;
strhash[0] = 0;
for (int i = 1; i <= len; ++i) strhash[i] = ((strhash[i - 1] * base) % M + getKey(str[i])) % M;
//cout<
memset(dp, -1, sizeof(dp));
printf("%d\n", solve(1, len));
return 0;
}
参考博客
代码来自参考博客
// Given an n*m character matrix, find two squares with identical content and
// output the largest possible side length of such a square.
// 2D hashing: hash every row first, then hash every column on top of the row hashes.
typedef unsigned long long ull;
// N bounds the matrix dimensions; INF is not referenced by the code shown.
const int N = 500 + 10, INF = 0x3f3f3f3f;
// seed is the multiplier along a row, Seed the multiplier down a column.
const int seed = 131, Seed = 1789;
// Input matrix; main reads each row starting at column 1.
char ori[N][N];
// hash1[i][j]: hash of row i, columns 1..j. seed_pow[k] = seed^k (64-bit wraparound).
ull hash1[N][N], seed_pow[N];
// hash2[i][j]: hash of the rectangle (1,1)..(i,j). Seed_pow[k] = Seed^k (64-bit wraparound).
ull hash2[N][N], Seed_pow[N];
// Scratch buffer in which check() collects and sorts the window hashes.
ull a[N*N];
// Matrix dimensions read by main.
int n, m;
bool check(int k)
{
int tot = 0;
for(int i = k; i <= n; i++)
{
for(int j = k; j <= m; j++)
{
ull tmp = hash2[i][j] - hash2[i-k][j] * Seed_pow[k] - hash2[i][j-k] * seed_pow[k] + hash2[i-k][j-k] * Seed_pow[k] * seed_pow[k];
//ull tmp = hash2[i][j] - hash2[i-k][j] * Seed_pow[k] - (hash2[i][j-k] - hash2[i-k][j-k] * Seed_pow[k]) * seed_pow[k];
a[++tot] = tmp;
}
}
sort(a + 1, a + 1 + tot);
for(int i = 1; i <= tot-1; i++)
if(a[i] == a[i+1]) return true;
return false;
}
// Reads the matrix, builds the row hashes and the 2D hashes, then binary
// searches the largest side length for which check() succeeds.
int main()
{
    seed_pow[0] = 1;
    Seed_pow[0] = 1;
    for (int e = 1; e < N; ++e)
    {
        seed_pow[e] = seed_pow[e - 1] * seed;
        Seed_pow[e] = Seed_pow[e - 1] * Seed;
    }
    scanf("%d%d", &n, &m);
    for (int row = 1; row <= n; ++row) scanf(" %s", ori[row] + 1);
    // Pass 1: hash along each row.
    for (int row = 1; row <= n; ++row)
    {
        for (int col = 1; col <= m; ++col)
        {
            hash1[row][col] = hash1[row][col - 1] * seed + ori[row][col];
        }
    }
    // Pass 2: hash the row hashes down each column.
    for (int col = 1; col <= m; ++col)
    {
        for (int row = 1; row <= n; ++row)
        {
            hash2[row][col] = hash2[row - 1][col] * Seed + hash1[row][col];
        }
    }
    int best = 0;
    int lo = 0, hi = min(n, m);
    while (lo <= hi)
    {
        const int mid = (lo + hi) >> 1;
        if (check(mid))
        {
            best = mid;
            lo = mid + 1;
        }
        else
        {
            hi = mid - 1;
        }
    }
    printf("%d\n", best);
    return 0;
}
kmp求字符串最小循环节
原题地址
#include
#include
using namespace std;
// Capacity of the input buffer and of the failure table.
const int N = 1e6 + 5;
// Current input string; main reads it 1-based (s + 1).
char s[N];
// n: length of the current string. ne[i]: length of the longest proper
// prefix of s[1..i] that is also a suffix of it, filled by getNext().
int n, ne[N];
// Fills ne[1..n] with the KMP failure function of s[1..n].
void getNext() {
    ne[1] = 0;
    int border = 0;  // length of the border currently being extended
    for (int pos = 2; pos <= n; ++pos) {
        // Fall back through shorter borders until one can be extended by s[pos].
        while (border != 0 && s[pos] != s[border + 1]) border = ne[border];
        if (s[pos] == s[border + 1]) ++border;
        ne[pos] = border;
    }
}
// For each input string, up to a line containing only ".", prints how many
// times its shortest period repeats, or 1 when the period does not divide
// the length.
int main() {
    // Also stop when scanf fails (EOF). The result used to be discarded by the
    // comma operator, so input without a terminating "." looped forever on
    // the stale buffer.
    while (scanf("%s", s + 1) == 1 && strcmp(s + 1, ".") != 0) {
        n = strlen(s + 1);
        getNext();
        // n - ne[n] is the candidate shortest period of s[1..n].
        const int period = n - ne[n];
        printf("%d\n", n % period == 0 ? n / period : 1);
    }
    return 0;
}
参考博客(理论)
参考博客(代码)
截图自参考博客
theme:给定字符串a与b,问a的子串中(按起始位置与长度计)有多少个字典序小于b。原题地址
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
// Not referenced by the code shown in this program.
const double eps = 1e-8;
// Capacity of the string buffers and of the kmp/extend tables.
#define maxn 200020
typedef long long ll;
// Not referenced by the code shown in this program.
const ll mod = 998244353;
// kmp[i]: failure function of b. extend[i]: length matched between b and the
// text a starting at position i. Both are filled by fkmp().
int kmp[maxn], extend[maxn];
// The two input strings; main reads both 1-based.
char a[maxn], b[maxn];
void fkmp(char a[],char b[])
{
int j, la, lb, i;
la = strlen(a + 1);
lb = strlen(b + 1);
j = 0;
for (i = 2; i <= lb; i++)
{
while (j&&b[i] != b[j + 1])
j = kmp[j];//j记录的是该位置真前缀与真后缀最大的相似度
if (b[j + 1] == b[i])j++;
kmp[i] = j;//kmp[i]记录的是该位置前缀与后缀最大的相似度
}
j = 0;
for (i = 1; i <= la; i++)
{
while (j > 0 && b[j + 1] != a[i])
{
extend[i - j] = j;
j = kmp[j];
}
if (b[j + 1] == a[i])
j++;//j记录b[i]的前缀与a[j]的后缀的相似度
if (j == lb) { extend[i + 1 - j] = j; j = kmp[j]; }
}
while (i - j <= la) extend[i - j] = j, j = kmp[j];
}
int main()
{
ios::sync_with_stdio(false); cin.tie(0);
int i, j, la, lb;
cin >> a + 1;
cin >> b + 1;
la = strlen(a + 1);
lb = strlen(b + 1);
fkmp(a, b);
ll ans = 0;
for (i = 1; i <= la; i++)
{
if (extend[i] == lb)
ans = ans + (ll)(lb - 1);
else
{
if (a[i + extend[i]] < b[extend[i] + 1])
ans = ans + (ll)(la - i + 1);
else ans += extend[i] > 0 ? (ll)extend[i] : 0;
}
}
cout << ans << endl;
return 0;
}
做的题比较少,还要加强。
一种简单的变形参考博客
代码来自参考博客
#include
#include
#include
#include
using namespace std;
// Capacity of the input sequence.
const int MAX_N = 1e6+9;
// Input integers, read 0-based by main.
int str[MAX_N];
// Expanded sequence built by init(): separators (value 1) interleaved with the input.
int s[MAX_N<<1];
// len[i]: radius computed by manacher() around position i of s.
int len[MAX_N<<1];
// Builds the expanded sequence s from str[0..N-1]: s[0] = 0, then the
// separator 1 before every element and after the last, closed by -1.
// Returns 2*N+1, the index of the last separator.
int init(int N)
{
    s[0] = 0;
    for (int k = 0; k < N; ++k)
    {
        s[2 * k + 1] = 1;
        s[2 * k + 2] = str[k];
    }
    s[2 * N + 1] = 1;
    s[2 * N + 2] = -1;  // end sentinel; must not be removed
    return 2 * N + 1;
}
// Manacher-style scan over s[1..ll]. Returns the largest len[i] - 1 seen.
// Compared with the standard template there is one extra stop condition:
// expansion also halts when the newly reached right-hand value is not a
// separator and is larger than the value two positions closer to the centre.
int manacher(int ll)
{
    int reach = 0;   // one past the right end of the furthest window so far
    int center = 0;  // centre of that window
    int best = 0;
    for (int i = 1; i <= ll; ++i)
    {
        int radius = 1;
        if (reach > i) radius = min(reach - i, len[2 * center - i]);
        for (;;)
        {
            const int right = i + radius;
            if (s[i - radius] != s[right]) break;
            // The extra check of this variant.
            if (s[right] != 1 && s[right - 2] < s[right]) break;
            ++radius;
        }
        len[i] = radius;
        if (radius + i > reach)
        {
            reach = radius + i;
            center = i;
        }
        best = max(best, radius - 1);
    }
    return best;
}
// Reads T test cases; for each one reads N integers, expands them with
// init() and prints the result of manacher().
int main()
{
    //freopen("in.txt","r",stdin);
    int T, N;
    cin >> T;
    while (T-- != 0)
    {
        cin >> N;
        for (int idx = 0; idx < N; ++idx) scanf("%d", &str[idx]);
        const int last = init(N);
        const int ans = manacher(last);
        cout << ans << endl;
    }
}
POJ2778
著名的AC自动机+矩阵快速幂问题
#include
#include
#include
#include
#include
#include
#include
using namespace std;
typedef long long ll;
#define maxn 110
#define mod 100000
// One state of the trie / automaton; tr[0] is the root.
struct node
{
    int fail;   // failure link, assigned in get_fail()
    int end;    // non-zero when a pattern ends here or at a state reachable by fail links
    int ss;     // not referenced by the code shown
    int ch[4];  // transition per letter code returned by cit()
}tr[maxn];
// cnt: index of the last allocated automaton state. m: number of patterns.
int cnt = 0, m;
// Exponent passed to mqp() (read in main).
ll n;
// in[] is incremented in get_fail(); mmp[] and vis[] are not referenced by the code shown.
int mmp[maxn], in[maxn], vis[maxn];
// Patterns, stored 1-based by main.
string s[11];
// BFS queue used by get_fail().
queue<int>ww;
// Not referenced by the code shown.
queue<int>tp;
// Square matrix over the automaton states; only indices 0..cnt are used.
// base: transition-count matrix filled by AC().
// unit: result accumulator of mqp().
struct matrix
{
ll mat[maxn][maxn];
}base, unit;
// Letter-to-index mapping for the automaton: 'A' -> 0, 'C' -> 1, 'G' -> 2,
// every other character -> 3.
int cit(char c)
{
    switch (c)
    {
    case 'A': return 0;
    case 'C': return 1;
    case 'G': return 2;
    default:  return 3;
    }
}
// Matrix product modulo `mod`, restricted to the active (cnt+1) x (cnt+1)
// corner. Entries of the result outside that corner are left unspecified.
// The operands are taken by const reference: passing them by value copied
// two maxn x maxn matrices on every multiplication.
matrix operator *(const matrix &a, const matrix &b)
{
    matrix s;
    for (int i = 0; i <= cnt; i++)
    {
        for (int j = 0; j <= cnt; j++)
        {
            ll acc = 0;
            for (int k = 0; k <= cnt; k++)
            {
                // Reduce after every term so the running sum stays small.
                acc = (acc + a.mat[i][k] * b.mat[k][j]) % (mod);
            }
            s.mat[i][j] = acc;
        }
    }
    return s;
}
// Inserts pattern p into the trie, allocating states as needed, and marks
// the final state with num.
void build(string p, int num)
{
    int cur = 0;
    for (string::size_type k = 0; k < p.size(); ++k)
    {
        const int c = cit(p[k]);
        int &next = tr[cur].ch[c];
        if (next == 0) next = ++cnt;
        cur = next;
    }
    tr[cur].end = num;
}
void get_fail()
{
int i, u = 0;
for (i = 0; i < 4; i++)
{
if (tr[u].ch[i])
{
tr[tr[u].ch[i]].fail = 0;
ww.push(tr[u].ch[i]);
}
}
while (!ww.empty())
{
u = ww.front();
ww.pop();
for (i = 0; i < 4; i++)
{
if (tr[u].ch[i])
{
tr[tr[u].ch[i]].fail = tr[tr[u].fail].ch[i];
in[tr[tr[u].fail].ch[i]]++;
ww.push(tr[u].ch[i]);
}
else tr[u].ch[i] = tr[tr[u].fail].ch[i];
}
if (tr[tr[u].fail].end) tr[u].end = tr[tr[u].fail].end;//加了一个end判断,这样不要跳fail了
}
}
// Binary exponentiation: leaves base^k (mod `mod`, on the active
// (cnt+1) x (cnt+1) corner) in the global `unit`.
// Changes from the earlier version:
//   - `unit` is reset to the identity first instead of relying on it being
//     all-zero off the diagonal;
//   - the squaring runs on a local copy, so the global `base` is no longer
//     overwritten (the old code made that copy but never used it).
void mqp(ll k)
{
    for (int i = 0; i <= cnt; i++)
    {
        for (int j = 0; j <= cnt; j++)
        {
            unit.mat[i][j] = (i == j) ? 1 : 0;
        }
    }
    matrix power = base;  // holds base^(2^t) on the t-th iteration
    while (k)
    {
        if (k & 1) unit = unit * power;
        power = power * power;
        k = k / 2;
    }
}
void AC()
{
int u = 0, i, j;
for (i = 0; i <= cnt; i++)
{
for (j = 0; j < 4; j++)
{
if (tr[tr[i].ch[j]].end) continue;
base.mat[i][tr[i].ch[j]]++;
}
}
mqp(n);
ll res = 0;
for (i = 0; i <= cnt; i++)
{
//if (!tr[i].end) continue;
res = (res + unit.mat[0][i]) % mod;
}
cout << res << endl;
}
int main()
{
ios::sync_with_stdio(false); cin.tie(0); cout.tie(0);
int i, j, k;
cin >> m >> n;
cnt = 0;
for (i = 1; i <= m; i++)
{
cin >> s[i];
build(s[i], i);
}
tr[0].fail = 0;
get_fail();
AC();
return 0;
}
HDU 2243
思路和上一题大致相同,不过这一题要求询问路径小于m的方案数之和,这里我们可以增加一列,最后一列全为1,这样最后一列的第一行每次的值为前m-1次第一行的值之和+1(路径长为0的情况有一种)。
之前的博客
HDU 5030
这题简直把所有SA的技巧都用了一遍
思想就是二分枚举选出来的串在所有子串中的排名,然后扫一遍原串,贪心地去切割,看看能不能只切成k块
#include
#include
#include
#include
#include
#include
using namespace std;
#define maxn 100010
typedef long long ll;
// Suffix-array work arrays (meanings listed below); n is the string length,
// m the current number of distinct sort keys.
int SA[maxn], rk[maxn], Height[maxn], tax[maxn], tp[maxn], n, m, a[maxn];
// qq: sparse table over Height (built by stit). lg: floor(log2) table (built by init).
// K: value read per test case and compared against the cut count in check().
int qq[maxn][20], lg[maxn], K;
int astl, astr, slen;// astl: left end of the selected substring, astr: right end, slen: its length
// snum: total of stf[]. stf[i]: new substrings contributed by the suffix of
// rank i. atf[i]: prefix sums of stf[].
ll snum, stf[maxn], atf[maxn];
// rk[i]: rank of the suffix starting at i. SA[i]: start of the suffix with rank i.
// Height[i]: LCP of the suffixes with ranks i and i-1.
// tax[i]: counting-sort bucket array. tp[i]: helper for rk (the second key of
// the counting sort), indexed like SA.
// a is the original string.
// Counting sort of the (first key, second key) pairs: rk[] is the first key
// and tp[] lists the suffixes in second-key order. Writes the sorted order to SA[].
void RSort(int n)
{
    for (int key = 0; key <= m; ++key) tax[key] = 0;
    for (int i = 1; i <= n; ++i) ++tax[rk[tp[i]]];
    for (int key = 1; key <= m; ++key) tax[key] += tax[key - 1];
    // Walking tp[] backwards keeps the second-key order inside each bucket.
    for (int i = n; i >= 1; --i)
    {
        const int suffix = tp[i];
        SA[tax[rk[suffix]]--] = suffix;
    }
}
// Compares the key pairs (f[x], f[x+w]) and (f[y], f[y+w]); returns 1 when
// both components match, 0 otherwise. Used to decide whether two substrings
// are identical.
int cmp(int *f, int x, int y, int w)
{
    if (f[x] != f[y]) return 0;
    return f[x + w] == f[y + w] ? 1 : 0;
}
// Builds SA[], rk[] and Height[] for the 1-based sequence a[1..n] by prefix
// doubling, with RSort() as the sorting step.
void Suffix(int a[], int n)
{
// Round 0: rank every suffix by its first symbol; the second-key order is
// simply the position order.
for (int i = 1; i <= n; i++) rk[i] = a[i], tp[i] = i;
// 26 is the initial key range. NOTE(review): assumes every a[i] is within
// 0..26 - confirm against the caller.
m = 26, RSort(n);
// w is the length already sorted; p counts distinct ranks and becomes the
// key range m of the next round. The loop ends once all n ranks are distinct.
for (int w = 1, p = 1, i; p < n; w += w, m = p)
{
// Second-key order: suffixes with no second half (start > n - w) come first,
for (p = 0, i = n - w + 1; i <= n; i++) tp[++p] = i;
// followed by the rest in the order given by the previous SA, shifted back by w.
for (i = 1; i <= n; i++) if (SA[i] > w) tp[++p] = SA[i] - w;
RSort(n);
// Swap rk and tp element by element so that tp now holds the previous ranks.
for (int jr = 1; jr <= n; jr++)
{
int tkw = tp[jr];
tp[jr] = rk[jr];
rk[jr] = tkw;
}
// Re-rank: neighbours in SA share a rank when both old key components match.
rk[SA[1]] = p = 1;
for (i = 2; i <= n; i++) rk[SA[i]] = cmp(tp, SA[i], SA[i - 1], w) ? p : ++p;
}
// Height computation: k carries over from the previous i, lowered by one,
// and is then extended by direct comparison with the suffix ranked just before.
int j, k = 0;
for (int i = 1; i <= n; Height[rk[i++]] = k)
for (k = k ? k - 1 : k, j = SA[rk[i] - 1]; a[i + k] == a[j + k]; ++k);
// Once the underlying principle is known, this loop is fairly easy to follow.
}
// Raw input text; main reads it 1-based (ch + 1).
char ch[maxn];
// Fills lg[2..100000] with lg[i] = lg[i / 2] + 1 (lg[0] and lg[1] stay as they are).
void init()
{
    int i = 2;
    while (i <= 100000)
    {
        lg[i] = lg[i >> 1] + 1;
        ++i;
    }
}
int lcp(int a, int b, int n)//求a,b的后缀的公用前缀长度,从0计
{
if (a == b) return n - a + 1;
a = rk[a], b = rk[b];
if (a > b) swap(a, b); ++a;
int kw = lg[b - a + 1];
return min(qq[a][kw], qq[b - (1 << kw) + 1][kw]);
}
// Builds the sparse table: qq[i][j] = min of Height[i .. i + 2^j - 1].
void stit()
{
    for (int i = 1; i <= n; ++i) qq[i][0] = Height[i];
    for (int j = 1; j <= lg[n]; ++j)
    {
        const int half = 1 << (j - 1);
        // Each entry merges two adjacent blocks of length `half`.
        for (int i = 1; i + 2 * half - 1 <= n; ++i)
        {
            qq[i][j] = min(qq[i][j - 1], qq[i + half][j - 1]);
        }
    }
}
void stStringInit(int n)//总共有多少本质不同子串
{
for (int i = 1; i <= n; i++)
{
stf[i] = n - SA[i] - Height[i] + 1;
atf[i] = atf[i - 1] + stf[i];
}
snum = atf[n];
}
// Binary search on atf[] over [l, r]: narrows to the first index whose atf
// value is at least v; returns r when no earlier index qualifies.
int asis(int l, int r, ll v)
{
    while (l < r)
    {
        const int mid = (l + r) / 2;
        if (atf[mid] >= v) r = mid;
        else l = mid + 1;
    }
    return l;
}
// Locates the distinct substring with rank ct and stores its position in
// astl (left end), astr (right end) and slen (length).
void gast(ll ct)
{
    const int pos = asis(1, n, ct);
    const ll offset = ct - atf[pos - 1];  // rank inside the group of suffix `pos`
    astl = SA[pos];
    astr = astl + Height[pos] + offset - 1;
    slen = astr - astl + 1;
}
// Tests whether the string can be cut into at most K pieces when the
// substring of rank x (selected through gast) is the largest piece allowed.
// Passing does not make x optimal, which is why the caller binary searches.
bool check(ll x)
{
    gast(x);
    int cuts = 0;
    int run = 0;  // length of the piece currently being grown
    // The scan must go from the back of the string to the front.
    for (int i = n; i >= 1; --i)
    {
        if (rk[i] < rk[astl])
        {
            ++run;
            continue;
        }
        const int common = lcp(i, astl, n);
        // The selected string cannot be the maximum in this case: fail at once.
        if (common == 0) return false;
        ++run;
        // No cut is needed while the piece still fits; otherwise cut here.
        if (run > min(common, slen))
        {
            run = 1;
            ++cuts;
        }
    }
    return cuts + 1 <= K;
}
// Binary search for the smallest rank in [l, r] accepted by check();
// returns r when no smaller rank passes.
ll bs(ll l, ll r)
{
    while (l < r)
    {
        const ll mid = (l + r) / 2;
        if (check(mid)) r = mid;
        else l = mid + 1;
    }
    return l;
}
int main()
{
ios::sync_with_stdio(false); cin.tie(0); cout.tie(0);
int i, j;
init();
while (cin >> K && K)
{
cin >> ch + 1;
n = strlen(ch + 1);
for (i = 1; i <= n; i++)
{
a[i] = ch[i] - 'a' + 1;//这里不+1的话会WA
}
//a[++n] = 26;
Suffix(a, n);
stit();
stStringInit(n);
ll ans;
ans = bs(1, snum);
gast(ans);
for (i = astl; i <= astr; i++) cout << ch[i];
cout << endl;
}
return 0;
}
参考博客
// One state of the suffix automaton.
//   ch[c] - transition on letter c (0 means "no transition");
//   len   - length assigned to the state by add();
//   fa    - parent link followed by add() when walking back from `las`.
// The constructor now also clears `fa`. It was left uninitialised before,
// which only worked because the array has static storage.
struct NODE
{
    int ch[26];
    int len,fa;
    NODE() : len(0), fa(0) { memset(ch,0,sizeof(ch)); }
}dian[MAXN<<1];
// las: state reached by the text appended so far (starts at the root, state 1).
// tot: index of the last allocated state.
int las=1,tot=1;
// Appends letter c to the automaton. A new state np is created and every
// ancestor of the old `las` that lacks a c-transition is pointed at it.
// The parent link of np is then fixed in one of three cases.
void add(int c)
{
    int p = las;
    const int np = ++tot;
    las = np;
    dian[np].len = dian[p].len + 1;
    while (p && !dian[p].ch[c])
    {
        dian[p].ch[c] = np;
        p = dian[p].fa;
    }
    // Case 1: walked past the root, so the parent is the root.
    if (!p)
    {
        dian[np].fa = 1;
        return;
    }
    const int q = dian[p].ch[c];
    // Case 2: the existing target has exactly the right length.
    if (dian[q].len == dian[p].len + 1)
    {
        dian[np].fa = q;
        return;
    }
    // Case 3: clone q with the shorter length and redirect transitions to the clone.
    const int nq = ++tot;
    dian[nq] = dian[q];
    dian[nq].len = dian[p].len + 1;
    dian[np].fa = nq;
    dian[q].fa = nq;
    while (p && dian[p].ch[c] == q)
    {
        dian[p].ch[c] = nq;
        p = dian[p].fa;
    }
}
// Input text (read 0-based by main) and its length.
char s[MAXN];int len;
// Reads one word and feeds its letters to add() in order.
int main()
{
    scanf("%s", s);
    len = strlen(s);
    int i = 0;
    while (i < len)
    {
        add(s[i] - 'a');
        ++i;
    }
}
让我们来想想SAM到底构造出了一个神马东东。
首先,它是一个endpos树,树的每个结点对应一个子串(就是从根到其的路径),树的叶节点所对应的子串肯定为主干道(即前缀)
它实际存在的边是一个DAG,这使它可以dfs或反向tpsort
当我们想知道一个子串出现了多少次的时候,只要统计它是多少个前缀的后缀就行了,所以只要在add函数的第二行加一句dian[np].siz=1即可。然后dfs一下,就可以了。
如果要统计总共有多少本质不同的子串,那么每个dian[i].siz都要初始为1
两种方法求不同子串参考博客
如果在此基础上再进行一次反tpsort的话,那就是统计从该路径走可获得的不同子串数之和了参考博客
参考博客
相较于普通SAM,只加了两个判断。
牛客多校第四场C
一道反套路题,把要变的串拆出来加到广义SAM上
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
typedef long long ll;
#define maxn 1000010
// One state of the (generalised) suffix automaton.
//   ch[c] - transition on letter c (0 means "no transition");
//   len   - length assigned to the state by add();
//   fa    - parent link followed by add() and read by sak().
// The constructor now also clears `fa`. It was left uninitialised before,
// which only worked because the array has static storage.
struct NODE
{
    int ch[26];
    int len, fa;
    NODE() : len(0), fa(0) { memset(ch, 0, sizeof(ch)); }
}dian[2 * maxn];
// las: state to extend from (root is 1). tot: index of the last allocated state.
// n: length of the input string.
// pos[d]: while main scans right to left, the nearest index to the right holding letter d.
// a[i]: automaton state recorded by main after processing position i.
// ne[i]: nearest index greater than i whose letter is >= s[i], or n + 1 if none.
int las = 1, tot = 1, n, pos[10], a[maxn], ne[maxn];
// Appends letter c after state `las` and returns the state that represents
// the extended string. Unlike the plain construction it first handles the
// case where `las` already has a c-transition.
int add(int c)
{
    const int existing = dian[las].ch[c];
    if (existing)  // the transition to add is already present
    {
        int p = las;
        if (dian[existing].len == dian[p].len + 1) return existing;
        // Lengths differ: split off a state of the right length.
        const int y = ++tot;
        dian[y].len = dian[p].len + 1;
        for (int k = 0; k < 26; ++k) dian[y].ch[k] = dian[existing].ch[k];
        for (; p && dian[p].ch[c] == existing; p = dian[p].fa) dian[p].ch[c] = y;
        dian[y].fa = dian[existing].fa;
        dian[existing].fa = y;
        return y;
    }
    // Plain suffix-automaton extension from here on.
    int p = las;
    const int np = ++tot;
    las = np;
    dian[np].len = dian[p].len + 1;
    while (p && !dian[p].ch[c])
    {
        dian[p].ch[c] = np;
        p = dian[p].fa;
    }
    // Case 1: walked past the root.
    if (!p)
    {
        dian[np].fa = 1;
        return np;
    }
    const int q = dian[p].ch[c];
    // Case 2: the existing target has exactly the right length.
    if (dian[q].len == dian[p].len + 1)
    {
        dian[np].fa = q;
        return np;
    }
    // Case 3: clone q and redirect transitions to the clone.
    const int nq = ++tot;
    dian[nq] = dian[q];
    dian[nq].len = dian[p].len + 1;
    dian[np].fa = nq;
    dian[q].fa = nq;
    while (p && dian[p].ch[c] == q)
    {
        dian[p].ch[c] = nq;
        p = dian[p].fa;
    }
    return np;
}
// Input text, read 1-based by main. len is not referenced by the code shown.
char s[maxn]; int len;
// Sums len(v) - len(fa(v)) over every state except the root.
ll sak()
{
    ll total = 0;
    for (int v = 2; v <= tot; ++v)
    {
        const int parent = dian[v].fa;
        total += (ll)(dian[v].len - dian[parent].len);
    }
    return total;
}
// Reads the string, computes ne[] (the nearest later position with a letter
// at least as large, over the first ten letters), then walks the positions
// from right to left inserting s[i] repeated ne[i] - i times after the state
// stored for ne[i]. Prints sak().
int main()
{
    ios::sync_with_stdio(false);
    cin >> s + 1;
    n = strlen(s + 1);
    for (int d = 0; d < 10; ++d) pos[d] = n + 1;
    for (int i = n; i >= 1; --i)
    {
        const int letter = s[i] - 'a';
        int nearest = n + 1;
        for (int d = letter; d < 10; ++d) nearest = min(nearest, pos[d]);
        ne[i] = nearest;
        pos[letter] = i;
    }
    a[n + 1] = las;
    for (int i = n; i >= 1; --i)
    {
        // Continue from the state recorded at ne[i].
        las = a[ne[i]];
        for (int rep = ne[i] - i; rep > 0; --rep)
        {
            las = add(s[i] - 'a');
        }
        a[i] = las;
    }
    ll mans = sak();
    cout << mans << endl;
    return 0;
}
洛谷P3181
原题地址
求两个字符串取公共子串的不同方案数量
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
typedef long long ll;
#define maxn 400010
// One state of the generalised suffix automaton.
//   ch[c] - transition on letter c (0 means "no transition");
//   len   - length assigned to the state by add();
//   fa    - parent link followed by add() and by the accumulation in sak().
// The constructor now also clears `fa`. It was left uninitialised before,
// which only worked because the array has static storage.
struct NODE
{
    int ch[26];
    int len, fa;
    NODE() : len(0), fa(0) { memset(ch, 0, sizeof(ch)); }
}dian[2 * maxn];
// las: state to extend from (root is 1). tot: index of the last allocated state.
// n is not referenced by the code shown.
// siz[v][id]: occurrence counter of state v for input string id; set to 1 in
// add() and accumulated along the fa links in sak().
// ru[v]: number of states whose fa is v, used by sak() as an in-degree.
int las = 1, tot = 1, n, siz[2 * maxn][2], ru[2 * maxn];
// Appends letter c of input string `id` after state `las`, marks the
// resulting state in siz[..][id] and returns it. The case where `las`
// already has a c-transition is handled first.
int add(int c, int id)
{
    const int existing = dian[las].ch[c];
    if (existing)
    {
        int p = las;
        if (dian[existing].len == dian[p].len + 1)
        {
            // Record that this state's substrings occur in string id.
            siz[existing][id] = 1;
            return existing;
        }
        // Lengths differ: split off a state of the right length.
        const int y = ++tot;
        dian[y].len = dian[p].len + 1;
        for (int k = 0; k < 26; ++k) dian[y].ch[k] = dian[existing].ch[k];
        for (; p && dian[p].ch[c] == existing; p = dian[p].fa) dian[p].ch[c] = y;
        dian[y].fa = dian[existing].fa;
        dian[existing].fa = y;
        siz[y][id] = 1;
        return y;
    }
    // Plain suffix-automaton extension from here on.
    int p = las;
    const int np = ++tot;
    las = np;
    dian[np].len = dian[p].len + 1;
    siz[np][id] = 1;
    while (p && !dian[p].ch[c])
    {
        dian[p].ch[c] = np;
        p = dian[p].fa;
    }
    // Case 1: walked past the root.
    if (!p)
    {
        dian[np].fa = 1;
        return np;
    }
    const int q = dian[p].ch[c];
    // Case 2: the existing target has exactly the right length.
    if (dian[q].len == dian[p].len + 1)
    {
        dian[np].fa = q;
        return np;
    }
    // Case 3: clone q and redirect transitions to the clone.
    const int nq = ++tot;
    dian[nq] = dian[q];
    dian[nq].len = dian[p].len + 1;
    dian[np].fa = nq;
    dian[q].fa = nq;
    while (p && dian[p].ch[c] == q)
    {
        dian[p].ch[c] = nq;
        p = dian[p].fa;
    }
    return np;
}
// Buffer for the input string currently being read (1-based in main).
// len is not referenced by the code shown.
char s[maxn]; int len;
// Pushes the per-string counters siz[][0] and siz[][1] up the fa links,
// children before parents, using a queue ordered by in-degree. Then sums
// siz[v][0] * siz[v][1] * (len(v) - len(fa(v))) over all non-root states.
ll sak()
{
    // In-degree of every state in the parent tree (reverse topological order).
    for (int v = 2; v <= tot; ++v) ++ru[dian[v].fa];
    queue<int> qw;
    for (int v = 1; v <= tot; ++v)
    {
        if (ru[v] == 0) qw.push(v);
    }
    while (!qw.empty())
    {
        const int cur = qw.front();
        qw.pop();
        const int parent = dian[cur].fa;
        siz[parent][0] += siz[cur][0];
        siz[parent][1] += siz[cur][1];
        if (--ru[parent] == 0) qw.push(parent);
    }
    ll ans = 0;
    for (int v = 2; v <= tot; ++v)
    {
        // Pairs of occurrences times the number of lengths this state covers.
        ans += (ll)siz[v][0] * siz[v][1] * (dian[v].len - dian[dian[v].fa].len);
    }
    return ans;
}
// Reads two strings, inserts each into the shared automaton starting again
// from the root, and prints sak().
int main()
{
    ios::sync_with_stdio(false);
    for (int id = 0; id < 2; ++id)
    {
        cin >> s + 1;
        las = 1;
        for (const char *c = s + 1; *c; ++c)
        {
            las = add(*c - 'a', id);
        }
    }
    ll mans = sak();
    cout << mans << endl;
    return 0;
}
今年几场多校做下来,感觉大家水平都好强。。。