题意:给定一个字符串,求至少出现K次的最长重复子串,这K个子串可以重叠。
做法:1.hash(二分最大长度;判断时枚举起点,对该长度的每个子串求hash值,统计每个hash值出现的次数,如果某个hash值出现次数>=k,则该长度满足条件)
2.后缀数组(论文经典题:二分长度,把height值不小于该长度的相邻后缀分为一组,若某组后缀个数>=k则满足条件;详细看论文)
code:
1.
#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
#include <vector>
#include <queue>
#include <map>
#include <set>
#include <cmath>
#include <string>
#define zero(x) (((x)>0?(x):-(x))<eps)
#define MAGIC 1121117
#define eps 1e-8
#define ULL unsigned long long
#define Test puts("END")
using namespace std;

const int MOD = 1000000007;
const int INF = 1000000000;
const int N = 100005;  // capacity of the input sequence

int a[N], n, K;

// prefixHash[i] is the polynomial hash of a[0..i] in base MAGIC.
// Unsigned 64-bit arithmetic is used on purpose: it wraps modulo 2^64, which
// is well defined, whereas signed overflow is undefined behaviour.
// (Not named `hash`: that identifier is ambiguous with std::hash once
// `using namespace std` is in effect.)
ULL prefixHash[N];

// fac[i] = MAGIC^i modulo 2^64.
ULL fac[N];

// Fills prefixHash[0..n-1] from a[0..n-1]. Does nothing when n <= 0.
void makeHash() {
    if (n <= 0) {
        return;
    }
    prefixHash[0] = static_cast<ULL>(a[0]);
    for (int i = 1; i < n; i++) {
        prefixHash[i] = prefixHash[i - 1] * MAGIC + static_cast<ULL>(a[i]);
    }
}

// Returns the hash of a[l..r]. Requires 0 <= l <= r < n.
ULL getHash(int l, int r) {
    if (l == 0) {
        return prefixHash[r];
    }
    return prefixHash[r] - prefixHash[l - 1] * fac[r - l + 1];
}

// Returns true when some window of length `limit` occurs at least K times
// (occurrences may overlap). Windows are compared by hash value only, so the
// answer is probabilistic: two different windows with equal hashes would be
// counted as the same one.
bool check(int limit) {
    if (limit <= 0) {
        // The empty window occurs once at each of the n + 1 positions; this
        // also avoids calling getHash with r < l.
        return n + 1 >= K;
    }
    if (limit > n) {
        return false;
    }

    static vector<ULL> windows;  // reused across calls to keep its capacity
    windows.clear();
    for (int l = 0; l + limit <= n; l++) {
        windows.push_back(getHash(l, l + limit - 1));
    }
    sort(windows.begin(), windows.end());

    // After sorting, equal hashes are adjacent: look for a run of length >= K.
    int run = 0;
    for (size_t i = 0; i < windows.size(); i++) {
        run = (i > 0 && windows[i] == windows[i - 1]) ? run + 1 : 1;
        if (run >= K) {
            return true;
        }
    }
    return false;
}

// Reads test cases "n K" followed by n integers until input ends, and prints
// for each case the largest length whose check() succeeds (-1 if none does).
int main() {
    // freopen("input.txt","r",stdin);

    // The powers of MAGIC do not depend on the input: compute them once.
    fac[0] = 1;
    for (int i = 1; i < N; i++) {
        fac[i] = fac[i - 1] * MAGIC;
    }

    while (scanf("%d%d", &n, &K) == 2) {
        if (n < 0 || n > N) {
            break;  // the sequence would not fit into the static buffers
        }
        bool complete = true;
        for (int i = 0; i < n; i++) {
            if (scanf("%d", &a[i]) != 1) {
                complete = false;
                break;
            }
        }
        if (!complete) {
            break;  // truncated input
        }

        makeHash();

        // Binary search on the length: if a length works, every shorter
        // length works as well.
        int lo = 0, hi = n;
        int ans = -1;
        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            if (check(mid)) {
                ans = mid;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }
        printf("%d\n", ans);
    }
    return 0;
}
2.后缀数组
#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
#include <vector>
#include <queue>
#include <map>
#include <set>
#include <cmath>
#include <string>
#define zero(x) (((x)>0?(x):-(x))<eps)
#define MAGIC 11117
#define eps 1e-8
#define LL long long
#define Test puts("END")
using namespace std;

const int MOD = 1000000007;
const int INF = 1000000000;
const int N = 2000100;
const int M = 2000100;

int s[N], n, K;
// Work arrays for da() plus its results. The rank array is called `rnk`
// because `rank` is ambiguous with std::rank under `using namespace std`.
int wa[N], wb[N], wv[N], wc[N], rnk[N], height[N], sa[N];

// Computes rnk[] (inverse of sa) and height[], where height[i] is the length
// of the longest common prefix of the suffixes at sa[i-1] and sa[i], for
// i in 1..n.
// r  : text of length n followed by a sentinel r[n] smaller than every other
//      symbol; the sentinel is what stops the comparison loop
// sa : suffix array of r[0..n], with sa[0] == n (the sentinel suffix)
// n  : text length without the sentinel
void calheight(int *r, int *sa, int n) {
    for (int i = 1; i <= n; i++) {
        rnk[sa[i]] = i;
    }
    int k = 0;
    for (int i = 0; i < n; i++) {
        if (k > 0) {
            k--;  // the LCP shrinks by at most one when moving to suffix i+1
        }
        int j = sa[rnk[i] - 1];
        while (r[i + k] == r[j + k]) {
            k++;
        }
        height[rnk[i]] = k;
    }
}

// Returns non-zero when positions a and b have the same (first, second) key
// pair, the second key being the one l positions further on.
int cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
// r  : text; every symbol must lie in [0, m), and r[n-1] must be a unique
//      smallest sentinel
// sa : output; sa[i] is the start of the i-th smallest suffix
// n  : length including the sentinel
// m  : exclusive upper bound of the symbol values
void da(int *r, int *sa, int n, int m) {
    int i, j, p;
    int *x = wa;  // current rank of each position
    int *y = wb;  // positions ordered by second key

    // Initial order: counting sort by single symbol.
    for (i = 0; i < m; i++) wc[i] = 0;
    for (i = 0; i < n; i++) wc[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wc[i] += wc[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wc[x[i]]] = i;

    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        // Order by second key: suffixes too short to have one come first.
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // Stable counting sort by first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wc[i] = 0;
        for (i = 0; i < n; i++) wc[wv[i]]++;
        for (i = 1; i < m; i++) wc[i] += wc[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wc[wv[i]]] = y[i];

        // Re-rank: y now holds the old ranks and x receives the new ones.
        swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
    }
}

// Returns true when some substring of length `limit` occurs at least K times
// (occurrences may overlap): suffixes adjacent in sa[] whose height values
// are all >= limit share a prefix of that length.
bool check(int limit) {
    if (limit <= 0) {
        return n + 1 >= K;  // the empty substring occurs at n + 1 positions
    }
    if (limit > n) {
        return false;
    }
    int group = 1;  // number of suffixes in the current group
    if (group >= K) {
        return true;  // K <= 1: a single occurrence is enough
    }
    for (int i = 1; i <= n; i++) {
        if (height[i] >= limit) {
            group++;
            if (group >= K) {
                return true;
            }
        } else {
            group = 1;
        }
    }
    return false;
}

// Reads test cases "n K" followed by n integers until input ends, and prints
// for each case the largest length whose check() succeeds (-1 if none does).
int main() {
    // freopen("input.txt","r",stdin);
    vector<int> distinct;
    while (scanf("%d%d", &n, &K) == 2) {
        if (n < 0 || n + 1 >= N) {
            break;  // text plus sentinel would not fit into the static buffers
        }
        bool complete = true;
        for (int i = 0; i < n; i++) {
            if (scanf("%d", &s[i]) != 1) {
                complete = false;
                break;
            }
        }
        if (!complete) {
            break;  // truncated input
        }

        // Map the values to 1..distinct.size(), keeping their order. da()
        // needs every symbol in [0, m), and 0 is reserved for the sentinel,
        // so arbitrary (large, zero or negative) inputs must be remapped.
        distinct.assign(s, s + n);
        sort(distinct.begin(), distinct.end());
        distinct.erase(unique(distinct.begin(), distinct.end()), distinct.end());
        for (int i = 0; i < n; i++) {
            s[i] = static_cast<int>(
                       lower_bound(distinct.begin(), distinct.end(), s[i]) -
                       distinct.begin()) + 1;
        }
        s[n] = 0;  // sentinel; reset explicitly, it may be stale from a longer case

        da(s, sa, n + 1, static_cast<int>(distinct.size()) + 1);
        calheight(s, sa, n);

        // Binary search on the length: if a length works, every shorter
        // length works as well.
        int lo = 0, hi = n;
        int ans = -1;
        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            if (check(mid)) {
                ans = mid;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }
        printf("%d\n", ans);
    }
    return 0;
}