poj 3261Milk Patterns
题意:给出n个数,找出大于重复次数大于k的最长重复子串
解法:后缀数组求出lcp后,二分枚举答案,判断当前二分的key是否满足重复次数大于k,可以直接枚举height数组,若 height[i] >= k , 那么重复次数加1,否则重复次数记为1。时间复杂度o(nlog(n))。
#include<stdio.h> #include<string.h> #include<algorithm> using namespace std ; const int maxn = 1111111 ; int wa[maxn] , wb[maxn] , wv[maxn] , ws[maxn] ; struct suf { int sa[maxn] , hei[maxn] , rank[maxn] ; int cmp ( int *r , int i , int j , int l ) { return r[i] == r[j] && r[i+l] == r[j+l] ; } void da ( int *r , int n , int m ) { int *x = wa , *y = wb , *t ; int i , j , p ; for ( i = 0 ; i < m ; i ++ ) ws[i] = 0 ; for ( i = 0 ; i < n ; i ++ ) ws[x[i]=r[i]] ++ ; for ( i = 1 ; i < m ; i ++ ) ws[i] += ws[i-1] ; for ( i = n - 1 ; i >= 0 ; i -- ) sa[--ws[x[i]]] = i ; for ( j = 1 , p = 1 ; p < n ; j *= 2 , m = p ) { for ( p = 0 , i = n - j ; i < n ; i ++ ) y[p++] = i ; for ( i = 0 ; i < n ; i ++ ) if ( sa[i] >= j ) y[p++] = sa[i] - j ; for ( i = 0 ; i < m ; i ++ ) ws[i] = 0 ; for ( i = 0 ; i < n ; i ++ ) ws[x[i]] ++ ; for ( i = 1 ; i < m ; i ++ ) ws[i] += ws[i-1] ; for ( i = n - 1 ; i >= 0 ; i -- ) sa[--ws[x[y[i]]]] = y[i] ; for ( t = x , x = y , y = t , p = 1 , x[sa[0]] = 0 , i = 1 ; i < n ; i ++ ) x[sa[i]] = cmp ( y , sa[i-1] , sa[i] , j ) ? p - 1 : p ++ ; } int k = 0 ; for ( i = 1 ; i < n ; i ++ ) rank[sa[i]] = i ;//这里sa[0]不用加进去了 for ( i = 0 ; i < n - 1 ; hei[rank[i++]] = k )//同上,rank[n-1]=0 for ( k ? k -- : 0 , j = sa[rank[i]-1] ; r[i+k] == r[j+k] ; k ++ ) ; } int judge ( int key , int k , int n ) { int i , p = 1 ; for ( i = 2 ; i <= n ; i ++ ) { if ( hei[i] >= key ) p ++ ; else p = 1 ; if ( p >= k ) return 1 ; } return 0 ; } int bin ( int r , int k , int n ) { int l = 1 ; while ( l <= r ) { int m = ( l + r ) >> 1 ; if ( judge ( m , k , n ) ) l = m + 1 ; else r = m - 1 ; } return l - 1 ; } } arr ; int s[maxn] ; int main () { int n , k , i ; while ( scanf ( "%d%d" , &n , &k ) != EOF ) { for ( i = 0 ; i < n ; i ++ ) scanf ( "%d" , &s[i] ) , s[i] ++ ; s[n] = 0 ; arr.da ( s , n + 1 , 1111111 ) ; printf ( "%d\n" , arr.bin ( n , k , n ) ) ; } }
同样,我们也可以二分答案,用hash值去判断相同的字符串的个数有几个
#include<stdio.h> #include<string.h> #include<algorithm> #define ull unsigned __int64 using namespace std ; const int maxn = 1111111 ; const int x = 123 ; ull h[maxn] , xp[maxn] , Hash[maxn] ; int r[maxn] ; int cmp ( int a , int b ){ if ( Hash[a] == Hash[b] ) return a < b ; return Hash[a] < Hash[b] ; } int n , k ; int judge ( int l ){ int c = 0 ,i ; for ( i = 0 ; i + l - 1 < n ; i ++ ){ r[i] = i ; Hash[i] = h[i] - h[i+l] * xp[l] ;//计算i开头的长为l的字符串的hash值,对于不同字符串的hash值,相同的概率极小,这个我也无法证明 } sort ( r , r + n - l + 1 , cmp ) ;//按hash值排序,通过比较前一个和后一个的值,计算相同的hash值的个数 for ( i = 0 ; i + l - 1 < n ; i ++ ){ if ( i == 0 || Hash[r[i]] != Hash[r[i-1]] ) c = 0 ; if ( ++ c >= k ) return 1 ; } return 0 ; } int s[maxn] ; int main (){ int i , j ; while ( scanf ( "%d%d" , &n , &k ) != EOF ){ for ( i = 0 ; i < n ; i ++ ) scanf ( "%d" , &s[i] ) ; h[n] = 0 ; for ( i = n - 1 ; i >= 0 ; i -- ) h[i] = h[i+1] * x + s[i] ;//递推算h[i],即以i开头的后缀的hash值 xp[0] = 1 ; for ( i = 1 ; i <= n ; i ++ ) xp[i] = xp[i-1] * x ;//递推算x的i次 int l = 1 , r = n ; while ( l <= r ){ int m = ( l + r ) >> 1 ; if ( judge ( m ) ) l = m + 1 ; else r = m - 1 ; } printf ( "%d\n" , l - 1 ) ; } }