[置顶] poj 3261Milk Patterns 后缀数组入门(以及hash解法)

poj 3261Milk Patterns

题意:给出n个数,找出大于重复次数大于k的最长重复子串

解法:后缀数组求出lcp后,二分枚举答案,判断当前二分的key是否满足重复次数大于k,可以直接枚举height数组,若 height[i] >= k , 那么重复次数加1,否则重复次数记为1。时间复杂度o(nlog(n))。

#include<stdio.h>
#include<string.h>
#include<algorithm>
using namespace std ;

const int maxn = 1111111 ;

int wa[maxn] , wb[maxn] , wv[maxn] , ws[maxn] ;
struct suf
{
	int sa[maxn] , hei[maxn] , rank[maxn] ;

	int cmp ( int *r , int i , int j , int l )
	{
		return r[i] == r[j] && r[i+l] == r[j+l] ;
	}

	void da ( int *r , int n , int m )
	{
		int *x = wa , *y = wb , *t ;
		int i , j , p ;
		for ( i = 0 ; i < m ; i ++ ) ws[i] = 0 ;
		for ( i = 0 ; i < n ; i ++ ) ws[x[i]=r[i]] ++ ;
		for ( i = 1 ; i < m ; i ++ ) ws[i] += ws[i-1] ;
		for ( i = n - 1 ; i >= 0 ; i -- ) sa[--ws[x[i]]] = i ;
		for ( j = 1 , p = 1 ; p < n ; j *= 2 , m = p )
		{
			for ( p = 0 , i = n - j ; i < n ; i ++ ) y[p++] = i ;
			for ( i = 0 ; i < n ; i ++ ) if ( sa[i] >= j ) y[p++] = sa[i] - j ;
			for ( i = 0 ; i < m ; i ++ ) ws[i] = 0 ;
			for ( i = 0 ; i < n ; i ++ ) ws[x[i]] ++ ;
			for ( i = 1 ; i < m ; i ++ ) ws[i] += ws[i-1] ;
			for ( i = n - 1 ; i >= 0 ; i -- ) sa[--ws[x[y[i]]]] = y[i] ;
			for ( t = x , x = y , y = t , p = 1 , x[sa[0]] = 0 , i = 1 ; i < n ; i ++ )
				x[sa[i]] = cmp ( y , sa[i-1] , sa[i] , j ) ? p - 1 : p ++ ;
		}
		int k = 0 ;
		for ( i = 1 ; i < n ; i ++ ) rank[sa[i]] = i ;//这里sa[0]不用加进去了 
		for ( i = 0 ; i < n - 1 ; hei[rank[i++]] = k )//同上,rank[n-1]=0 
			for ( k ? k -- : 0 , j = sa[rank[i]-1] ; r[i+k] == r[j+k] ; k ++ ) ;
	}

	int judge ( int key , int k , int n )
	{
		int i , p = 1 ;
		for ( i = 2 ; i <= n ; i ++ )
		{
			if ( hei[i] >= key ) p ++ ;
			else p = 1 ;
			if ( p >= k ) return 1 ;
		}
		return 0 ;
	}

	int bin ( int r , int k , int n )
	{
		int l = 1 ;
		while ( l <= r )
		{
			int m = ( l + r ) >> 1 ;
			if ( judge ( m , k , n ) ) l = m + 1 ;
			else r = m - 1 ;
		}
		return l - 1 ;
	}

} arr ;

int s[maxn] ;
int main ()
{
	int n , k , i ;
	while ( scanf ( "%d%d" , &n , &k ) != EOF )
	{
		for ( i = 0 ; i < n ; i ++ ) scanf ( "%d" , &s[i] ) , s[i] ++ ;
		s[n] = 0 ;
		arr.da ( s , n + 1 , 1111111 ) ;
		printf ( "%d\n" , arr.bin ( n , k , n ) ) ;
	}
}

同样,我们也可以二分答案,用hash值去判断相同的字符串的个数有几个

#include<stdio.h>
#include<string.h>
#include<algorithm>
#define ull unsigned __int64
using namespace std ;

const int maxn = 1111111 ;
const int x = 123 ;
ull h[maxn] , xp[maxn] , Hash[maxn] ;
int r[maxn] ;

int cmp ( int a , int b ){
	if ( Hash[a] == Hash[b] ) return a < b ;
	return Hash[a] < Hash[b] ;
}

int n , k ;
int judge ( int l ){
	int c = 0 ,i ;
	for ( i = 0 ; i + l - 1 < n ; i ++ ){
		r[i] = i ;
		Hash[i] = h[i] - h[i+l] * xp[l] ;//计算i开头的长为l的字符串的hash值,对于不同字符串的hash值,相同的概率极小,这个我也无法证明
	}
	sort ( r , r + n - l + 1 , cmp ) ;//按hash值排序,通过比较前一个和后一个的值,计算相同的hash值的个数
	for ( i = 0 ; i + l - 1 < n ; i ++ ){
		if ( i == 0 || Hash[r[i]] != Hash[r[i-1]] ) c = 0 ;
		if ( ++ c >= k ) return 1 ;
	}
	return 0 ;
}

int s[maxn] ;
int main (){
	int i , j ;
	while ( scanf ( "%d%d" , &n , &k ) != EOF ){
		for ( i = 0 ; i < n ; i ++ ) scanf ( "%d" , &s[i] ) ;
		h[n] = 0 ;
		for ( i = n - 1 ; i >= 0 ; i -- ) h[i] = h[i+1] * x + s[i] ;//递推算h[i],即以i开头的后缀的hash值
		xp[0] = 1 ;
		for ( i = 1 ; i <= n ; i ++ ) xp[i] = xp[i-1] * x ;//递推算x的i次
		int l = 1 , r = n ;
		while ( l <= r ){
			int m = ( l + r ) >> 1 ;
			if ( judge ( m ) ) l = m + 1 ;
			else r = m - 1 ;
		}
		printf ( "%d\n" , l - 1 ) ;
	}
}


你可能感兴趣的:(算法,字符串,hash,poj,后缀数组)