【倍增后缀数组】 poj1743 &2406& 3261 & 3294 & 2774


        曾经有几次想写后缀数组,结果被论文中缩成豆腐块的程序吓到了.......

        现在看懂了之后, 被这个程序写的丑陋吓到了。

        丑虽丑,但是写的还是很精巧的, 照着打吧。


        后缀数组其实就是把一个长度为n的字符串(当然也可以是其他的序列,只要元素能比大小)的所有后缀排序后,把它们的起始位置存在数组里。

        朴素做法(直接对所有后缀做比较排序)是O(n*n*log(n))的。

        其实如果以每个点为开头,分别求出向后延伸1, 2, 4 ....  个字符时的排列顺序,后者是可以由前者推出的,配合基数排序,就可以把复杂度降低到O(n log(n));

         当然, 代码很精巧,也很丑。


         把排好序的后缀的起始位置放在sa数组中, 再用sa推出height (height(i) = suffix(sa[i]) 与 suffix(sa[i-1]) 的最长公共前缀, 即排名为i的后缀与排名在它前一位的后缀的最长公共前缀)。 有这么一个定理 height[rank[i]] >= height[rank[i-1]]-1 , 按照起始位置i从小到大的顺序来求, 用变量存上一个位置求出的height, 由于这个变量顶多减少n次, 类似kmp的复杂度分析,这是O(n)的。


         用height就可以求很多东西了。


        poj 1743  求不可重叠的最长重复子串。

        求出height后,二分答案,把height数组分成若干份, 每份内的height都不小于二分的数,观察每份内是否存在起始位置相距足够远(不重叠)的两个后缀即可。

         nlogn

         当然,这道题规定,如果一段数同时减去一个数后, 与另一段数相同,也算作相同,这里就要用差分思想解决了。

       

        

# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>

using namespace std;

// Capacity of every working array, and the "infinity" value used by check().
const int maxn = 100000;
const int oo = 1073741819;

// All arrays are used 1-indexed.
int pret[maxn];    // counting-sort buckets
int s[maxn];       // input notes, turned into shifted adjacent differences by read()
int height[maxn];  // height[r]: LCP of the suffixes ranked r-1 and r
int sa[maxn];      // sa[r]: start position of the suffix ranked r
int stmp[maxn];    // first-key ranks listed in second-key order
int lis[maxn];     // second-key order, later reused as a snapshot of rk
int rank[maxn];    // inverse of sa, filled by work_height()
int rk[maxn];      // rank of each suffix by its current prefix length

// Shared scalars: p = number of distinct ranks, n = sequence length,
// l / r / mid = binary-search state, i = scratch loop index.
int p;
int n;
int l;
int r;
int mid;
int i;

void read()
{
	int i;
	memset(s, 0, sizeof(s));
	memset(sa, 0, sizeof(sa));
	scanf("%d", &n);
    for (i = 1; i<= n; i++)
       scanf("%d", &s[i]);
    for (i = 1; i< n; i++)
       s[i] = s[i+1]-s[i];
    s[n] = 0; n--;
    for (i = 1; i<= n; i++)
       s[i] += 89;
    
}

void work_suffix(int n, int m)
{
	int i, j; 
	memset(pret, 0, sizeof(pret));
	for (i = 1; i <= n; i++) pret[rk[i]=s[i]]++;
	for (i = 1; i <= m; i++) pret[i]+= pret[i-1];
	for (i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;
	for (j = 1, p = 0; p < n; j <<=1, m = p)
	{
		for (p = 0, i = n-j+1; i <=n; i++) lis[++p] = i;
		for (i = 1; i <=n ; i++) if (sa[i]>j) lis[++p] = sa[i] - j;
		memset(pret, 0, sizeof(pret));
		for (i = 1; i <=n ; i++) stmp[i] = rk[lis[i]];
		for (i = 1; i <=n ; i++) pret[stmp[i]]++;
		for (i = 1; i <=m ; i++) pret[i]+= pret[i-1];
		for (i = n; i >=1 ; i--) sa[pret[stmp[i]]--] = lis[i];
		for (i = 1; i <=n ; i++) lis[i] = rk[i];
		for (p = 1, rk[sa[1]] = 1, i = 2; i <= n; i++)
		    rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j])? p:++p;
	} 
}

/**
 * Fills rank[] (inverse of sa) and height[], where height[r] is the length of
 * the longest common prefix of the suffixes ranked r-1 and r.
 *
 * Positions are visited in text order so the match length carried over from
 * the previous position drops by at most one. The last position n is skipped.
 */
void work_height()
{
	int matched = 0;
	for (int ord = 1; ord <= n; ++ord) rank[sa[ord]] = ord;
	for (int pos = 1; pos < n; ++pos)
	{
		if (matched > 0) --matched;
		const int prev = sa[rank[pos] - 1];
		while (prev + matched <= n + 1 && pos + matched <= n + 1
		       && s[prev + matched] == s[pos + matched])
			++matched;
		height[rank[pos]] = matched;
	}
}

bool check(int mid)
{
    int ll,rr;
	for (i = 1; i <= n; i++)
	{
		if (height[i] < mid) ll = oo, rr = -oo;
		ll = ll > sa[i] ? sa[i]:ll;
		rr = rr < sa[i] ? sa[i]:rr;
		if (rr- ll >= mid) return true; 
	}
	return false;
}

/**
 * POJ 1743 driver: for every test case, binary-searches the longest
 * difference block accepted by check() and prints the theme length
 * (block length + 1), or 0 when that length is below 5.
 */
int main()
{
	freopen("1743.in", "r", stdin);
	freopen("1743.out", "w", stdout);
	while (true)
	{
		read();
		if (n == -1) break;  // read() leaves -1 after the terminating 0

		// Append the 0 sentinel as the last symbol.
		++n;
		s[n] = 0;

		work_suffix(n, 1000);
		work_height();

		// Find the largest value for which check() succeeds.
		l = 0;
		r = n;
		while (l < r)
		{
			mid = (l + r + 1 >> 1);
			if (check(mid))
				l = mid;
			else
				r = mid - 1;
		}

		l++;  // differences -> notes
		if (l >= 5)
			printf("%d\n", l);
		else
			printf("%d\n", 0);
	}
	return 0;
}



         poj 3261 求至少出现k次的最长重复子串, 可重叠。

         有木有发现和前面一道题是一样的二分检验?

      

# include <cstdlib>
# include <cstdio>
# include <cstring>

using namespace std;

// "Infinity" value for check() and the capacity of the per-position arrays.
const int oo = 1073741819;
const int maxn = 30000;

// All arrays are used 1-indexed.
int rank[maxn];           // inverse of sa, filled by work_height()
int a[maxn];              // input values shifted by +1, followed by a 0 sentinel
int pret[1000000 + 5];    // counting-sort buckets, indexed by symbol / rank
int rk[maxn];             // rank of each suffix by its current prefix length
int sa[maxn];             // sa[r]: start position of the suffix ranked r
int lis[maxn];            // second-key order, later reused as a snapshot of rk
int stmp[maxn];           // first-key ranks listed in second-key order
int height[maxn];         // height[r]: LCP of the suffixes ranked r-1 and r

// i = scratch index, n = sequence length (including sentinel), k = required repeat count.
int i;
int n;
int k;
/**
 * Reads n and k, then n values into a[1..n], each shifted up by one so that
 * 0 stays free for the sentinel. The sentinel is appended and n is
 * incremented to include it.
 */
void read()
{
  scanf("%d%d", &n, &k);
  for (int idx = 1; idx <= n; ++idx)
  {
    scanf("%d", &a[idx]);
    a[idx]++;
  }
  ++n;
  a[n] = 0;
}

 
void work_suffix(int n, int m)
{
  int i, j, p;
  for (i=1; i<=m; i++) pret[i] = 0;
  for (i=1; i<=n; i++) pret[rk[i] = a[i]]++;
  for (i=1; i<=m; i++) pret[i]+= pret[i-1];
  for (i=n; i>=1; i--) sa[pret[a[i]]--] = i;
  for (j = 1, p = 0; p <n; j <<=1, m = p)
  {
    for (p = 0, i=n-j+1; i<=n; i++) lis[++p] = i;
    for (i=1; i<=n; i++) if (sa[i] > j) lis[++p] = sa[i]-j;
    for (i=1; i<=m; i++) pret[i] = 0; 
    for (i=1; i<=n; i++) stmp[i]=rk[lis[i]];
    for (i=1; i<=n; i++) pret[stmp[i]]++;
    for (i=1; i<=m; i++) pret[i]+=pret[i-1];
    for (i=n; i>=1; i--) sa[pret[stmp[i]]--]=lis[i];
    for (i=1; i<=n; i++) lis[i]=rk[i];
    for (rk[sa[1]]=1, i=2, p=1; i<=n; i++)
      rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j]) ? p:++p;
  } 
}

bool check(int mid)
{
  int i, ll, rr;
  for (i=1; i<=n; i++)
  {
    if (height[i] < mid ) ll = oo, rr = -oo;
    if (i < ll) ll = i;
    if (i > rr) rr = i;
    if (rr - ll >= k-1) return true;
  }
  return false;
}

/**
 * Fills rank[] (inverse of sa) and height[], where height[r] is the length of
 * the longest common prefix of the suffixes ranked r-1 and r.
 *
 * Positions are visited in text order so the match length carried over from
 * the previous position drops by at most one. The suffix ranked first has no
 * predecessor: its height is defined as 0 instead of being compared against
 * sa[0], and the comparison never reads past a[n].
 */
void work_height()
{
  int matched = 0;
  for (int ord = 1; ord <= n; ord++) rank[sa[ord]] = ord;
  for (int pos = 1; pos <= n; pos++)
  {
    if (rank[pos] == 1)
    {
      // No suffix precedes the smallest one.
      height[1] = 0;
      matched = 0;
      continue;
    }
    if (matched > 0) matched--;
    const int prev = sa[rank[pos] - 1];
    while (pos + matched <= n && prev + matched <= n
           && a[prev + matched] == a[pos + matched])
      matched++;
    height[rank[pos]] = matched;
  }
}

int main()
{
  freopen("3261.in","r", stdin);
  freopen("3261.out", "w", stdout);
  read();
  work_suffix(n, 1000000+5);
  work_height();
  int mid, l = 0, r = n;
  for (;l < r;)
  {
    if (check(mid = (l+r+1 >> 1))) l = mid;
    else r = mid-1;
  }
  printf("%d", l);
  return 0;
}

         poj 2406

         给定字符串l,它是由子串s重复k次得到的,求k的最大值。

     

          同样的求height, 枚举s的长度,首先strlen(l)%  strlen(s) == 0, 其次, suffix(1)与suffix(1+strlen(s)) 的最长公共前缀 = n - strlen(s), 画个图就很明了了。

        

          当然,这道题不是出给后缀数组的,是出给kmp的,数据出到了1000000, 倍增是过不了的, 只能用丑陋的DC3

          贴个kmp的算了:

# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>

// Capacity for a 1-indexed string of up to 10^6 characters plus slack.
const int maxn = 1000000 + 5;

char s[maxn];    // input string, stored in s[1..n]
int n;           // length of the current string
int i;           // scan position in the KMP loop
int j;           // length of the current border in the KMP loop
int next[maxn];  // next[x]: longest proper border of s[1..x]

/**
 * POJ 2406 driver: for each input string prints the largest k such that the
 * string is some block repeated k times.
 *
 * Uses the KMP failure function: with b = next[n] the longest proper border,
 * n - b is the shortest period candidate; it is a true period of the whole
 * string exactly when it divides n.
 *
 * Input ends with a line starting with '.'; end of file is treated the same
 * way so a missing terminator cannot cause a division by zero or an endless
 * loop.
 */
int main()
{
  //freopen("2406.in", "r", stdin);
  //freopen("2406.out", "w", stdout);
  for (;;)
    {
      // scanf NUL-terminates the token and the loop below only reads
      // s[1..n], so the buffer does not need to be cleared for every case.
      if (scanf("%s\n", s+1) != 1) return 0;
      n = strlen(s+1);
      if (s[1] == '.') return 0;
      // next[1] is never written and stays 0 (static storage).
      for (i = 2, j = 0; i <= n; next[i++] = j)
	{
	  // Fall back through shorter borders until one can be extended by s[i].
	  for (;j > 0 && s[j+1] != s[i];j = next[j]);
	  if (s[i] == s[j+1]) j++;
	}
      const int period = n - next[n];
      printf("%d\n", n % period == 0 ? n / period : 1);
    }
  return 0;
}




poj 3294

求:对于n个字符串,求至少出现在 n div 2+1 个字符串中的最长子串。

 把n个字符串用未出现且不同的字符连接, 同样的二分答案,分组,组内统计即可。

# include <cstdlib>
# include <cstdio>
# include <cstring>

using namespace std;

// Capacity for the concatenation of all strings plus their separators.
const int maxn = 100000+5000;

char s[10000+5];   // buffer for one input string, stored from s[1]

// Suffix-array working storage over the concatenated text a[1..tot] (1-indexed).
int height[maxn];  // height[r]: LCP of the suffixes ranked r-1 and r
int pret[maxn];    // counting-sort buckets
int sa[maxn];      // sa[r]: start position of the suffix ranked r
int rk[maxn];      // rank of each suffix by its current prefix length
int rank[maxn];    // inverse of sa, filled by work_height()
int stmp[maxn];    // first-key ranks listed in second-key order
int lis[maxn];     // second-key order, later reused as a snapshot of rk
int a[maxn];       // concatenated text: letters as 1..26, separators >= 30, final 0
int id[maxn];      // id[pos]: index of the string owning position pos, 0 for separators

// ans[0] is the number of answers found by check(); ans[1..ans[0]] are their
// start positions. One entry is recorded per qualifying group of suffixes and
// the number of groups is bounded only by the text length, so the array is
// sized like the text (the former 1005 entries could be overrun).
int ans[maxn];
bool have[1000+5]; // have[x]: string x already seen in the current group

// lim = required string count, n = number of strings, i / j = scratch indices,
// key = next separator symbol, len = current string length, tot = text length.
int lim, n, i, j, key, len, tot;

/**
 * Reads one test case: a count n followed by n lowercase strings.
 *
 * The strings are concatenated into a[1..tot] with letters mapped to 1..26
 * and a distinct separator symbol (30, 31, ...) after each string; id[] records
 * which string every letter belongs to (0 for separators). The separator
 * after the last string is replaced by the 0 sentinel.
 *
 * A failed read (EOF or truncated input) is reported as n == 0, which the
 * caller treats as the end of input, instead of leaving the previous n in
 * place and looping forever.
 */
void read()
{
  if (scanf("%d", &n) != 1 || n < 0) n = 0;
  key = 30, tot = 0;
  for (i = 1; i <= n; i++)
  {
     if (scanf("%s", s+1) != 1)
     {
        // Truncated test case: discard it and signal termination.
        n = 0;
        tot = 0;
        break;
     }
     len = strlen(s+1);
     for (j = 1; j <=len; j++) a[++tot] = s[j] - 'a'+1, id[tot] = i;
     a[++tot] = key++;  id[tot] = 0;
  }
  a[tot] = 0;  // last separator becomes the unique smallest sentinel
}

void work_suffix(int n, int m)
{
  int i, j, p;
  for (i = 1; i<=m; i++) pret[i] = 0;
  for (i = 1; i<=n; i++) pret[rk[i]=a[i]]++;
  for (i = 1; i<=m; i++) pret[i]+= pret[i-1];
  for (i = n; i>=1; i--) sa[pret[rk[i]]--] = i;
  for (j = 1, p = 0; p < n; j <<=1, m =p)
  {
    for (p = 0, i = n-j+1; i<=n; i++) lis[++p] = i;
    for (i = 1; i <=n; i++) if (sa[i]>j) lis[++p]=sa[i]-j;
    for (i = 1; i <=m; i++) pret[i] = 0;
    for (i = 1; i <=n; i++) stmp[i]= rk[lis[i]];
    for (i = 1; i <=n; i++) pret[stmp[i]]++;
    for (i = 1; i <=m; i++) pret[i] += pret[i-1];
    for (i = n; i >=1; i--) sa[pret[stmp[i]]--] = lis[i];
    for (i = 1; i <=n; i++) lis[i] = rk[i];
    for (i = 2, p = 1, rk[sa[1]] = 1; i <=n; i++)
      rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] ==lis[sa[i-1]+j]) ? p:++p;
  } 
}

/**
 * Fills rank[] (inverse of sa) and height[] for the text a[1..n], where
 * height[r] is the length of the longest common prefix of the suffixes
 * ranked r-1 and r.
 *
 * Positions are visited in text order so the match length carried over from
 * the previous position drops by at most one. The suffix ranked first has no
 * predecessor, so its height is set to 0 rather than compared against sa[0].
 * The comparison is bounded by n because a[] beyond the current text may
 * still hold data from an earlier, longer test case.
 *
 * @param n  length of the text
 */
void work_height(int n)
{
  int matched = 0;
  for (int ord = 1; ord <= n; ord++) rank[sa[ord]] = ord;
  for (int pos = 1; pos <= n; pos++)
  {
    if (rank[pos] == 1)
    {
      // No suffix precedes the smallest one.
      height[1] = 0;
      matched = 0;
      continue;
    }
    if (matched > 0) matched--;
    const int prev = sa[rank[pos] - 1];
    while (pos + matched <= n && prev + matched <= n
           && a[pos + matched] == a[prev + matched])
      matched++;
    height[rank[pos]] = matched;
  }
}

/**
 * Tests whether some substring of length `mid` occurs in at least n/2 + 1
 * different input strings, and collects the answers.
 *
 * The suffix array is cut into maximal runs whose height values are all
 * >= mid. Within a run the distinct owning strings are counted; the moment a
 * run reaches n/2 + 1 owners, the start position of the current suffix is
 * stored in ans[1..ans[0]] (one entry per run, in sorted order).
 *
 * @param mid  candidate substring length; 0 is trivially accepted
 * @return true if at least one run qualifies
 *
 * Entries beyond the capacity of ans are not stored, so a pass with very
 * many qualifying runs can no longer write outside the array; the return
 * value is unaffected by that cap.
 */
bool check(int mid)
{
  if (mid == 0) return true;

  const int capacity = static_cast<int>(sizeof(ans) / sizeof(ans[0])) - 1;
  const int need = n / 2 + 1;
  bool found = false;
  int all = 0;

  memset(ans, 0, sizeof(ans));
  for (int i = 1; i <= tot; i++)
  {
    // Rank 1 has no predecessor, so it always opens a run; forcing the reset
    // keeps `all` and have[] initialised whatever height[1] contains.
    if (i == 1 || height[i] < mid)
    {
      memset(have, false, sizeof(have));
      all = 0;
    }

    const int owner = id[sa[i]];
    if (owner == 0) continue;      // separator or sentinel position
    if (have[owner]) continue;     // this string was already counted in the run

    have[owner] = true;
    all++;
    if (all == need)
    {
      // Fires once per run: exactly when the count first reaches the threshold.
      found = true;
      if (ans[0] < capacity) ans[++ans[0]] = sa[i];
    }
  }
  return found;
}

/**
 * POJ 3294 driver: for every test case prints all longest substrings that
 * occur in at least n/2 + 1 of the n strings (in sorted order), or "?" when
 * no such substring of positive length exists. A blank line follows every
 * test case.
 */
int main()
{
  
  for (;;)
  {
    read();
    if (n == 0) break;
    if (n == 1)
    {
      // With a single string the threshold n/2 + 1 is 1, so the string itself
      // is the answer. check() cannot handle this case (it would accept
      // one-suffix groups of any length), hence the special case; a[1..tot-1]
      // holds the encoded string, a[tot] is the sentinel.
      for (j = 1; j < tot; j++) printf("%c", a[j] + 'a' - 1);
      printf("\n\n");
      continue;
    }
    work_suffix(tot, key+1);
    work_height(tot);
    int l, r, mid; lim = n / 2 +1;
    // Largest length accepted by check().
    for (l = 0, r = tot; l < r;)
    {
      mid = (l + r + 1) >> 1;
      if (check(mid)) l = mid;
      else r = mid - 1;
    }
    check(l);  // rerun at the final length so ans[] holds the answers for it
    if (l == 0) printf("?\n");
    else
    {
      for (i = 1; i <= ans[0]; i++)
      {
        for (j = 1; j <= l; j++) printf("%c", a[ans[i]+j-1] + 'a' -1);
        printf("\n");
      }
    }
    printf("\n");
  }
  return 0;
}

poj 2774
给定两个字符串, 求他们的最长公共子串;

把两个字符串以未出现字符连接后,求height,
可以肯定的是,答案就出现在height中:在sa上不相邻的两个后缀的公共前缀不会比它们之间相邻后缀的公共前缀更长,所以只需考虑相邻的后缀。
那么只要扫一遍所有的height, 如果这个height 满足前后两个后缀开头出现在不同字符串上,则可以用来更新答案。

# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>

using namespace std;

// Capacity for the two strings joined with a separator and a sentinel.
const int maxn = 300000;

// Suffix-array working storage over the joined text a[1..tot] (1-indexed).
int height[maxn];  // height[r]: LCP of the suffixes ranked r-1 and r
int id[maxn];      // 1 or 2: which input string owns the position; 0 otherwise
int pret[maxn];    // counting-sort buckets
int sa[maxn];      // sa[r]: start position of the suffix ranked r
int stmp[maxn];    // first-key ranks listed in second-key order
int rk[maxn];      // rank of each suffix by its current prefix length
int a[maxn];       // joined text: letters as 1..26, separator 30, final 0
int lis[maxn];     // second-key order, later reused as a snapshot of rk
int rank[maxn];    // inverse of sa, filled by work_height()

// tot = text length, i = scratch index, n = unused here, ans = best length found.
int tot;
int i;
int n;
int ans;

char s[maxn / 2];  // buffer for one input string, stored from s[1]

void read()
{
	int i, len;
	scanf("%s\n", s+1); len = strlen(s+1);
	for (i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 1;
	scanf("%s\n", s+1); len = strlen(s+1);
	for (a[++tot] = 30, i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 2;
	a[++tot] = 0;
}

void work_suffix(int n, int m)
{
	int i , j, p;
	for (i = 1; i <= m; i++) pret[i] = 0;
	for (i = 1; i <= n; i++) pret[rk[i] = a[i]] ++;
	for (i = 1; i <= m; i++) pret[i] += pret[i-1];
	for (i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;
	for (p = 0, j = 1; p < n; j <<=1, m = p)
	{
		for (p = 0, i = n-j+1; i <= n; i++) lis[++p] = i;
		for (i = 1; i <=n; i++) if (sa[i]> j) lis[++p] = sa[i] - j;
		for (i = 1; i <=m; i++) pret[i] = 0;
		for (i = 1; i <=n; i++) stmp[i] = rk[lis[i]];
		for (i = 1; i <=n; i++) pret[stmp[i]]++;
		for (i = 1; i <=m; i++) pret[i] += pret[i-1];
		for (i = n; i >=1; i--) sa[pret[stmp[i]]--] = lis[i];
		for (i = 1; i <=n; i++) lis[i] = rk[i];
		for (i = 2, p = 1, rk[sa[1]] = 1; i<=n; i++)
		    rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j])? p:++p;
	}
}

/**
 * Fills rank[] (inverse of sa) and height[] for the text a[1..n], where
 * height[r] is the length of the longest common prefix of the suffixes
 * ranked r-1 and r.
 *
 * Positions 1..n-1 are visited in text order so the match length carried
 * over from the previous position drops by at most one; the last position n
 * is skipped.
 *
 * @param n  length of the text
 */
void work_height(int n)
{
	int matched = 0;
	for (int ord = 1; ord <= n; ++ord) rank[sa[ord]] = ord;
	for (int pos = 1; pos < n; ++pos)
	{
		if (matched > 0) --matched;
		const int prev = sa[rank[pos] - 1];
		while (a[pos + matched] == a[prev + matched])
			++matched;
		height[rank[pos]] = matched;
	}
}

/**
 * POJ 2774 driver: joins the two strings, builds the suffix and height
 * arrays, and prints the largest height between two rank-adjacent suffixes
 * that start in different strings.
 */
int main()
{
	freopen("2774.in", "r", stdin); 
	freopen("2774.out", "w", stdout);

	read();
	work_suffix(tot, 50);
	work_height(tot);

	i = 2;
	while (i <= tot)
	{
		// Only neighbours owned by different strings can witness a common substring.
		const bool different_owner = id[sa[i]] != id[sa[i-1]];
		if (different_owner && ans < height[i])
			ans = height[i];
		i++;
	}

	printf("%d", ans);
	return 0;
}



         









   

你可能感兴趣的:(【倍增后缀数组】 poj1743 &2406& 3261 & 3294 & 2774)