关于广义后缀树(多串SAM)的总结

之前我们给的SAM的例题,基本上是一个串建SAM的就能做的

如果要建多个串的SAM应该怎么做呢

首先看题,bzoj2780

我一开始的想法是SA以前的弄法,把串拼起来,中间加分隔符做SAM

这题确实可以这么做,这样根据SAM能识别所有子串的性质

而且每个节点都代表了唯一的一个串

每个询问串我们都能找到最终转移到哪(找不到就是没出现过)

问在多少个串出现过这就等价于在ST(s)的parent树的子树中,出现了多少种不同的权值

这显然可以维护dfs序,用经典的离线做法来搞(更好的写法见文末UPD)

  1 type node=record
  2        po,next:longint;
  3      end;
  4 
  5 var go:array[0..300010,1..27] of longint;
  6     d,mx,fa,l,r,p,c,wh,w,fir,next:array[0..300010] of longint;
  7     ans,a,b:array[0..60010] of longint;
  8     e:array[0..300010] of node;
  9     h,t,k,last,len,i,j,n,q,x:longint;
 10     s,ss:ansistring;
 11 
 12 function lowbit(x:longint):longint;
 13   begin
 14     exit(x and (-x));
 15   end;
 16 
 17 function cmp(a,b:longint):boolean;
 18   begin
 19     exit(l[a]<l[b]);
 20   end;
 21 
 22 procedure swap(var a,b:longint);
 23   var c:longint;
 24   begin
 25     c:=a;
 26     a:=b;
 27     b:=c;
 28   end;
 29 
 30 procedure sort(l,r:longint);
 31   var i,j,x:longint;
 32   begin
 33     i:=l;
 34     j:=r;
 35     x:=a[(l+r) shr 1];
 36     repeat
 37       while cmp(a[i],x) do inc(i);
 38       while cmp(x,a[j]) do dec(j);
 39       if not(i>j) then
 40       begin
 41         swap(a[i],a[j]);
 42         swap(b[i],b[j]);
 43         inc(i);
 44         dec(j);
 45       end;
 46     until i>j;
 47     if lthen sort(l,j);
 48     if ithen sort(i,r);
 49   end;
 50 
 51 procedure build(x,y:longint);
 52   begin
 53     inc(len);
 54     e[len].po:=y;
 55     e[len].next:=p[x];
 56     p[x]:=len;
 57   end;
 58 
 59 procedure add(c,x:longint);
 60   var p,q,np,nq:longint;
 61   begin
 62     p:=last;
 63     inc(t); last:=t; np:=t;
 64     w[np]:=x; mx[np]:=mx[p]+1;
 65     while (p<>0) and (go[p,c]=0) do
 66     begin
 67       go[p,c]:=np;
 68       p:=fa[p];
 69     end;
 70     if p=0 then fa[np]:=1
 71     else begin
 72       q:=go[p,c];
 73       if mx[q]=mx[p]+1 then fa[np]:=q
 74       else begin
 75         inc(t); nq:=t;
 76         mx[nq]:=mx[p]+1;
 77         go[nq]:=go[q];
 78         fa[nq]:=fa[q];
 79         fa[q]:=nq; fa[np]:=nq;
 80         while go[p,c]=q do
 81         begin
 82           go[p,c]:=nq;
 83           p:=fa[p];
 84         end;
 85       end;
 86     end;
 87   end;
 88 
 89 procedure dfs(x:longint);
 90   var i:longint;
 91   begin
 92     inc(h);
 93     l[x]:=h;
 94     d[h]:=w[x];  //dfs序
 95     i:=p[x];
 96     while i<>0 do
 97     begin
 98       dfs(e[i].po);
 99       i:=e[i].next;
100     end;
101     r[x]:=h;
102   end;
103 
104 procedure work(x:longint);
105   begin
106     while x<=t do
107     begin
108       inc(c[x]);
109       x:=x+lowbit(x);
110     end;
111   end;
112 
113 function ask(x:longint):longint;
114   begin
115     ask:=0;
116     while x>0 do
117     begin
118       ask:=ask+c[x];
119       x:=x-lowbit(x);
120     end;
121   end;
122 
123 begin
124   readln(n,q);
125   for i:=1 to n do
126   begin
127     readln(ss);
128     len:=length(ss);
129     for j:=1 to len do  //拼接
130     begin
131       s:=s+ss[j];
132       inc(t); wh[t]:=i;
133     end;
134     if i<>n then
135     begin
136       s:=s+chr(97+26);
137       inc(t);
138     end;
139   end;
140   len:=length(s);
141   t:=1; last:=1;
142   for i:=len downto 1 do
143     add(ord(s[i])-96,wh[i]);
144 
145   len:=0;
146   for i:=2 to t do  //构建树
147     build(fa[i],i);
148 
149   dfs(1);
150   for i:=1 to q do
151   begin
152     readln(s);
153     j:=1;
154     len:=length(s);
155     for k:=len downto 1 do  //每个询问串最终转移到哪
156     begin
157       x:=ord(s[k])-96;
158       if go[j,x]=0 then
159       begin
160         j:=0;
161         break;
162       end
163       else j:=go[j,x];
164     end;
165     a[i]:=j;
166     b[i]:=i;
167   end;
168   for i:=t downto 2 do  //经典的离线做法
169   begin
170     next[i]:=fir[d[i]];
171     fir[d[i]]:=i;
172   end;
173   for i:=1 to n do
174     if fir[i]<>0 then work(fir[i]);
175   sort(1,q);
176   j:=1;
177   while a[j]=0 do inc(j);
178   for i:=1 to t do
179   begin
180     while (j<=q) and (l[a[j]]=i) do
181     begin
182       ans[b[j]]:=ask(r[a[j]])-ask(i-1);
183       inc(j);
184     end;
185     if d[i]<>0 then
186       if next[i]<>0 then work(next[i]);
187   end;
188   for i:=1 to q do
189     writeln(ans[i]);
190 end.
2780

然后我看到了bzoj3277 3473(双倍经验)

用我刚才的做法似乎不好搞,因为这题问每个字符串有多少子串出现在至少k个子串中

而刚才那种拼接,每个节点可接受的子串会搞出一堆不存在的子串

这时,我膜拜了wyfcyx的构建广义后缀树的做法,显然这里每个串要反序建SAM(这样才能构造出原串的后缀树)

他的做法是建完一个串的SAM后回到根,对下个串s先匹配

如果转移到的节点在SAM中可接受的最长串长度=当前匹配s的长度,那么这个节点可以代表s

否则的话就像SAM一样新开一个节点,感觉和可持久化的思想很像

这样每个节点可能代表了多个串的子串,并且也没有多出来奇怪的子串

而且每个节点可接受串出现在多少个串中依然=parent树的子树中,出现了多少种不同的权值

这样我们可以像刚才一样,求出每个点出现次数,如果大于等于k,

那么根据之前的性质,这个点p可接受串的长度为[max[fa[p]]+1,max[p]]

那么点p能做出的贡献即为max[p]-max[fa[p]],否则贡献为0

由于子串是某个后缀的前缀,所以每个字符串的答案等于所有这个字符串的后缀节点的从根到该节点的权值和

  1 type node=record
  2        po,next:longint;
  3      end;
  4 
  5 var e,w,pr:array[0..400010] of node;
  6     go:array[0..400010,'a'..'z'] of longint;
  7     sa,r,h,q,p,c,d,cur,a,b,mx,fa:array[0..400010] of longint;
  8     g,ans:array[0..400010] of int64;
  9     n,k,l,ti,y,f,i,j,len,x,t,last:longint;
 10     s:ansistring;
 11     ch:char;
 12 
 13 function lowbit(x:longint):longint;
 14   begin
 15     exit(x and (-x));
 16   end;
 17 
 18 procedure swap(var a,b:longint);
 19   var c:longint;
 20   begin
 21     c:=a;
 22     a:=b;
 23     b:=c;
 24   end;
 25 
 26 procedure ins(x,y:longint);
 27   begin
 28     inc(len);
 29     e[len].po:=y;
 30     e[len].next:=p[x];
 31     p[x]:=len;
 32   end;
 33 
 34 procedure add(c:char;x:longint);
 35   var p,q,np,nq:longint;
 36   begin
 37     p:=last;
 38     inc(t); last:=t; np:=t;
 39     mx[np]:=mx[p]+1;
 40     while (p<>0) and (go[p,c]=0) do
 41     begin
 42       go[p,c]:=np;
 43       p:=fa[p];
 44     end;
 45     if p=0 then fa[np]:=1
 46     else begin
 47       q:=go[p,c];
 48       if mx[q]=mx[p]+1 then fa[np]:=q
 49       else begin
 50         inc(t); nq:=t;
 51         mx[nq]:=mx[p]+1;
 52         go[nq]:=go[q];
 53         fa[nq]:=fa[q];
 54         fa[q]:=nq; fa[np]:=nq;
 55         while go[p,c]=q do
 56         begin
 57           go[p,c]:=nq;
 58           p:=fa[p];
 59         end;
 60       end;
 61     end;
 62   end;
 63 
 64 procedure change(c:char);
 65   var p,np,q:longint;
 66   begin
 67     p:=go[last,c];
 68     if mx[p]=mx[last]+1 then last:=p
 69     else begin
 70       inc(t); np:=t;
 71       mx[np]:=mx[last]+1;
 72       go[np]:=go[p];
 73       fa[np]:=fa[p];
 74       fa[p]:=np;
 75       q:=last;
 76       while go[q,c]=p do
 77       begin
 78         go[q,c]:=np;
 79         q:=fa[q];
 80       end;
 81       last:=np;
 82     end;
 83   end;
 84 
 85 procedure dfs(x:longint);
 86   var i:longint;
 87   begin
 88     inc(ti);
 89     sa[ti]:=x;   //dfs序上对应哪个点
 90     b[x]:=ti;
 91     i:=p[x];
 92     while i<>0 do
 93     begin
 94       dfs(e[i].po);
 95       i:=e[i].next;
 96     end;
 97     r[x]:=ti;
 98   end;
 99 
100 procedure dfss(x:longint);
101   var i:longint;
102   begin
103     g[x]:=g[x]+g[fa[x]];
104     i:=p[x];
105     while i<>0 do
106     begin
107       dfss(e[i].po);
108       i:=e[i].next;
109     end;
110   end;
111 
112 procedure work(x,w:longint);
113   begin
114     while x<=t do
115     begin
116       inc(c[x],w);
117       x:=x+lowbit(x);
118     end;
119   end;
120 
121 function ask(x:longint):longint;
122   begin
123     ask:=0;
124     while x>0 do
125     begin
126       ask:=ask+c[x];
127       x:=x-lowbit(x);
128     end;
129   end;
130 
131 procedure put(x,y:longint);
132   begin
133     inc(len);  //这个串x所有后缀所在的节点
134     w[len].po:=y;
135     w[len].next:=q[x];
136     q[x]:=len;
137     pr[len].po:=x;  //节点代表了哪些串的后缀
138     pr[len].next:=h[y];
139     h[y]:=len;
140   end;
141 
142 function cmp(a,b:longint):boolean;
143   begin
144     exit(r[a]<r[b]);
145   end;
146 
147 procedure sort(l,r:longint);
148   var i,j,x:longint;
149   begin
150     i:=l;
151     j:=r;
152     x:=a[(l+r) shr 1];
153     repeat
154       while cmp(a[i],x) do inc(i);
155       while cmp(x,a[j]) do dec(j);
156       if not(i>j) then
157       begin
158         swap(a[i],a[j]);
159         inc(i);
160         dec(j);
161       end;
162     until i>j;
163     if lthen sort(l,j);
164     if ithen sort(i,r);
165   end;
166 
167 begin
168   readln(n,k);
169   last:=1; t:=1;
170   for i:=1 to n do
171   begin
172     readln(s);
173     l:=length(s); last:=1;
174     for j:=l downto 1 do
175     begin
176       if go[last,s[j]]<>0 then change(s[j]) //广义后缀树
177       else add(s[j],i);
178       put(i,last);
179     end;
180   end;
181   len:=0;
182   for i:=2 to t do
183     ins(fa[i],i);
184 
185   dfs(1);
186   for i:=1 to t do
187     a[i]:=i;
188   sort(1,t);
189   j:=1; x:=a[1];
190   for i:=1 to t do
191   begin
192     f:=h[sa[i]];
193     while f<>0 do  //因为一个节点可能代表了多个穿,插入相对麻烦
194     begin
195       y:=pr[f].po;
196       if cur[y]<>0 then work(cur[y],-1);
197       cur[y]:=i;
198       work(i,1);
199       f:=pr[f].next;
200     end;
201     while (j<=t) and (r[x]=i) do
202     begin
203       len:=ask(i)-ask(b[x]-1);
204       if lenthen g[x]:=0 else g[x]:=mx[x]-mx[fa[x]];
205       inc(j); x:=a[j];
206     end;
207     if j=t+1 then break;
208   end;
209   dfss(1);
210   for i:=1 to n do
211   begin
212     j:=q[i];
213     while j<>0 do
214     begin
215       x:=w[j].po;
216       ans[i]:=ans[i]+g[x];
217       j:=w[j].next;
218     end;
219   end;
220   for i:=1 to n do
221     write(ans[i],' ');
222   writeln;
223 end.
View Code

bzoj2806 第一道小强和阿米巴的题,解锁新成就

构造出标准作文库的SAM后,L0不难想到二分答案吧

然后我们可以求出以询问串每个位置i为结尾的最长子串长度P[i]

不难得到f[i]到i最长熟悉 f[i]=max(f[i-1],f[j]+i-j) (i-j>=l0 且 (i-j<=P[i])

然后这个是明显的单调队列优化吧

  1 var go:array[0..1200010*2,'0'..'1'] of longint;
  2     q,v,f:array[0..1200010] of longint;
  3     fa,mx:array[0..1200010*2] of longint;
  4     ans,mid,i,n,m,last,t,l,r,j:longint;
  5     s:ansistring;
  6     c:char;
  7 
  8 function max(a,b:longint):longint;
  9   begin
 10     if a>b then exit(a) else exit(b);
 11   end;
 12 
 13 procedure change(c:char);
 14   var q,p,np:longint;
 15   begin
 16     p:=go[last,c];
 17     if mx[p]=mx[last]+1 then last:=p
 18     else begin
 19       inc(t); np:=t;
 20       mx[np]:=mx[last]+1;
 21       go[np]:=go[p];
 22       fa[np]:=fa[p];
 23       fa[p]:=np;
 24       q:=last;
 25       while go[q,c]=p do
 26       begin
 27         go[q,c]:=np;
 28         q:=fa[q];
 29       end;
 30       last:=np;
 31     end;
 32   end;
 33 
 34 procedure add(c:char);
 35   var p,q,np,nq:longint;
 36   begin
 37     p:=last;
 38     inc(t); last:=t; np:=t;
 39     mx[np]:=mx[p]+1;
 40     while (p<>0) and (go[p,c]=0) do
 41     begin
 42       go[p,c]:=np;
 43       p:=fa[p];
 44     end;
 45     if p=0 then fa[np]:=1
 46     else begin
 47       q:=go[p,c];
 48       if mx[q]=mx[p]+1 then fa[np]:=q
 49       else begin
 50         inc(t); nq:=t;
 51         mx[nq]:=mx[p]+1;
 52         go[nq]:=go[q];
 53         fa[nq]:=fa[q];
 54         fa[q]:=nq; fa[np]:=nq;
 55         while go[p,c]=q do
 56         begin
 57           go[p,c]:=nq;
 58           p:=fa[p];
 59         end;
 60       end;
 61     end;
 62   end;
 63 
 64 procedure match;
 65   var i,j,l,t:longint;
 66   begin
 67     j:=1; t:=0;
 68     l:=length(s);
 69     for i:=1 to l do
 70     begin
 71       if go[j,s[i]]<>0 then
 72       begin
 73         inc(t);
 74         j:=go[j,s[i]];
 75       end
 76       else begin
 77         while (j<>0) and (go[j,s[i]]=0) do j:=fa[j];
 78         if j=0 then
 79         begin
 80           t:=0;
 81           j:=1;
 82         end
 83         else begin
 84           t:=mx[j]+1;
 85           j:=go[j,s[i]];
 86         end;
 87       end;
 88       v[i]:=t;
 89     end;
 90   end;
 91 
 92 function cmp(i,j:longint):boolean;
 93   begin
 94     exit(f[i]-ij);
 95   end;
 96 
 97 function check(l0:longint):boolean;
 98   var h,t,i,n:longint;
 99   begin
100     n:=length(s);
101     for i:=0 to l0-1 do
102       f[i]:=0;
103     h:=1; t:=0;
104     for i:=l0 to n do
105     begin
106       while (h<=t) and (cmp(q[t],i-l0)) do dec(t);
107       inc(t);
108       q[t]:=i-l0;
109       f[i]:=f[i-1];
110       while (h<=t) and (q[h]do inc(h);
111       if h<=t then f[i]:=max(f[i],f[q[h]]+i-q[h]);
112     end;
113     if f[n]/n>=0.89999999999 then exit(true) else exit(false);
114   end;
115 
116 begin
117   readln(n,m);
118   t:=1;
119   for i:=1 to m do
120   begin
121     readln(s);
122     last:=1;
123     l:=length(s);
124     for j:=1 to l do
125       if go[last,s[j]]<>0 then change(s[j])
126       else add(s[j]);
127   end;
128   for i:=1 to n do
129   begin
130     readln(s);
131     match;
132     l:=0;
133     r:=length(s);
134     while l<=r do
135     begin
136       mid:=(l+r) shr 1;
137       if check(mid) then
138       begin
139         ans:=mid;
140         l:=mid+1;
141       end
142       else r:=mid-1;
143     end;
144     writeln(ans);
145   end;
146 end.
2806

 

UPD:以前写的广义后缀树有点冗长,最近转c++重新写了一份感觉好多了……

以我之前写的2780为例

  1 #include
  2 #include
  3 #include
  4 #include
  5 #include
  6 #include
  7 
  8 using namespace std;
  9 vector<int> b[200010],q[10010];
 10 //b[]记录每个节点是哪些串的子串,q[]记录每个串所有后缀所在的节点
 11 struct way{int po,next;} e[200010];
 12 struct node{int w,id;} a[60010];
 13 int ans[60010],w[200010],go[200010][26],fa[200010],mx[200010],l[200010],r[200010],p[200010],c[200010];
 14 int len,t,last,n,m;
 15 char s[100010];
 16 
 17 bool cmp(node a,node b)
 18 {
 19      return l[a.w]<l[b.w];
 20 }
 21 
 22 void work(int c)  //比较优美的写法
 23 {
 24      int np,nq,q,p=last;
 25      if (!go[last][c])
 26      {
 27         np=++t;
 28         mx[np]=mx[p]+1;
 29         for (;p&&!go[p][c];p=fa[p]) go[p][c]=np;
 30      }
 31      else np=0; 
 32      if (!p) fa[np]=1;
 33      else {
 34           q=go[p][c];
 35           if (mx[q]==mx[p]+1) fa[np]=q;
 36           else {
 37                nq=++t; 
 38                mx[nq]=mx[p]+1;
 39                memcpy(go[nq],go[q],sizeof(go[q]));
 40                fa[nq]=fa[q]; fa[q]=fa[np]=nq;
 41                for (;go[p][c]==q;p=fa[p]) go[p][c]=nq;
 42           }
 43      }
 44      last=go[last][c];
 45 }
 46  
 47 void build(int x,int y)
 48 {
 49      e[++len].po=y;
 50      e[len].next=p[x];
 51      p[x]=len;
 52 }   
 53 
 54 void dfs(int x)
 55 {
 56      l[x]=++t; w[t]=x;
 57      for (int i=p[x];i;i=e[i].next)
 58      {
 59          dfs(e[i].po);
 60      }
 61      r[x]=t;
 62 }
 63    
 64 void add(int x,int w)
 65 {
 66      for (int i=x;i<=t;i+=i&-i) c[i]+=w;
 67 }         
 68 
 69 int ask(int x)
 70 {
 71      int s=0;
 72      for (int i=x;i;i-=i&-i) s+=c[i];
 73      return s;
 74 }
 75 int main()
 76 {
 77     scanf("%d%d",&n,&m);
 78     t=last=1;
 79     for (int i=1; i<=n; i++)
 80     {
 81         scanf("%s",s+1); len=strlen(s+1); 
 82         last=1;
 83         for (int j=1; j<=len;j++)
 84         {
 85             work(s[j]-'a');   
 86             b[last].push_back(i);
 87         }         
 88     }
 89     len=fa[0]=0;
 90     for (int i=2; i<=t; i++) build(fa[i],i);
 91     t=0; dfs(1);
 92     for (int i=1; i<=m; i++)
 93     {
 94         scanf("%s",s+1); len=strlen(s+1);
 95         int j=1;
 96         for (int k=1;k<=len;k++)
 97         {
 98             if (!go[j][s[k]-'a']) {j=0;break;}
 99             j=go[j][s[k]-'a'];
100         }
101         a[i].w=j; a[i].id=i;
102     }
103     sort(a+1,a+1+m,cmp);
104     vector<int>::iterator k;
105     for (int i=1;i<=t;i++)
106         for (k=b[w[i]].begin();k!=b[w[i]].end(); k++) q[*k].push_back(i);
107     vector<int>::iterator cur[10010];
108     for (int i=1; i<=n; i++)
109     {
110         cur[i]=q[i].begin();
111         if (cur[i]!=q[i].end()) {add(*cur[i],1); cur[i]++;}
112     }
113     int j=1;
114     while (!a[j].w) j++;  
115     for (int i=1; i<=t; i++)
116     {
117         for (;l[a[j].w]==i; j++) ans[a[j].id]=ask(r[a[j].w])-ask(l[a[j].w]-1);
118         for (k=b[w[i]].begin();k!=b[w[i]].end(); k++)
119         {
120             int x=*k;
121             if (cur[x]!=q[x].end()) {add(*cur[x],1); cur[x]++;}
122         }
123     }        
124     for (int i=1; i<=m; i++) printf("%d\n",ans[i]);
125     system("pause");
126     return 0;
127 }
2780(c++更新版)

 

转载于:https://www.cnblogs.com/phile/p/4511571.html

你可能感兴趣的:(关于广义后缀树(多串SAM)的总结)