Each test case begins with a positive integer k, the number of infringing segments to be found. Following this line are two code bases. The first code base is preceded by the line "BEGIN TDP CODEBASE" and contains a number of lines followed by the line "END TDP CODEBASE". The second code base is preceded by "BEGIN JCN CODEBASE" and followed by "END JCN CODEBASE". The line "END TDP CODEBASE" does not appear in the first code base and the line "END JCN CODEBASE" does not appear in the second. A line containing 0 follows the last test case.
For each test case you should output: (1) a line containing "CASE n" where n is the number of the test case; (2) up to k infringing segments. Each segment should be printed exactly as it appears in the JCN code base (including newlines and whitespace) and should be preceded by a line containing "INFRINGING SEGMENT m LENGTH l POSITION p" where m is the number of the segment within the test case, l is the length of the segment (in characters) and p is the position of the segment (in characters from the start of the JCN codebase). Output an empty line between test cases.
A code base is simply a string of characters. An infringing segment is a non-empty contiguous sequence of characters in the JCN code base that is textually identical to some contiguous sequence of characters in the TDP code base, and is not contained in a larger infringing segment. You should consider all characters in the code base, including spaces and the newline character at the end of each line.
If there are k or fewer common segments, print them all in decreasing order by length; if several segments have the same length, print them in the order they occur in JCN's code base. If there are more than k segments, print the first k according to the given order.
You may assume that no code base contains more than 50,000 characters.
6 BEGIN TDP CODEBASE the quick brown fox jumps over the lazy dog. so there! END TDP CODEBASE BEGIN JCN CODEBASE now is the time for all good men to come to the aid of the party. so there! END JCN CODEBASE 100 BEGIN TDP CODEBASE xyzzy END TDP CODEBASE BEGIN JCN CODEBASE xyzzabczzyy END JCN CODEBASE 0
CASE 1 INFRINGING SEGMENT 1 LENGTH 12 POSITION 64 . so there! INFRINGING SEGMENT 2 LENGTH 5 POSITION 6 the INFRINGING SEGMENT 3 LENGTH 5 POSITION 42 o the INFRINGING SEGMENT 4 LENGTH 5 POSITION 43 the INFRINGING SEGMENT 5 LENGTH 5 POSITION 54 the INFRINGING SEGMENT 6 LENGTH 3 POSITION 15 fo CASE 2 INFRINGING SEGMENT 1 LENGTH 4 POSITION 0 xyzz INFRINGING SEGMENT 2 LENGTH 3 POSITION 7 zzy INFRINGING SEGMENT 3 LENGTH 2 POSITION 10 y
输出N个第二段抄袭第一段的地方,如果少于N个输出全部。如果多于N个,优先输出长的,长度相等优先输出位置靠前的。
把两个串连起来中间用分隔符隔开,求出height数组。有些后缀是在第一段中的,有些后缀是在第二段中的,可以根据sa判断这个后缀是在第一段还是第二段。按后缀大小顺序,正着遍历一遍可以得到第二段每个位置在第一段中后缀排它前面的抄袭长度,倒着遍历一遍可以得到第二段每个位置在第一段中后缀排它后面的抄袭长度,两者里面取大的。得到每个位置的抄袭后,处理掉覆盖的,排序输出。
注意换行符也算抄袭,输入会有空行的情况。
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <string>
#include <iostream>
using namespace std;

// Capacity of every per-character array (both code bases + separator + terminator).
const int MAXN=100010*2;
const int MAXNODE=4*MAXN;   // not referenced by the code below
const int LOGMAXN=50;       // not referenced by the code below
const int INF=0x3f3f3f3f;
// Symbol placed between the two code bases; larger than any byte value (0..255),
// so it never matches a real character and its suffix sorts last.
const int SEPARATOR=260;
// Alphabet size handed to the suffix-array builder; must exceed SEPARATOR.
const int ALPHABET=270;

// N   : maximum number of segments to print for the current test case (k in the statement).
// pos : index of the separator inside sa.s, i.e. the length of the TDP code base.
// len : len[i] = longest match with TDP starting at JCN offset i.
// T is not referenced by the code below.
int T,N,pos,len[MAXN];
// Line buffer used by read_line().
char str[MAXN];

// One reported segment: JCN offset p and length l.
// Ordering: longer segments first, ties broken by smaller offset.
struct st{
    int p,l;
    bool operator < (const st& rhs)const{
        if(l==rhs.l) return p<rhs.p;
        return l>rhs.l;
    }
}ans[MAXN];

// Suffix array (prefix doubling with counting sort) plus LCP ("height") array
// over the integer string s[0..n-1]. The last symbol must be a unique minimum
// (0 here); both builders rely on that sentinel to stay inside the string.
struct SuffixArray{
    int s[MAXN];        // input symbols
    int c[MAXN];        // counting-sort buckets
    int sa[MAXN];       // sa[i] = start of the i-th smallest suffix
    int height[MAXN];   // height[i] = LCP of suffixes sa[i-1] and sa[i]
    int rank[MAXN];     // rank[p] = position of suffix p inside sa
    int t[MAXN],t2[MAXN];
    int n;              // number of symbols currently stored in s

    void clear(){
        n=0;
        memset(sa,0,sizeof(sa));
    }

    // Appends one symbol; returns false (and stores nothing) when s is full.
    bool push(int ch){
        if(n>=MAXN) return false;
        s[n++]=ch;
        return true;
    }

    // Builds sa for symbols in [0, m).
    void build_sa(int m){
        int i,*x=t,*y=t2;
        for(i=0;i<m;i++) c[i]=0;
        for(i=0;i<n;i++) c[x[i]=s[i]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
        for(int k=1;k<n;k<<=1){
            // y = suffixes ordered by their second key (rank of suffix i+k).
            int p=0;
            for(i=n-k;i<n;i++) y[p++]=i;
            for(i=0;i<n;i++) if(sa[i]>=k) y[p++]=sa[i]-k;
            // Stable counting sort by the first key.
            for(i=0;i<m;i++) c[i]=0;
            for(i=0;i<n;i++) c[x[y[i]]]++;
            for(i=1;i<m;i++) c[i]+=c[i-1];
            for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
            // Re-rank: equal (first, second) key pairs share a rank.
            swap(x,y);
            p=1;
            x[sa[0]]=0;
            for(i=1;i<n;i++)
                x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;
            if(p>=n) break;   // all ranks distinct: sorting finished
            m=p;
        }
    }

    // Builds rank and height from sa, reusing the previous match length
    // (decremented by one) as the starting point for the next suffix.
    void build_height(){
        int k=0;
        for(int i=0;i<n;i++) rank[sa[i]]=i;
        height[0]=0;
        // Suffix n-1 is the sentinel (rank 0) and has no predecessor, so it is skipped.
        for(int i=0;i<n-1;i++){
            if(k) k--;
            int j=sa[rank[i]-1];
            while(s[i+k]==s[j+k]) k++;
            height[rank[i]]=k;
        }
    }
}sa;

// One sweep over the suffix array in direction step (+1 or -1). For every JCN
// suffix it raises len[] to the LCP with the nearest TDP suffix already passed
// in that direction.
static void sweep(int step){
    int best=-1;   // -1: no TDP suffix seen yet in this direction
    int i=step>0?0:sa.n-1;
    for(;i>=0&&i<sa.n;i+=step){
        int start=sa.sa[i];
        if(start<pos){          // TDP suffix: restart the running minimum
            best=INF;
            continue;
        }
        if(best==-1) continue;
        // height[x] links sa[x-1] and sa[x]; pick the link just crossed.
        best=min(best,sa.height[step>0?i:i+1]);
        // The separator suffix is not a JCN position; storing for it would
        // write len[-1].
        if(start==pos) continue;
        int at=start-pos-1;
        len[at]=max(len[at],best);
    }
}

// Computes the infringing segments of the current test case from the built
// suffix array and prints at most N of them, longest first.
void solve(){
    memset(len,0,sizeof(len));
    sweep(+1);
    sweep(-1);

    // Keep only matches whose end extends past every earlier match; the others
    // are contained in a larger segment.
    int now=-1,k=0;
    for(int i=0;i<sa.n-pos;i++){
        if(i+len[i]<=now||len[i]<=0) continue;
        ans[k].p=i;
        ans[k].l=len[i];
        k++;
        now=i+len[i];
    }
    sort(ans,ans+k);
    k=min(k,N);
    for(int i=0;i<k;i++){
        printf("INFRINGING SEGMENT %d LENGTH %d POSITION %d\n",i+1,ans[i].l,ans[i].p);
        for(int j=0;j<ans[i].l;j++) putchar(sa.s[pos+ans[i].p+1+j]);
        puts("");
    }
}

// Reads one line from stdin into str, dropping the trailing newline.
// Returns false at end of input.
static bool read_line(){
    if(!fgets(str,MAXN,stdin)) return false;
    size_t got=strlen(str);
    if(got>0&&str[got-1]=='\n') str[got-1]='\0';
    return true;
}

// Consumes lines up to and including the one equal to marker.
// Returns false if input ends first.
static bool skip_to_marker(const char* marker){
    while(read_line())
        if(strcmp(str,marker)==0) return true;
    return false;
}

// Appends every line before end_marker (each followed by '\n') to sa.s.
// Returns false if input ends before the marker or the buffer fills up.
static bool read_codebase(const char* end_marker){
    while(read_line()){
        if(strcmp(str,end_marker)==0) return true;
        for(int i=0;str[i];i++)
            // unsigned char keeps bytes >= 128 non-negative so they can index buckets.
            if(!sa.push((unsigned char)str[i])) return false;
        if(!sa.push('\n')) return false;
    }
    return false;
}

// Reads test cases from standard input until a count of 0 (or end of input)
// and prints the report for each one.
int main(){
    int cas=0;
    while(scanf("%d",&N)==1&&N){
        sa.clear();
        bool ok=skip_to_marker("BEGIN TDP CODEBASE")&&read_codebase("END TDP CODEBASE");
        pos=sa.n;
        ok=ok&&sa.push(SEPARATOR);
        ok=ok&&skip_to_marker("BEGIN JCN CODEBASE")&&read_codebase("END JCN CODEBASE");
        ok=ok&&sa.push(0);   // unique smallest terminator required by the builders
        if(!ok){
            fprintf(stderr,"malformed or oversized input\n");
            return 1;
        }
        sa.build_sa(ALPHABET);
        sa.build_height();
        if(cas) puts("");
        printf("CASE %d\n",++cas);
        solve();
    }
    return 0;
}