The twenty-first century is a biology-technology developing century. We know that a gene is made of DNA. The nucleotide bases from which DNA is built are A (adenine), C (cytosine), G (guanine), and T (thymine). Finding the longest common subsequence between DNA/protein sequences is one of the basic problems in modern computational molecular biology. But this problem is a little different. Given several DNA sequences, you are asked to make a shortest sequence from them so that each of the given sequences is a subsequence of it.
For example, given “ACGT”, “ATGC”, “CGTT” and “CAGT”, you can make a sequence in the following way. It is the shortest, but it may not be the only one.
题意:给定n个串,构造一个最短的序列,使得每个给定的串都是它的子序列(这里说“序列”而不是“串”,是因为不要求连续,只要保持相对顺序就好)。
一开始自以为是爆搜开了,因为已经知道了可以用迭代加深搜索。所谓迭代加深搜索,就是每次都限制了DFS的深度,若搜不到答案,则加深深度,继续搜索,这样就防止了随着深度不断加深而进行的盲目搜索,而且,对于这种求最短长度之类的题目,只要找到可行解,即是最优解了。所以就这样敲完代码了,敲完之后,悲剧TLE。
少了一步十分重要的剪枝:每次DFS的时候,都要判断一下,当前的深度+最少还要加深的深度是否大于限制的深度,若是,则退回。
以下是超时代码:
// NOTE(review): the header names on the original #include lines were lost
// (the angle-bracket contents are missing). The two headers below are the ones
// this program visibly needs: scanf/printf and strlen/memset. Add back any
// further headers the original listed if they can be recovered.
#include <cstdio>
#include <cstring>
using namespace std;

char str[10][10];               // the n input strings, one per row
int n;                          // number of strings in the current test case
int ans;                        // length of the sequence found; -1 while none has been found
int deep;                       // current depth limit of the iterative-deepening search
int size[10];                   // size[i] is the length of str[i]
int T;                          // number of test cases still to process
char DNA[4]={'A','C','G','T'};  // the letters tried at every search step
// Returns the larger of x and y (either one when they are equal).
int max(int x,int y)
{
    return (x>=y) ? x : y;
}
// Returns the smaller of x and y (either one when they are equal).
int min(int x,int y)
{
    return (y<x) ? y : x;
}
void dfs(int cnt,int len[])
{
int maxx=0;//预计还要匹配的字符串的最大长度
if(cnt>deep) return;
for(int i=0;iint t=size[i]-len[i];
if(t>maxx) maxx=t;
}
if(maxx==0)//条件全部满足即为最优解
{
ans=cnt;
return;
}
if(cnt+maxx>deep) return ;
for(int i=0;i<4;i++)
{
int pos[10];
int flag=0;
for(int j=0;jif(str[j][len[j]]==DNA[i])
{
flag=1;
pos[j]=len[j]+1;
}
else pos[j]=len[j];
}
if(flag==1) dfs(cnt+1,pos);
if(ans!=-1) break;
}
}
// Reads the number of test cases, then for each case reads n strings and prints
// the length of the shortest sequence that dfs finds by iterative deepening.
// NOTE(review): str and size have room for 10 strings of at most 9 characters;
// the input is assumed to respect that — confirm against the problem limits.
int main()
{
    scanf("%d",&T);
    while(T--)
    {
        scanf("%d",&n);
        int maxn=0; // length of the longest input string
        for(int i=0;i<n;i++)
        {
            // Each string goes into its own row; the width keeps the read
            // inside the 10-byte row.
            scanf("%9s",str[i]);
            size[i]=static_cast<int>(strlen(str[i]));
            if(size[i]>maxn) maxn=size[i];
        }
        ans=-1;
        deep=maxn; // no answer can be shorter than the longest string
        int pos[10]; // how far each of the n strings has been matched so far
        memset(pos,0,sizeof(pos));
        while(1)
        {
            dfs(0,pos);
            if(ans!=-1) break;
            deep++; // no answer within this limit: deepen and search again
        }
        printf("%d\n",ans);
    }
    return 0;
}
以下是AC代码:
// NOTE(review): the header names on the original #include lines were lost
// (the angle-bracket contents are missing). The two headers below are the ones
// this program visibly needs: scanf/printf and strlen. Add back any further
// headers the original listed if they can be recovered.
#include <cstdio>
#include <cstring>
using namespace std;

char str[10][10];               // the n input strings, one per row
int n;                          // number of strings in the current test case
int ans;                        // length of the sequence found; -1 while none has been found
int len;                        // current depth limit of the iterative-deepening search
int size[10];                   // size[i] is the length of str[i]
char DNA[4]={'A','C','G','T'};  // the letters tried at every search step
void dfs(int cnt,int len1[])
{
if(cnt>len)
return ;
int max=0;
for(int i=0;i//找出最长还要加深的深度
{
int t=size[i]-len1[i];
if(t>max)
max=t;
}
if(max==0)
{
ans=cnt;
return ;
}
if(cnt+max>len)
return ;
for(int i=0;i<4;i++)
{
int p[10];
int flag=0;
for(int j=0;jif(str[j][len1[j]]==DNA[i])//表示该字母可以往下搜索
{
flag=1;
p[j]=len1[j]+1;
}
else p[j]=len1[j];
}
if(flag)
dfs(cnt+1,p);
if(ans!=-1)
break;
}
}
// Reads the number of test cases, then for each case reads n strings and prints
// the length of the shortest sequence that dfs finds by iterative deepening.
// NOTE(review): str and size have room for 10 strings of at most 9 characters;
// the input is assumed to respect that — confirm against the problem limits.
int main()
{
    int cas;
    scanf("%d",&cas);
    while(cas--)
    {
        scanf("%d",&n);
        int max1=0;
        for(int i=0;i<n;i++)
        {
            // The width keeps the read inside the 10-byte row.
            scanf("%9s",str[i]);
            size[i]=static_cast<int>(strlen(str[i]));
            if(size[i]>max1) // the longest string gives the initial depth limit
                max1=size[i];
        }
        ans=-1;
        len=max1;
        int p[10]={0}; // how many characters of each string are matched so far
        while(true)
        {
            dfs(0,p);
            if(ans!=-1)
                break;
            len++; // no answer within this limit: deepen and search again
        }
        printf("%d\n",ans);
    }
    return 0;
}