值得注意的是:(1)Compare_Substring 函数的实现——后缀数组中存放的是各个后缀的首地址(char*),而 qsort 传给比较函数的是指向数组元素的指针,因此比较函数内应先把参数转换为二级指针(char**)再解引用进行比较;(2)isInSameString 函数依靠地址范围来判断两个后缀是否属于同一个母串,当两个内容相同的字符串常量被编译器合并到同一地址时,这种判断会失效,因此要对两个字符串完全相等的情况做特殊处理:可以在判断时处理,也可以在进入函数时直接处理。同时还给出了利用后缀数组求最长重复子串的函数。
/*******************************************************************
 *
 *    DESCRIPTION: Sida 20091104
 *                 Longest common substring of two strings and longest
 *                 repeated substring of one string, both computed from
 *                 a sorted array of suffix pointers.
 *
 *    AUTHOR:Neesky
 *
 *    DATE:2009-11-11
 *
 *******************************************************************/

/** include files **/
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <vector>
using namespace std;

namespace
{
/** One suffix together with the input string (1 or 2) it was taken from. */
struct SuffixEntry
{
    const char *text;
    int owner;
};

/**
 * Strict weak ordering for SuffixEntry: lexicographic by text; suffixes
 * with equal text are ordered by owner so the sorted order is reproducible.
 */
bool SuffixEntryLess(const SuffixEntry &lhs,const SuffixEntry &rhs)
{
    const int order=std::strcmp(lhs.text,rhs.text);
    if(order!=0) return order<0;
    return lhs.owner<rhs.owner;
}

/** True when p addresses one of the characters of base (terminator excluded). */
bool PointsInto(const char *base,const char *p)
{
    /* std::less yields a total order even for pointers into unrelated
       arrays, where the built-in operators and pointer subtraction do not. */
    const std::less<const char*> before=std::less<const char*>();
    const char *end=base+std::strlen(base);
    return !before(p,base)&&before(p,end);
}

/** Returns a new[]-allocated, NUL-terminated copy of the first count chars of src. */
char* CopyPrefix(const char *src,int count)
{
    char *out=new char[count+1];
    if(count>0) std::memcpy(out,src,count); /* src may be NULL when count==0 */
    out[count]='\0';
    return out;
}
}

/**
 * [Compare Function] qsort comparator for an array of C-string pointers.
 *
 * qsort passes pointers to the array elements, so each argument is really
 * a pointer to a (const char*) and must be dereferenced once before
 * comparing.
 *
 * @author NeeSky (2009-11-11)
 *
 * @param vstr1  address of the first array element
 * @param vstr2  address of the second array element
 *
 * @return int   the strcmp result of the two referenced strings
 */
int Compare_Substring(const void *vstr1,const void *vstr2)
{
    const char *const *str1=static_cast<const char *const *>(vstr1);
    const char *const *str2=static_cast<const char *const *>(vstr2);
    return std::strcmp(*str1,*str2);
}

/**
 * [Cautious]: decides by address range whether two suffix pointers lie
 * inside the same parent string (both inside str1, or both inside str2).
 *
 * The test cannot tell the parents apart when str1 and str2 share storage,
 * e.g. when identical string literals are given the same address. An empty
 * parent string contains no characters, so nothing is reported inside it.
 *
 * @author NeeSky (2009-11-11)
 *
 * @param str1     first parent string
 * @param str2     second parent string
 * @param substra  first suffix pointer
 * @param substrb  second suffix pointer
 *
 * @return bool    true when both pointers fall inside one parent string
 */
bool isInSameString(const char *str1,const char *str2,const char *substra,const char *substrb)
{
    if(PointsInto(str1,substra)&&PointsInto(str1,substrb)) return true; /*in str1*/
    if(PointsInto(str2,substra)&&PointsInto(str2,substrb)) return true; /*in str2*/
    return false;
}

/**
 * Common Chars Numbers: length of the common prefix of two C strings.
 *
 * @author NeeSky (2009-11-11)
 *
 * @param str1
 * @param str2
 *
 * @return int  number of leading characters the two strings share
 */
int SameContinuCharNums(const char *str1,const char *str2)
{
    int i=0;
    /* If str1[i] is not NUL and equals str2[i], str2[i] is not NUL either,
       so one terminator check is enough and no strlen call is needed. */
    while(str1[i]!='\0'&&str1[i]==str2[i]) ++i;
    return i;
}

/**
 * Using Suffix Array to get the longest common substring of two strings.
 *
 * Every suffix of both inputs is tagged with the input it came from and
 * the tagged suffixes are sorted. Only neighbours with different tags are
 * compared, and the longest common prefix found is the answer. Tagging
 * replaces address-range tests, so inputs that are equal or that share
 * storage need no special case.
 *
 * @author NeeSky (2009-11-11)
 *
 * @param str1
 * @param str2
 *
 * @return char*  new[]-allocated, NUL-terminated result, never NULL and
 *                empty when nothing is shared; release it with delete[]
 */
char* GetLongestComSubstring_from2Strings(const char *str1,const char *str2)
{
    const int lenstr1=static_cast<int>(std::strlen(str1));
    const int lenstr2=static_cast<int>(std::strlen(str2));

    std::vector<SuffixEntry> suffixes; /*construct the suffix array*/
    suffixes.reserve(static_cast<std::size_t>(lenstr1)+static_cast<std::size_t>(lenstr2));
    for(int i=0; i<lenstr1; ++i)
    {
        const SuffixEntry entry={str1+i,1};
        suffixes.push_back(entry);
    }
    for(int j=0; j<lenstr2; ++j)
    {
        const SuffixEntry entry={str2+j,2};
        suffixes.push_back(entry);
    }
    std::sort(suffixes.begin(),suffixes.end(),SuffixEntryLess); /*sort*/

    int comlen=0;
    const char *MaxComSubString=NULL;
    for(std::size_t k=1; k<suffixes.size(); ++k)
    {
        /*inside same string?*/
        if(suffixes[k].owner==suffixes[k-1].owner) continue;
        const int curcomlen=SameContinuCharNums(suffixes[k].text,suffixes[k-1].text); /*compute the common length*/
        if(curcomlen>comlen)
        {
            comlen=curcomlen;
            MaxComSubString=suffixes[k].text;
        }
    }
    return CopyPrefix(MaxComSubString,comlen);
}

/**
 * Using suffix array to get the longest repitive substring, i.e. the
 * longest substring that starts at two different positions of strGiven
 * (the two occurrences may overlap).
 *
 * @author NeeSky (2009-11-11)
 *
 * @param strGiven
 *
 * @return char*  new[]-allocated, NUL-terminated result, never NULL and
 *                empty when nothing repeats; release it with delete[]
 */
char* GetLongestRepitiveSubstring(const char *strGiven)
{
    const int len=static_cast<int>(std::strlen(strGiven));
    std::vector<const char*> suffixes(len);
    for(int i=0; i<len; ++i) suffixes[i]=strGiven+i;
    /* Fewer than two suffixes need no sorting; this also avoids taking
       the address of element 0 of an empty vector. */
    if(len>1) std::qsort(&suffixes[0],suffixes.size(),sizeof(const char*),Compare_Substring); /*sort*/

    int maxlen=0;
    const char *MaxComSubString=NULL;
    for(int k=1; k<len; ++k)
    {
        const int curcomlen=SameContinuCharNums(suffixes[k],suffixes[k-1]); /*compute the common length*/
        if(curcomlen>maxlen)
        {
            maxlen=curcomlen;
            MaxComSubString=suffixes[k];
        }
    }
    return CopyPrefix(MaxComSubString,maxlen);
}

/**
 * Just Call GetLongestComSubstring_from2Strings Function and print the
 * result to standard output.
 *
 * @author NeeSky (2009-11-11)
 *
 * @param str1
 * @param str2
 */
void SolveProblemComSubstring(const char *str1,const char *str2)
{
    char *maxcomstr=GetLongestComSubstring_from2Strings(str1,str2);
    cout<<"\nLongest Common Substring between \""<<str1<<"\" and \""
        <<str2<<"\" is:(comlen="<<strlen(maxcomstr)<<")"<<flush;
    cout<<" \""<<maxcomstr<<"\""<<endl;
    delete[] maxcomstr; /* allocated with new[] */
    return;
}

/**
 * Just Call GetLongestRepitiveSubstring Function and print the result to
 * standard output.
 *
 * @author NeeSky (2009-11-11)
 *
 * @param str
 */
void SolveProblemRepitiveSubstring(const char *str)
{
    char *maxrepitive=GetLongestRepitiveSubstring(str);
    cout<<"\nLongest Repitive Substring \""<<str<<"\" is:(comlen="
        <<strlen(maxrepitive)<<")"<<flush;
    cout<<" \""<<maxrepitive<<"\""<<endl;
    delete[] maxrepitive; /* allocated with new[] */
    return;
}

/**
 * The Main Programming: runs both solvers on a fixed set of sample inputs.
 *
 * @author NeeSky (2009-11-11)
 *
 * @param argc  unused
 * @param argv  unused
 *
 * @return int  always 0
 */
int main (int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    SolveProblemComSubstring("cdec","aacc");
    SolveProblemComSubstring("cdec","bdec");
    SolveProblemComSubstring("cdec","cdya");
    SolveProblemComSubstring("caac","dedt");
    SolveProblemComSubstring("aadd","aadd");

    SolveProblemRepitiveSubstring("1234565dfc123456eeeedf");
    SolveProblemRepitiveSubstring("");
    /* Two blanks: a single blank cannot contain a repeated substring.
       NOTE(review): the published listing shows one blank here, which looks
       like collapsed whitespace - confirm against the author's file. */
    SolveProblemRepitiveSubstring("  ");
    SolveProblemRepitiveSubstring("abcddd");
    SolveProblemRepitiveSubstring("aaaaaa");
    return(0);
}
输出:
Debug/Sida20091104.exe
Longest Common Substring between "cdec" and "aacc" is:(comlen=1) "c"
Longest Common Substring between "cdec" and "bdec" is:(comlen=3) "dec"
Longest Common Substring between "cdec" and "cdya" is:(comlen=2) "cd"
Longest Common Substring between "caac" and "dedt" is:(comlen=0) ""
Longest Common Substring between "aadd" and "aadd" is:(comlen=4) "aadd"
Longest Repitive Substring "1234565dfc123456eeeedf" is:(comlen=6) "123456"
Longest Repitive Substring "" is:(comlen=0) ""
Longest Repitive Substring "  " is:(comlen=1) " "
Longest Repitive Substring "abcddd" is:(comlen=2) "dd"
Longest Repitive Substring "aaaaaa" is:(comlen=5) "aaaaa"