字符串中连续出现最多的子串 & 字符串中最长重复子串,这两个问题都可以用后缀数组来解决,至于后缀数组可以参考《编程珠玑》P156;后缀数组就是定义一个指针数组,其中每个指针分别指向字符串中的一个位置(即对应后缀的起始处),如下:
a b c a b c a b c d e .substr[0]
b c a b c a b c d e ....substr[1]
c a b c a b c d e .......substr[2]
a b c a b c d e ..........substr[3]
b c a b c d e .............substr[4]
c a b c d e ...............substr[5]
a b c d e .................substr[6]
b c d e ...................substr[7]
c d e .....................substr[8]
d e ........................substr[9]
e ..........................substr[10]
// The `substr` rows listed above form the suffix array of "abcabcabcde".

/**
 * Finds the substring that is repeated back-to-back the most times.
 *
 * For every start position i and every unit length (j - i), counts how many
 * consecutive copies of str[i, j) appear starting at i, and remembers the
 * unit with the highest count.
 *
 * @param str  The text to scan.
 * @return The repeating unit itself (not the whole run). When several units
 *         reach the same count, the first one encountered wins: smallest
 *         start index, then shortest length. Returns an empty string when no
 *         substring is immediately followed by a copy of itself.
 */
string MaxTimesOfContinue(string str)
{
    typedef string::size_type Index;

    const Index len = str.length();
    Index maxCount = 0;
    string longest;

    for (Index i = 0; i < len; ++i)
    {
        for (Index j = i + 1; j < len; ++j)
        {
            const Index offset = j - i;

            // A second copy of the unit no longer fits after position j, and
            // a larger j only makes the unit longer, so stop here.
            if (offset > len - j)
                break;

            // Compare in place instead of building temporary substrings.
            if (str.compare(i, offset, str, j, offset) != 0)
                continue;

            // Two adjacent copies found; keep stepping one unit at a time
            // while a full further copy still fits and matches.
            Index count = 2;
            for (Index k = j + offset;
                 offset <= len - k && str.compare(i, offset, str, k, offset) == 0;
                 k += offset)
            {
                ++count;
            }

            if (count > maxCount)
            {
                maxCount = count;
                longest = str.substr(i, offset);
            }
        }
    }
    return longest;
}
/**
 * Length of the common prefix of two NUL-terminated strings.
 *
 * Scanning stops at the end of str2 or at the first position where the two
 * strings differ, whichever comes first.
 *
 * @param str1  First string.
 * @param str2  Second string; its terminator bounds the scan.
 * @return Number of leading characters the two strings share.
 */
int Comlen(char *str1, char *str2)
{
    int matched = 0;
    for (; str2[matched] != '\0'; ++matched)
    {
        if (str1[matched] != str2[matched])
            break;
    }
    return matched;
}
/**
 * Brute-force length of the longest substring that occurs at least twice.
 *
 * Every pair of suffixes (i, j) with i < j is compared with Comlen, and the
 * longest common prefix found over all pairs is the answer. The two
 * occurrences may overlap.
 *
 * @param str  NUL-terminated string to scan; may be NULL.
 * @return Length of the longest repeated substring, or 0 when str is NULL or
 *         no character occurs twice.
 */
int MaxLength(char *str)
{
    if (str == NULL)
        return 0;

    const int n = static_cast<int>(strlen(str));
    int maxLen = 0;
    for (int i = 0; i < n; ++i)
    {
        for (int j = i + 1; j < n; ++j)
        {
            const int thisLen = Comlen(&str[i], &str[j]);
            if (thisLen > maxLen)
                maxLen = thisLen;
        }
    }
    return maxLen;
}
将后缀数组按字典序排序(以字符串 banana 为例):
a[0]:a
a[1]:ana
a[2]:anana
a[3]:banana
a[4]:na
a[5]:nana
排序之后,只需比较相邻两个后缀的最长公共前缀,并取其中的最大值即可:
/**
 * Counts how many leading characters two NUL-terminated strings have in
 * common.
 *
 * The walk ends when str2 is exhausted or when the current characters differ.
 *
 * @param str1  First string.
 * @param str2  Second string; its terminator bounds the walk.
 * @return Length of the shared prefix.
 */
int Comlen(char *str1, char *str2)
{
    int count = 0;
    while (*str2 != '\0')
    {
        if (*str1 != *str2)
            return count;
        ++str1;
        ++str2;
        ++count;
    }
    return count;
}
/**
 * qsort-style comparator for an array of C-string pointers.
 *
 * Each argument is the address of an array element (a char*), so it is
 * dereferenced once before the pointed-to strings are compared with strcmp.
 *
 * @param a  Address of the first char* element.
 * @param b  Address of the second char* element.
 * @return The strcmp result for the two strings: negative, zero, or positive.
 */
int Pstrcmp(const void *a, const void *b)
{
    char *const *lhs = static_cast<char *const *>(a);
    char *const *rhs = static_cast<char *const *>(b);
    return strcmp(*lhs, *rhs);
}
//char *a[11];
/**
 * Length of the longest substring that occurs at least twice, computed with
 * a sorted suffix array.
 *
 * A pointer to every suffix of str is collected and the pointers are sorted
 * lexicographically with qsort/Pstrcmp. The answer is then the largest
 * Comlen value over all pairs of neighbouring suffixes in sorted order.
 *
 * @param str  NUL-terminated string to scan; may be NULL. The string itself
 *             is not modified; only pointers into it are sorted.
 * @return Length of the longest repeated substring, or 0 when str is NULL or
 *         nothing repeats.
 */
int MaxLength(char *str)
{
    if (str == NULL)
        return 0;

    const int len = static_cast<int>(strlen(str));

    // One pointer per suffix; the extra slot keeps the allocation non-empty
    // even for an empty string.
    char **suffixes = new char *[len + 1];
    for (int i = 0; i < len; ++i)
        suffixes[i] = &str[i];

    qsort(suffixes, len, sizeof(char *), Pstrcmp);

    int maxLen = 0;
    for (int i = 0; i + 1 < len; ++i)
    {
        // Measure each neighbouring pair once instead of twice.
        const int common = Comlen(suffixes[i], suffixes[i + 1]);
        if (common > maxLen)
            maxLen = common;
    }

    // Release the pointer array; the original leaked it on every call.
    delete[] suffixes;
    return maxLen;
}