KMP 算法 (Knuth-Morris-Pratt 字符串匹配算法)


先给代码,有时间了再回来补注释和算法说明。

---------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Build the KMP prefix table for the pattern P.
 *
 * pi[i] is the index of the last character of the longest proper prefix of
 * P[0..i] that is also a suffix of P[0..i], or -1 when no such prefix
 * exists.  pi[0] is always -1.
 *
 * Returns a malloc'ed array that the caller must free.  It holds strlen(P)
 * entries, or a single entry (-1) when P is the empty string.  Returns NULL
 * when P is NULL or the allocation fails.
 */
const int * get_prefix(const char * P)
{
    size_t len;
    size_t i;
    int j = -1;
    int * pi;

    if (NULL == P)
    {
        return NULL;
    }

    len = strlen(P);

    /* Reserve at least one slot so that pi[0] is valid even for "". */
    pi = malloc(sizeof *pi * (len > 0 ? len : 1));
    if (NULL == pi)
    {
        return NULL;
    }

    pi[0] = -1;

    /* Bounding by len (instead of testing P[i]) never reads past the
     * terminator, which the old loop did for an empty pattern. */
    for (i = 1; i < len; ++i)
    {
        /* Fall back to shorter borders until one can be extended by P[i]. */
        while (j >= 0 && P[j + 1] != P[i])
        {
            j = pi[j];
        }
        if (P[j + 1] == P[i])
        {
            ++j;
        }
        pi[i] = j;
    }

    return pi;
}

/*
 * Find every occurrence of the pattern P in the text T and, for each one,
 * print the remainder of T starting at the match position, followed by a
 * newline.  Overlapping occurrences are reported.
 *
 * Nothing is printed when T or P is NULL, when P is empty, or when the
 * prefix table cannot be built.
 */
void kmp_match(const char * T, const char * P)
{
    const int * pi;
    int i = 0;
    int j = -1;   /* index of the last pattern character matched so far */

    /* An empty pattern would make the loop below read pi[-1], so reject it
     * (and NULL arguments) before doing any work. */
    if (NULL == T || NULL == P || '\0' == P[0])
    {
        return;
    }

    pi = get_prefix(P);
    if (NULL == pi)
    {
        return;
    }

    while (T[i])
    {
        /* On a mismatch, fall back to the next shorter border. */
        while (j >= 0 && P[j + 1] != T[i])
        {
            j = pi[j];
        }
        if (P[j + 1] == T[i])
        {
            ++j;
        }
        if ('\0' == P[j + 1])
        {
            /* P[0..j] ends at T[i], so the match starts at T[i - j]. */
            printf("%s\n", T + i - j);
            /* Keep the longest border so overlapping matches are found. */
            j = pi[j];
        }
        ++i;
    }

    /* free() takes a non-const pointer; the table is owned by this call. */
    free((void *)pi);
}

/*
 * Demo driver: report every occurrence of a fixed pattern in a fixed text.
 * Command-line arguments are not used.
 */
int main(int argc, char * argv[])
{
    const char * text = "abcdabcdabcdabcd";
    const char * pattern = "abc";

    (void)argc;
    (void)argv;

    kmp_match(text, pattern);
    return 0;
}


参考:《算法导论》

---------------------------------------------------------------------------

/*
* Knuth-Morris-Pratt 字符串匹配算法的三种实现。
* 匹配部分都一样,差异只在求 next 数组。:)
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Implementation 1
 *
 * Returns a pointer to the first occurrence of pattern inside content, or
 * NULL when there is none or when either argument is NULL.  An empty
 * pattern matches at the start of content.
 *
 * next[i] is the length of the longest proper prefix of pattern[0..i-1]
 * that is also its suffix; next[0] is the sentinel -1.
 */
char * kmp1(char * content, char * pattern)
{
    int i;
    int j;
    size_t len;
    int * next;

    if (NULL == content || NULL == pattern)
    {
        return NULL;
    }

    len = strlen(pattern);
    if (0 == len)
    {
        /* Nothing to search for; also avoids a zero-sized table. */
        return content;
    }

    next = malloc(len * sizeof *next);
    if (NULL == next)
    {
        /* No memory for the table: fall back to the library search, which
         * yields the same answer without needing extra storage. */
        return strstr(content, pattern);
    }

    /* Get the "next" array. */
    next[0] = -1;
    for (i = 1; pattern[i] != 0; ++i)
    {
        j = next[i - 1];
        /* j must be tested first: it is -1 once every border has been
         * exhausted, and pattern[-1] must never be read. */
        while (j >= 0 && pattern[i - 1] != pattern[j])
        {
            j = next[j];
        }
        next[i] = j + 1;
    }

    /* Match. */
    i = 0;
    j = 0;
    while (content[i] && pattern[j])
    {
        if (content[i] == pattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = next[j];
            if (-1 == j)
            {
                /* Even pattern[0] failed here: advance the text and
                 * restart the pattern from its first character. */
                ++i;
                ++j;
            }
        }
    }

    free(next);

    if (pattern[j])
    {
        /* The text ran out before the whole pattern was matched. */
        return NULL;
    }
    else
    {
        return &content[i - j];
    }
}

/*
 * Implementation 2
 *
 * Returns a pointer to the first occurrence of pattern inside content, or
 * NULL when there is none or when either argument is NULL.  An empty
 * pattern matches at the start of content.
 *
 * The "next" table is built by matching the pattern against itself; next[i]
 * is the pattern index to resume from after a mismatch at pattern[i], with
 * next[0] being the sentinel -1.
 */
char * kmp2(char * content, char * pattern)
{
    int i;
    int j;
    size_t len;
    int * next;

    if (NULL == content || NULL == pattern)
    {
        return NULL;
    }

    len = strlen(pattern);
    if (0 == len)
    {
        /* Nothing to search for. */
        return content;
    }

    /* The construction loop below also stores next[len] on its final
     * iteration, so the table needs len + 1 slots, not len. */
    next = malloc((len + 1) * sizeof *next);
    if (NULL == next)
    {
        /* No memory for the table: fall back to the library search, which
         * yields the same answer without needing extra storage. */
        return strstr(content, pattern);
    }

    /* Get the "next" array. */
    next[0] = -1;
    i = 0;
    j = -1;
    while (pattern[i])
    {
        if (-1 == j || pattern[i] == pattern[j])
        {
            ++i;
            ++j;
            next[i] = j;
        }
        else
        {
            j = next[j];
        }
    }

    /* Match. */
    i = 0;
    j = 0;
    while (content[i] && pattern[j])
    {
        if (content[i] == pattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = next[j];
            if (-1 == j)
            {
                /* Even pattern[0] failed here: advance the text and
                 * restart the pattern from its first character. */
                ++i;
                ++j;
            }
        }
    }

    free(next);

    if (pattern[j])
    {
        /* The text ran out before the whole pattern was matched. */
        return NULL;
    }
    else
    {
        return &content[i - j];
    }
}

/*
 * Implementation 3
 *
 * Implementation 2 with an improved "next" table; the improvement is marked
 * by a comment in the construction loop.
 *
 * Returns a pointer to the first occurrence of pattern inside content, or
 * NULL when there is none or when either argument is NULL.  An empty
 * pattern matches at the start of content.
 */
char * kmp3(char * content, char * pattern)
{
    int i;
    int j;
    size_t len;
    int * next;

    if (NULL == content || NULL == pattern)
    {
        return NULL;
    }

    len = strlen(pattern);
    if (0 == len)
    {
        /* Nothing to search for. */
        return content;
    }

    /* The construction loop below also stores next[len] on its final
     * iteration, so the table needs len + 1 slots, not len. */
    next = malloc((len + 1) * sizeof *next);
    if (NULL == next)
    {
        /* No memory for the table: fall back to the library search, which
         * yields the same answer without needing extra storage. */
        return strstr(content, pattern);
    }

    /* Get the "next" array. */
    next[0] = -1;
    i = 0;
    j = -1;
    while (pattern[i])
    {
        if (-1 == j || pattern[i] == pattern[j])
        {
            ++i;
            ++j;

            /* Improvement over implementation 2: if the fallback position
             * holds the same character as pattern[i], retrying there would
             * fail on the same text character, so reuse its fallback. */
            if (pattern[i] == pattern[j])
            {
                next[i] = next[j];
            }
            else
            {
                next[i] = j;
            }
        }
        else
        {
            j = next[j];
        }
    }

    /* Match. */
    i = 0;
    j = 0;
    while (content[i] && pattern[j])
    {
        if (content[i] == pattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            j = next[j];
            if (-1 == j)
            {
                /* No fallback position left: advance the text and restart
                 * the pattern from its first character. */
                ++i;
                ++j;
            }
        }
    }

    free(next);

    if (pattern[j])
    {
        /* The text ran out before the whole pattern was matched. */
        return NULL;
    }
    else
    {
        return &content[i - j];
    }
}

/*
 * Driver: run the three KMP implementations on argv[1] (content) and
 * argv[2] (pattern).  Each prints the content from the first match onwards,
 * or "(null)" when the pattern does not occur.
 *
 * Returns EXIT_FAILURE with a usage message when fewer than two arguments
 * are supplied.
 */
int main(int argc, char * argv[])
{
    const char * hit;

    /* argv[1] and argv[2] only exist when two arguments were supplied. */
    if (argc < 3)
    {
        fprintf(stderr, "usage: %s <content> <pattern>\n",
                argc > 0 ? argv[0] : "kmp");
        return EXIT_FAILURE;
    }

    /* Passing NULL to "%s" is undefined behaviour, so a failed search is
     * printed as an explicit marker instead. */
    hit = kmp1(argv[1], argv[2]);
    printf("%s\n", hit ? hit : "(null)");

    hit = kmp2(argv[1], argv[2]);
    printf("%s\n", hit ? hit : "(null)");

    hit = kmp3(argv[1], argv[2]);
    printf("%s\n", hit ? hit : "(null)");

    return 0;
}


参考:
1. 《数据结构 (C 语言版)》,严蔚敏,吴伟民,P79-84
2. 字符串匹配的 KMP 算法详解
3. KMP

---------------------------------------------------------------------------

你可能感兴趣的:(数据结构,算法,J#)