在学习数据结构与算法的过程中,为了更深刻地理解算法的实现,本文对"串"部分的算法进行了实现。数组、串和广义表的基本操作大多与线性表的相关算法相同,因此本文仅实现课程中串部分的算法,并实现了串的模式匹配算法(BF、KMP)。本文仅提供算法代码参考,相关算法的详解请参考青岛大学王卓老师的视频课程:《数据结构与算法基础》(青岛大学-王卓)。
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <new>
#include <string>
using namespace std;
// Function result status codes
#define TRUE 1
#define FALSE 0
#define OK 1
#define ERROR 0
#define INFEASIBLE -1
#define OVERFLOW -2
#define MAXLEN 100
#define MAXSIZE 100
#define CHUNKSIZE 80
// Status is the return type of functions whose result is one of the status codes above
typedef int Status;
用顺序表实现串之后,串的基本操作与顺序表的基本操作相同,可以直接使用。
// Type of the elements stored in the sequential list.
typedef int ElemType;

// Sequential (array-backed) list.
struct SqList {
    ElemType* elem;  // pointer to the element storage
    int length;      // number of elements currently in the list
};
// Construct an empty sequential list in L.
// Allocates storage for MAXSIZE elements and sets the length to 0.
// Returns OK on success; terminates the process with exit(OVERFLOW) if the
// allocation fails.
Status InitList_Sq(SqList& L) {
    // A plain new-expression reports failure by throwing std::bad_alloc and never
    // yields a null pointer, so the non-throwing form is needed for the check
    // below to be reachable.
    L.elem = new (std::nothrow) ElemType[MAXSIZE];
    if (!L.elem) exit(OVERFLOW);  // allocation failed
    L.length = 0;                 // an empty list has length 0
    return OK;
}
// Release the storage owned by L and leave L in an empty, storage-less state.
// Safe to call more than once on the same list.
void DestroyList(SqList& L) {
    // The storage is allocated with new[] (see InitList_Sq), so it must be
    // released with delete[]; delete[] on a null pointer is a no-op.
    delete[] L.elem;
    L.elem = NULL;  // prevents a double free on a repeated call
    L.length = 0;
}
// Logically empty the list: only the element count is reset, the storage
// pointer is left untouched.
void ClearList(SqList& L)
{
    L.length = 0;
}
// Return the number of elements currently stored in L.
int GetLength(SqList L)
{
    const int count = L.length;
    return count;
}
// Return 1 when L holds no elements, 0 otherwise.
int IsEmpty(SqList L)
{
    return (L.length == 0) ? 1 : 0;
}
// Fetch the i-th element (1-based) of L into e.
// Returns ERROR and leaves e untouched when i is outside [1, L.length];
// returns OK otherwise.
int GetElem(SqList L, int i, ElemType& e)
{
    const bool inRange = (i >= 1) && (i <= L.length);
    if (!inRange) {
        return ERROR;
    }
    // The i-th element lives in slot i-1 of the underlying array.
    e = *(L.elem + (i - 1));
    return OK;
}
// Search by value.
// Returns the 1-based position of the first element of L equal to e,
// or 0 when no element matches.
int LocateElem(SqList L, ElemType e)
{
    int position = 1;
    while (position <= L.length) {
        if (L.elem[position - 1] == e) {
            return position;  // found: report its ordinal
        }
        ++position;
    }
    return 0;  // not found
}
// Insertion.
// Inserts e so that it becomes the i-th element (1-based) of L.
// Returns ERROR when i is outside [1, L.length + 1] or when the list already
// holds MAXSIZE elements; returns OK after a successful insertion.
Status ListInsert_Sq(SqList& L, int i, ElemType e)
{
    const bool positionValid = (i >= 1) && (i <= L.length + 1);
    if (!positionValid) {
        return ERROR;  // illegal position
    }
    if (L.length == MAXSIZE) {
        return ERROR;  // storage is full
    }

    // Shift the elements at the insertion slot and after it one step to the
    // right, walking from the back so nothing is overwritten.
    ElemType* const slot = L.elem + (i - 1);
    for (ElemType* cursor = L.elem + L.length; cursor > slot; --cursor) {
        *cursor = *(cursor - 1);
    }

    *slot = e;
    L.length += 1;
    return OK;
}
// Sequential (fixed-size array) storage for a string.
struct SString {
    char ch[MAXLEN + 1];  // character buffer; StrAssign in this file fills it from index 1
    int length;           // current length of the string
};
// Linked (chunked) storage for a string: one node of the chain.
struct Chunk {
    char ch[CHUNKSIZE];  // characters held by this node
    Chunk* next;         // following node in the chain
};
// String represented as a chain of Chunk nodes.
struct LString {
    Chunk* head;  // head pointer of the string
    Chunk* tail;  // tail pointer of the string
    int curlen;   // current length of the string
};
// Copy the characters of s into the fixed-size string S.
// Characters are stored 1-based: s[0] goes to S.ch[1], and S.ch[0] is unused.
// Returns OK on success. If s holds more than MAXLEN characters it cannot fit
// in S.ch; S is then set to the empty string and ERROR is returned instead of
// writing past the end of the buffer.
Status StrAssign(SString &S, string s) {
    if (s.length() > static_cast<string::size_type>(MAXLEN)) {
        S.length = 0;  // leave S in a well-defined (empty) state
        return ERROR;
    }
    const int len = static_cast<int>(s.length());
    for (int i = 0; i < len; ++i) {
        S.ch[i + 1] = s[i];
    }
    S.length = len;
    return OK;
}
BF(Brute-Force)算法即简单匹配算法,其主要思想是将主串与模式串逐个字符地进行比较。
// String pattern matching, BF (brute force): simple but slow.
// Searches the text S for the pattern T, starting at 1-based position pos.
// Returns the 1-based position in S where the first match begins, or 0 when
// there is no match or pos is smaller than 1.
int Index_BF(SString S, SString T, int pos) {
    if (pos < 1) return 0;  // positions are 1-based; index 0 of ch is never filled

    int i = pos;  // cursor in the text
    int j = 1;    // cursor in the pattern
    while (i <= S.length && j <= T.length) {
        if (S.ch[i] == T.ch[j]) {
            // Characters agree: advance both cursors.
            ++i;
            ++j;
        } else {
            // Mismatch: the current attempt started at i - j + 1, so the next
            // attempt starts one character later; the pattern restarts.
            i = i - j + 2;
            j = 1;
        }
    }
    // Success only when the whole pattern was consumed. Testing j >= T.length
    // would also accept a text that ends one character short of a full match.
    if (j > T.length) return i - T.length;
    return 0;
}
KMP算法的主要思想为:先对模式串进行预处理,构造next数组。这相当于在匹配之前先总结模式串的基本规律,即模式串中是否存在重复出现的子串。
// KMP matching: fast, but harder to understand than BF.
// Searches the text S for the pattern T, starting at 1-based position pos,
// using the fallback table `next` (entry j is the pattern position to resume
// from after a mismatch at pattern position j; 0 means "advance the text").
// Returns the 1-based position in S where the first match begins, or 0 when
// there is no match, pos is smaller than 1, or the table is unusable.
int Index_KMP(SString S, SString T, int pos, SqList next) {
    if (pos < 1) return 0;  // positions are 1-based

    int i = pos;  // cursor in the text (never moves backwards)
    int j = 1;    // cursor in the pattern
    while (i <= S.length && j <= T.length) {
        if (j == 0 || S.ch[i] == T.ch[j]) {
            ++i;
            ++j;
        } else {
            // Fall back in the pattern. A missing entry, or one that does not
            // move j strictly backwards, would make this loop spin forever, so
            // such a table is reported as "no match".
            ElemType fallback = 0;
            if (GetElem(next, j, fallback) != OK || fallback < 0 || fallback >= j) {
                return 0;
            }
            j = fallback;
        }
    }
    if (j > T.length) return i - T.length;  // match found
    return 0;                               // no match
}
// Build the KMP fallback table of the pattern T into `next`.
// Entry 1 is 0; entry i (1-based) is the pattern position to resume from after
// a mismatch at position i. Any previous content of `next` is discarded, so the
// result does not depend on what the list held before; `next` must already
// have storage (e.g. from InitList_Sq).
// Writes one newline to stdout and returns OK on success; returns ERROR if an
// entry cannot be stored or read.
int get_next(SString T, SqList &next) {
    next.length = 0;  // always start from an empty table
    if (ListInsert_Sq(next, 1, 0) != OK) return ERROR;

    int i = 1;  // position whose successor entry is being computed
    int j = 0;  // length of the matched prefix plus one; 0 means "no prefix"
    while (i < T.length) {
        // e.g. pattern "abcabcd"
        if (j == 0 || T.ch[i] == T.ch[j]) {
            ++i;
            ++j;
            if (ListInsert_Sq(next, i, j) != OK) return ERROR;
        } else {
            // Mismatch inside the pattern itself: fall back using the entries
            // computed so far.
            ElemType fallback = 0;
            if (GetElem(next, j, fallback) != OK) return ERROR;
            j = fallback;
        }
    }
    cout << endl;
    return OK;
}
此方法仍有缺陷:当模式串的前缀字符完全相同时(如aaaab中b之前的aaaa),在第四个a处失配后,算法会依次回溯到第三个、第二个、第一个a并逐一重新比较;但这些字符都相同,比较必然再次失败,完全可以一步回溯到最终的位置。因此,可以继续优化代码:
// Build the improved KMP fallback table (nextval) of the pattern T into `next`.
// Same construction as the plain next table, except that when the character at
// the fallback position equals the character at the current position, the
// fallback position's own entry is stored instead, skipping comparisons that
// are bound to fail again. Any previous content of `next` is discarded;
// `next` must already have storage (e.g. from InitList_Sq).
// Writes one newline to stdout and returns OK on success; returns ERROR if an
// entry cannot be stored or read.
int get_nextval(SString T, SqList& next) {
    next.length = 0;  // always start from an empty table
    if (ListInsert_Sq(next, 1, 0) != OK) return ERROR;

    int i = 1;  // position whose successor entry is being computed
    int j = 0;  // candidate fallback position; 0 means "no prefix"
    while (i < T.length) {
        // e.g. pattern "abcabcd"
        if (j == 0 || T.ch[i] == T.ch[j]) {
            ++i;
            ++j;
            ElemType entry = j;
            if (T.ch[i] == T.ch[j]) {
                // Same character at both positions: falling back to j would
                // fail the same comparison, so reuse j's own entry.
                if (GetElem(next, j, entry) != OK) return ERROR;
            }
            if (ListInsert_Sq(next, i, entry) != OK) return ERROR;
        } else {
            ElemType fallback = 0;
            if (GetElem(next, j, fallback) != OK) return ERROR;
            j = fallback;
        }
    }
    cout << endl;
    return OK;
}
// Demo driver: builds the next table for a pattern, prints the pattern and the
// table, then runs KMP and BF matching over the same text and prints the
// positions found together with the elapsed clock() ticks.
int main()
{
    string s = "bcabcdbcabcdefeaplj好nb哦,plj真nbbdcabd7bplj so nb, plj really nb.defabcdefaabcabcabcdefg";
    string t = "plj so nb, plj really nb.";

    SString S;
    SString T;
    StrAssign(S, s);
    StrAssign(T, t);

    SqList next;
    InitList_Sq(next);
    get_next(T, next);  // compute the next table

    const int patternLen = static_cast<int>(t.length());

    cout << "模式串:";
    for (int k = 1; k <= patternLen; ++k) {
        cout << T.ch[k] << " ";
    }
    cout << endl;

    cout << "next值:";
    for (int k = 1; k <= patternLen; ++k) {
        int entry;
        GetElem(next, k, entry);
        cout << entry << " ";
    }
    cout << endl;
    cout << endl;

    const int pos = 1;

    // Time the KMP search.
    const clock_t kmpBegin = clock();
    const int index_KMP = Index_KMP(S, T, pos, next);
    const clock_t kmpFinish = clock();
    const double KMP_time = kmpFinish - kmpBegin;

    // Time the BF search.
    const clock_t bfBegin = clock();
    const int index_BF = Index_BF(S, T, pos);
    const clock_t bfFinish = clock();
    const double BF_time = bfFinish - bfBegin;

    cout << "位置计算完毕" << endl;
    cout << "index_BF: " << index_BF << endl;
    cout << "time_BF: " << BF_time << endl;
    cout << "index_KMP: " << index_KMP << endl;
    cout << "time_KMP: " << KMP_time << endl;
    system("pause");
    return 0;
}
// Demo driver: builds both the next and the nextval tables for a pattern,
// prints them side by side, then runs KMP (with the next table) and BF
// matching over the same text and prints the positions found together with
// the elapsed clock() ticks.
int main()
{
    string s = "aaabaaaab";
    string t = "aaaab";

    SString S;
    SString T;
    StrAssign(S, s);
    StrAssign(T, t);

    SqList next;
    SqList nextval;
    InitList_Sq(next);
    InitList_Sq(nextval);
    get_next(T, next);           // compute the next table
    get_nextval(T, nextval);     // compute the nextval table (improved next computation)

    const int patternLen = static_cast<int>(t.length());

    cout << "位 置" << "\t" << "模式串" << "\t" << "next值" << "\t" << "nextval值" << endl;
    for (int k = 1; k <= patternLen; ++k) {
        int entry;
        int improvedEntry;
        GetElem(next, k, entry);
        GetElem(nextval, k, improvedEntry);
        cout << k << "\t" << T.ch[k] << "\t" << entry << "\t" << improvedEntry << endl;
    }
    cout << endl;

    const int pos = 1;

    // Time the KMP search.
    const clock_t kmpBegin = clock();
    const int index_KMP = Index_KMP(S, T, pos, next);
    const clock_t kmpFinish = clock();
    const double KMP_time = kmpFinish - kmpBegin;

    // Time the BF search.
    const clock_t bfBegin = clock();
    const int index_BF = Index_BF(S, T, pos);
    const clock_t bfFinish = clock();
    const double BF_time = bfFinish - bfBegin;

    cout << "位置计算完毕" << endl;
    cout << "index_BF: " << index_BF << endl;
    cout << "time_BF: " << BF_time << endl;
    cout << "index_KMP: " << index_KMP << endl;
    cout << "time_KMP: " << KMP_time << endl;
    system("pause");
    return 0;
}