实验名称:测试数据匹配效率问题
一、实验要求 2
1. 任意选择一种语言实现horspool算法 2
2. 任意选择一种语言实现Boyer-Moore算法 2
3. 任意选择一种语言实现3.2节中的蛮力算法 2
4. 在随机的二进制文本中匹配随机的二进制模式 2
5. 在自然语言文本中匹配随机的二进制模式 2
二、实验平台 2
1. 编译环境:Dev-C++ / Visual Studio 2017 2
2. 运行环境:Microsoft Windows 10 2
三、 核心代码 2
1. horspool算法 2
i += table[S[i] - 32];//模式串向右移动 3
2. Boyer-Moore算法 3
i += badchar(str[i], pat); 4
3. 蛮力法算法 4
4. CPP源文件 4
i += table[S[i] - 32];//模式串向右移动 5
i += badchar(str[i], pat); 6
t = rand() % 26; 7
y = t + x; 7
四、 效率分析 9
1.二进制文件 9
2.英文 9
/**
 * @brief Horspool substring search.
 *
 * Aligns the pattern with the text, compares right to left, and on a
 * mismatch shifts by a precomputed distance keyed on the text character
 * that sits under the last pattern position.
 *
 * @param S NUL-terminated text to search in.
 * @param T NUL-terminated pattern to search for.
 * @return Index of the first occurrence of T in S, 0 for an empty pattern,
 *         or -1 when there is no match or either argument is NULL.
 */
int Horspool(const char *S, const char *T)
{
    if (S == NULL || T == NULL) return -1;

    const int n = static_cast<int>(std::strlen(S));
    const int m = static_cast<int>(std::strlen(T));
    if (m == 0) return 0;   // an empty pattern matches at the start
    if (m > n) return -1;

    // One slot per possible byte value, so control characters and bytes
    // >= 0x80 are valid indices too (a 96-slot "printable" table indexed
    // with c - 32 goes out of bounds for them).
    std::vector<int> table(256, m);
    for (int i = 0; i < m - 1; i++) {
        // Scanning left to right means the last write for a character is
        // the one for its rightmost occurrence among the first m-1 chars.
        table[static_cast<unsigned char>(T[i])] = m - 1 - i;
    }

    int i = m - 1;  // text index aligned with the last pattern character
    while (i <= n - 1) {
        int k = 0;  // number of characters matched so far, right to left
        while (k <= m - 1 && T[m - 1 - k] == S[i - k])
            k++;
        if (k == m)
            return i - m + 1;  // full match: report its start index
        i += table[static_cast<unsigned char>(S[i])];  // slide the pattern right
    }
    return -1;  // no match
}
/**
 * @brief Distance from the end of the pattern to the rightmost occurrence
 *        of a character.
 *
 * @param bad Character to look for.
 * @param pat Pattern to scan (right to left).
 * @return 0 when `bad` is the last character, 1 when it is the one before,
 *         and so on; pat.length() when `bad` does not occur at all.
 */
int badchar(char bad, std::string& pat) {
    const int terminal = static_cast<int>(pat.length()) - 1;
    for (int i = terminal; i >= 0; --i) {
        if (pat[i] == bad) {
            return terminal - i;
        }
    }
    return terminal + 1;  // not present: the whole pattern length
}

/**
 * @brief Legacy helper kept with its original behaviour.
 *
 * Starting at index j and moving left, it looks for a character equal to
 * the last pattern character, then keeps walking left while characters keep
 * matching the pattern tail, and returns the distance from the pattern end
 * to where that walk stopped.
 *
 * NOTE(review): the result is not a safe Boyer-Moore good-suffix shift
 * (for "ABCDAB" and j = 3 it yields 6, while 4 is the largest shift that
 * cannot skip a match), so boyer_moore below does not use it.
 *
 * @param j   Pattern index to start scanning from.
 * @param pat Pattern.
 * @return (pat.length() - 1) minus the index at which the scan stopped.
 */
int goodsuffix(int j, std::string& pat) {
    const int terminal = static_cast<int>(pat.length()) - 1;
    int tail = terminal;
    bool encounter = false;
    while (j >= 0) {
        if (pat[j] == pat[tail]) {
            encounter = true;
            tail--;
            j--;
        }
        else if (encounter) break;
        else j--;
    }
    return terminal - j;
}

/**
 * @brief Boyer-Moore substring search (bad-character + good-suffix rules).
 *
 * The listing this replaces had lost its loop headers, never reset the
 * pattern index after a shift and could index past the end of the text.
 * This version precomputes both shift tables once and always advances by at
 * least one position.
 *
 * @param str Text to search in.
 * @param pat Pattern to search for.
 * @return Index of the first occurrence of pat in str, 0 for an empty
 *         pattern, or -1 when there is no match.
 */
int boyer_moore(std::string& str, std::string& pat) {
    const int n = static_cast<int>(str.length());
    const int m = static_cast<int>(pat.length());
    if (m > n) return -1;
    if (m == 0) return 0;

    // Bad-character table: rightmost index of every byte value, -1 if absent.
    std::vector<int> last(256, -1);
    for (int k = 0; k < m; ++k) {
        last[static_cast<unsigned char>(pat[k])] = k;
    }

    // Good-suffix table, built with the classic border-based preprocessing.
    // shift[p] is the shift to apply when the mismatch is at index p - 1.
    std::vector<int> shift(m + 1, 0);
    std::vector<int> border(m + 1, 0);
    int a = m;
    int b = m + 1;
    border[a] = b;
    while (a > 0) {
        while (b <= m && pat[a - 1] != pat[b - 1]) {
            if (shift[b] == 0) shift[b] = b - a;
            b = border[b];
        }
        --a;
        --b;
        border[a] = b;
    }
    b = border[0];
    for (a = 0; a <= m; ++a) {
        if (shift[a] == 0) shift[a] = b;
        if (a == b) b = border[b];
    }

    int pos = 0;  // current alignment of pat[0] within str
    while (pos <= n - m) {
        int j = m - 1;
        while (j >= 0 && pat[j] == str[pos + j]) --j;
        if (j < 0) return pos;  // every character matched
        const int by_char = j - last[static_cast<unsigned char>(str[pos + j])];
        // shift[j + 1] is always >= 1, so progress is guaranteed even when
        // the bad-character rule suggests a non-positive shift.
        pos += std::max(shift[j + 1], by_char);
    }
    return -1;
}
3. 蛮力法算法
/**
 * @brief Brute-force substring search.
 *
 * Tries every alignment of the pattern against the text from left to right
 * and compares character by character.
 *
 * @param S NUL-terminated text to search in.
 * @param T NUL-terminated pattern to search for.
 * @return Index of the first occurrence of T in S, 0 for an empty pattern,
 *         or -1 when there is no match or either argument is NULL.
 */
int violenceSearch(const char *S, const char *T) {
    if (S == NULL || T == NULL) return -1;
    const int n = static_cast<int>(std::strlen(S));
    const int m = static_cast<int>(std::strlen(T));
    int i = 0;
    while (i + m <= n) {  // stop once the pattern would overrun the text
        int k = i, j = 0;
        for (; j < m && S[k] == T[j]; ++k, ++j) {
        }
        if (j == m) return i;  // all m characters matched at alignment i
        ++i;
    }
    return -1;
}
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// Bring every name from namespace std into scope for the rest of the listing.
using namespace std;
// Status-code macros; neither is referenced anywhere in the visible listing.
#define ERROR 0
#define TRUE 1
/**
 * @brief Horspool substring search.
 *
 * Aligns the pattern with the text, compares right to left, and on a
 * mismatch shifts by a precomputed distance keyed on the text character
 * that sits under the last pattern position.
 *
 * @param S NUL-terminated text to search in.
 * @param T NUL-terminated pattern to search for.
 * @return Index of the first occurrence of T in S, 0 for an empty pattern,
 *         or -1 when there is no match or either argument is NULL.
 */
int Horspool(const char *S, const char *T)
{
    if (S == NULL || T == NULL) return -1;

    const int n = static_cast<int>(std::strlen(S));
    const int m = static_cast<int>(std::strlen(T));
    if (m == 0) return 0;   // an empty pattern matches at the start
    if (m > n) return -1;

    // One slot per possible byte value, so control characters and bytes
    // >= 0x80 are valid indices too (a 96-slot "printable" table indexed
    // with c - 32 goes out of bounds for them).
    std::vector<int> table(256, m);
    for (int i = 0; i < m - 1; i++) {
        // Scanning left to right means the last write for a character is
        // the one for its rightmost occurrence among the first m-1 chars.
        table[static_cast<unsigned char>(T[i])] = m - 1 - i;
    }

    int i = m - 1;  // text index aligned with the last pattern character
    while (i <= n - 1) {
        int k = 0;  // number of characters matched so far, right to left
        while (k <= m - 1 && T[m - 1 - k] == S[i - k])
            k++;
        if (k == m)
            return i - m + 1;  // full match: report its start index
        i += table[static_cast<unsigned char>(S[i])];  // slide the pattern right
    }
    return -1;  // no match
}
/**
 * @brief Brute-force substring search.
 *
 * Tries every alignment of the pattern against the text from left to right
 * and compares character by character.
 *
 * @param S NUL-terminated text to search in.
 * @param T NUL-terminated pattern to search for.
 * @return Index of the first occurrence of T in S, 0 for an empty pattern,
 *         or -1 when there is no match or either argument is NULL.
 */
int violenceSearch(const char *S, const char *T) {
    if (S == NULL || T == NULL) return -1;
    const int n = static_cast<int>(std::strlen(S));
    const int m = static_cast<int>(std::strlen(T));
    int i = 0;
    while (i + m <= n) {  // stop once the pattern would overrun the text
        int k = i, j = 0;
        for (; j < m && S[k] == T[j]; ++k, ++j) {
        }
        if (j == m) return i;  // all m characters matched at alignment i
        ++i;
    }
    return -1;
}

/**
 * @brief Distance from the end of the pattern to the rightmost occurrence
 *        of a character.
 *
 * @param bad Character to look for.
 * @param pat Pattern to scan (right to left).
 * @return 0 when `bad` is the last character, 1 when it is the one before,
 *         and so on; pat.length() when `bad` does not occur at all.
 */
int badchar(char bad, std::string& pat) {
    const int terminal = static_cast<int>(pat.length()) - 1;
    int i = terminal;
    while (i >= 0) {
        if (pat[i] == bad) {
            return terminal - i;
        }
        i--;
    }
    return terminal - i;  // i is -1 here, so this is the pattern length
}
/**
 * @brief Leftward scan used by the Boyer-Moore listing as its suffix shift.
 *
 * Walks left from index j. Until a character equal to the last pattern
 * character has been seen, non-matching characters are skipped. After that,
 * the walk continues only while each character equals the next character of
 * the pattern tail, and stops at the first one that does not.
 *
 * @param j   Pattern index at which the scan starts.
 * @param pat Pattern being scanned.
 * @return (pat.length() - 1) minus the index where the scan stopped
 *         (that index is -1 when the scan ran off the left end).
 */
int goodsuffix(int j, string& pat) {
    const int lastIndex = static_cast<int>(pat.length()) - 1;
    int probe = lastIndex;      // tail position currently compared against
    bool seenMatch = false;     // becomes true after the first equal pair

    for (; j >= 0; --j) {
        if (pat[j] == pat[probe]) {
            seenMatch = true;
            --probe;
            continue;
        }
        if (seenMatch) {
            break;              // the run of tail matches has ended
        }
    }
    return lastIndex - j;
}
/**
 * @brief Boyer-Moore substring search (bad-character + good-suffix rules).
 *
 * The listing this replaces had lost its loop headers, never reset the
 * pattern index after a shift and could index past the end of the text.
 * This version is self-contained: it precomputes both shift tables once and
 * always advances by at least one position.
 *
 * @param str Text to search in.
 * @param pat Pattern to search for.
 * @return Index of the first occurrence of pat in str, 0 for an empty
 *         pattern, or -1 when there is no match.
 */
int boyer_moore(std::string& str, std::string& pat) {
    const int n = static_cast<int>(str.length());
    const int m = static_cast<int>(pat.length());
    if (m > n) return -1;
    if (m == 0) return 0;

    // Bad-character table: rightmost index of every byte value, -1 if absent.
    std::vector<int> last(256, -1);
    for (int k = 0; k < m; ++k) {
        last[static_cast<unsigned char>(pat[k])] = k;
    }

    // Good-suffix table, built with the classic border-based preprocessing.
    // shift[p] is the shift to apply when the mismatch is at index p - 1.
    std::vector<int> shift(m + 1, 0);
    std::vector<int> border(m + 1, 0);
    int a = m;
    int b = m + 1;
    border[a] = b;
    while (a > 0) {
        while (b <= m && pat[a - 1] != pat[b - 1]) {
            if (shift[b] == 0) shift[b] = b - a;
            b = border[b];
        }
        --a;
        --b;
        border[a] = b;
    }
    b = border[0];
    for (a = 0; a <= m; ++a) {
        if (shift[a] == 0) shift[a] = b;
        if (a == b) b = border[b];
    }

    int pos = 0;  // current alignment of pat[0] within str
    while (pos <= n - m) {
        int j = m - 1;
        while (j >= 0 && pat[j] == str[pos + j]) --j;
        if (j < 0) return pos;  // every character matched
        const int by_char = j - last[static_cast<unsigned char>(str[pos + j])];
        // shift[j + 1] is always >= 1, so progress is guaranteed even when
        // the bad-character rule suggests a non-positive shift.
        pos += std::max(shift[j + 1], by_char);
    }
    return -1;
}
/**
 * @brief Benchmark driver for the three search routines.
 *
 * Reads a text length n, writes n random characters to "binary.txt", reads
 * a pattern from standard input, loads the file back and then runs the
 * brute-force, Horspool and Boyer-Moore searches, printing each result and
 * its CPU time.
 *
 * The active generator writes random lowercase letters; the 0/1 generator
 * is kept below as a commented-out alternative, while the user-facing
 * messages still say "binary".
 *
 * NOTE(review): the generation loop, the pattern prompt and the start of the
 * `cin >> str` statement were lost from the listing. The loop body follows
 * the two statements that survive in the report's contents list; the prompt
 * text is a reconstruction - confirm against the original program.
 *
 * @return 0 on success, 1 when the length cannot be read or is negative.
 */
int main() {
    int n;
    std::cout << "请输入随机生成二进制文本的长度:";
    if (!(std::cin >> n) || n < 0) {
        std::cerr << "输入无效" << std::endl;
        return 1;
    }

    // Generate the random text file.
    std::srand(static_cast<unsigned>(std::time(NULL)));
    {
        std::ofstream fout("binary.txt");
        /* 0/1 variant:
        for (int i = 0; i < n; i++)
            fout << rand() % 2; */
        const char x = 'a';
        for (int i = 0; i < n; i++) {
            const int t = std::rand() % 26;
            const char y = static_cast<char>(t + x);
            fout << y;
        }
    }  // fout is flushed and closed here, before the file is read back

    std::string str;  // pattern to search for
    std::cout << "请输入待匹配的模式串:";
    std::cin >> str;

    // Load the text into a std::string so that it is always NUL-terminated
    // (the C-string searches call strlen on it) and released automatically.
    std::string text;
    text.reserve(static_cast<std::size_t>(n));
    std::ifstream fin("binary.txt");
    char c;
    for (int i = 0; i < n && (fin >> c); i++)
        text.push_back(c);
    fin.close();

    const char* ch1 = text.c_str();
    const char* ch = str.c_str();

    // Brute force. The clock is stopped before printing so that console
    // output is not counted as search time.
    std::clock_t violence_start = std::clock();
    int violence_res = violenceSearch(ch1, ch);
    std::clock_t violence_end = std::clock();
    std::cout << "二进制字符串暴力匹配的结果是:" << violence_res << std::endl;
    std::cout << "二进制字符串暴力匹配的耗时是:" << double(violence_end - violence_start) / CLOCKS_PER_SEC << "s" << std::endl;
    std::cout << std::endl << std::endl;

    // Horspool.
    std::clock_t horspool_start = std::clock();
    int horspool_res = Horspool(ch1, ch);
    std::clock_t horspool_end = std::clock();
    std::cout << "二进制字符串horspool匹配的结果是:" << horspool_res << std::endl;
    std::cout << "二进制字符串horspool匹配的耗时是:" << double(horspool_end - horspool_start) / CLOCKS_PER_SEC << "s" << std::endl;
    std::cout << std::endl << std::endl;

    // Boyer-Moore: `text` is the text, `str` is the pattern.
    std::clock_t boyer_moore_start = std::clock();
    int boyer_moore_res = boyer_moore(text, str);
    std::clock_t boyer_moore_end = std::clock();
    std::cout << "二进制字符串boyer_moore匹配的结果是:" << boyer_moore_res << std::endl;
    std::cout << "二进制字符串boyer_moore匹配的耗时是:" << double(boyer_moore_end - boyer_moore_start) / CLOCKS_PER_SEC << "s" << std::endl;
    return 0;
}
四、 效率分析
1.二进制文件

| 算法名称 \ 程序测试n值(算法效率,单位:s) | 100 | 1000 | 10000 | 100000 |
| --- | --- | --- | --- | --- |
| Horspool | 0.002 | 0.001 | 0.002 | 0.001 |
| Boyer-Moore | 0.001 | 0.001 | 0.001 | 0.001 |
| 蛮力法 | 0.002 | 0.001 | 0.002 | 0.001 |
2.英文
| 算法名称 \ 程序测试n值(算法效率,单位:s) | 100 | 1000 | 10000 | 100000 |
| --- | --- | --- | --- | --- |
| Horspool | 0.002 | 0.002 | 0.002 | 0.002 |
| Boyer-Moore | 0.002 | 0.001 | 0.001 | 0.002 |
| 蛮力法 | 0.001 | 0.001 | 0.002 | 0.002 |