给定一个目标串串T和若干个模式串P,设计一个算法去匹配每一个模式串。
思路:
多模式串匹配问题(设m为目标串的长度,n为模式串的平均长度)。可以用后缀trie树,时间复杂度为O(m^2 + kn)。利用AC自动机的时间复杂度为O(m + kn + z)(其中z为T中出现的模式串个数)。还可以用后缀树,后缀树的方法比较复杂,这里不做介绍。
下面是后缀trie树的代码
数组形式:
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Suffix trie over the lowercase alphabet 'a'..'z', stored as one flat table.
//
// Node k owns the CLD consecutive slots trie[k * CLD] .. trie[k * CLD + CLD - 1].
// Slot j of a node holds the index of the child reached through letter 'a' + j,
// or -1 when there is no such child. Node 0 is the root.
class Trie {
public:
    static const int CLD = 26;  // children per node, one per lowercase letter
    int size;                   // number of nodes created so far, root excluded
    std::vector<int> trie;      // flattened child table, CLD slots per node

    // Builds the trie of every suffix of `s`, so that Search() answers
    // "is this pattern a substring of s?".
    // The table starts with the root row only and grows by one row per new
    // node, so it is always exactly as large as the nodes that exist.
    Trie(const std::string& s)
        : size(0), trie(static_cast<std::size_t>(CLD), -1) {
        for (std::size_t i = 0; i < s.size(); ++i)
            Insert(s.substr(i));
    }

    // Adds `s` (and therefore all of its prefixes) to the trie.
    // Insertion stops at the first character outside 'a'..'z': such a
    // character has no slot in the table, and no pattern accepted by Search()
    // can extend across it.
    void Insert(const std::string& s) {
        int index = 0;
        for (std::size_t i = 0; i < s.size(); ++i) {
            const int j = s[i] - 'a';
            if (j < 0 || j >= CLD)
                return;
            const std::size_t slot = static_cast<std::size_t>(index) * CLD + j;
            if (trie[slot] == -1) {
                trie[slot] = ++size;
                // Append the new node's own row of empty child slots.
                trie.resize(trie.size() + CLD, -1);
            }
            index = trie[slot];
        }
    }

    // Returns true when `s` is non-empty and can be spelled from the root,
    // i.e. it is a prefix of some inserted string. Returns false for the empty
    // string and for any string containing a character outside 'a'..'z'.
    bool Search(const std::string& s) const {
        if (s.empty())
            return false;
        int index = 0;
        for (std::size_t i = 0; i < s.size(); ++i) {
            const int j = s[i] - 'a';
            if (j < 0 || j >= CLD)
                return false;
            const int next = trie[static_cast<std::size_t>(index) * CLD + j];
            if (next == -1)
                return false;
            index = next;
        }
        return true;
    }
};

// Demo: prints the text length, then one 0/1 line per pattern.
int main() {
    const std::string s("mississipi");
    std::cout << s.size() << std::endl;

    Trie trie(s);

    std::vector<std::string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("hi");
    svec.push_back("sis");
    svec.push_back("mississippa");

    for (std::size_t i = 0; i < svec.size(); ++i)
        std::cout << trie.Search(svec[i]) << std::endl;
    return 0;
}
树的形式:
#include <assert.h>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

const int CLD = 26;  // children per node, one per lowercase letter 'a'..'z'

// One trie node: pcld[j] is the child reached through letter 'a' + j,
// or NULL when that child does not exist.
struct TNode {
    std::vector<TNode*> pcld;
    TNode() : pcld(static_cast<std::size_t>(CLD), static_cast<TNode*>(NULL)) {}
};

// Adds `s` (and therefore all of its prefixes) below `root`, allocating nodes
// with `new` as needed. `root` must not be NULL. An empty `s` adds nothing.
// Insertion stops at the first character outside 'a'..'z': such a character
// has no child slot, and no pattern accepted by Search() can extend across it.
void Insert(TNode*& root, const std::string& s) {
    assert(root != NULL);
    TNode* node = root;
    for (std::size_t i = 0; i < s.size(); ++i) {
        const int j = s[i] - 'a';
        if (j < 0 || j >= CLD)
            return;
        if (node->pcld[j] == NULL)
            node->pcld[j] = new TNode();
        node = node->pcld[j];
    }
}

// Returns true when `s` is non-empty and can be spelled from `root`, i.e. it
// is a prefix of some inserted string. Returns false for the empty string and
// for any string containing a character outside 'a'..'z'.
// `root` must not be NULL.
bool Search(TNode* root, const std::string& s) {
    assert(root != NULL);
    if (s.empty())
        return false;
    const TNode* node = root;
    for (std::size_t i = 0; i < s.size(); ++i) {
        const int j = s[i] - 'a';
        if (j < 0 || j >= CLD)
            return false;
        node = node->pcld[j];
        if (node == NULL)
            return false;
    }
    return true;
}

// Frees `root` and every node reachable from it. Accepts NULL.
// Uses an explicit work list instead of recursion because the trie is as deep
// as the longest inserted string.
void Destroy(TNode* root) {
    std::vector<TNode*> pending;
    if (root != NULL)
        pending.push_back(root);
    while (!pending.empty()) {
        TNode* node = pending.back();
        pending.pop_back();
        for (std::size_t i = 0; i < node->pcld.size(); ++i) {
            if (node->pcld[i] != NULL)
                pending.push_back(node->pcld[i]);
        }
        delete node;
    }
}

// Demo: builds the suffix trie of the text and prints one 0/1 line per pattern.
int main() {
    const std::string s("mississipi");

    TNode* root = new TNode();
    for (std::size_t i = 0; i < s.size(); ++i)
        Insert(root, s.substr(i));

    std::vector<std::string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("hi");
    svec.push_back("sis");
    svec.push_back("mississippa");

    for (std::size_t i = 0; i < svec.size(); ++i)
        std::cout << Search(root, svec[i]) << std::endl;

    Destroy(root);
    return 0;
}
以下是AC自动机代码:
#include <assert.h>
#include <cstddef>
#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>

const int CLD = 26;  // children per node, one per lowercase letter 'a'..'z'

// One state of the Aho-Corasick automaton.
struct TNode {
    std::vector<TNode*> pcld;  // pcld[j]: child through letter 'a' + j, or NULL
    TNode* fail;               // failure link; NULL for the root and until Build() runs
    TNode* out;                // nearest tagged state further along the failure chain, or NULL
    bool tag;                  // true when an inserted pattern ends at this state

    TNode()
        : pcld(static_cast<std::size_t>(CLD), static_cast<TNode*>(NULL)),
          fail(NULL),
          out(NULL),
          tag(false) {}
};

// Adds pattern `s` to the trie below `root` and tags its final state.
// `root` must not be NULL. An empty pattern is ignored.
// Throws std::invalid_argument, before modifying the trie, when `s` contains a
// character outside 'a'..'z' (there is no child slot for it).
// Build() must be called again after inserting for Search() to see the pattern.
void Insert(TNode*& root, const std::string& s) {
    assert(root != NULL);
    for (std::size_t i = 0; i < s.size(); ++i) {
        const int j = s[i] - 'a';
        if (j < 0 || j >= CLD)
            throw std::invalid_argument("Insert: pattern must contain only 'a'..'z'");
    }
    if (s.empty())
        return;

    TNode* node = root;
    for (std::size_t i = 0; i < s.size(); ++i) {
        const int j = s[i] - 'a';
        if (node->pcld[j] == NULL)
            node->pcld[j] = new TNode();
        node = node->pcld[j];
    }
    node->tag = true;
}

// Computes the failure link and output link of every state, breadth-first.
// `root` must not be NULL. May be called again after further Insert() calls;
// every link is recomputed.
void Build(TNode*& root) {
    assert(root != NULL);
    root->fail = NULL;
    root->out = NULL;

    std::queue<TNode*> que;
    que.push(root);
    while (!que.empty()) {
        TNode* cur = que.front();
        que.pop();
        for (int i = 0; i < CLD; ++i) {
            TNode* child = cur->pcld[i];
            if (child == NULL)
                continue;

            // Follow the parent's failure chain to the first state that has a
            // transition on letter i; fall back to the root when none does.
            TNode* f = cur->fail;
            while (f != NULL && f->pcld[i] == NULL)
                f = f->fail;
            child->fail = (f == NULL) ? root : f->pcld[i];

            // The failure target is shallower than `child`, so breadth-first
            // order guarantees its own output link is already final.
            TNode* target = child->fail;
            child->out = (target != root && target->tag) ? target : target->out;

            que.push(child);
        }
    }
}

// Returns the total number of occurrences of inserted patterns in `s`
// (each distinct pattern counted once per end position).
// `root` must not be NULL and Build() must have been called after the last
// Insert(). A character outside 'a'..'z' cannot be part of any pattern, so it
// resets the automaton to the root. An empty `s` yields 0.
int Search(TNode* root, const std::string& s) {
    assert(root != NULL);
    TNode* state = root;
    int res = 0;
    for (std::size_t i = 0; i < s.size(); ++i) {
        const int j = s[i] - 'a';
        if (j < 0 || j >= CLD) {
            state = root;
            continue;
        }

        // A NULL failure link on a non-root state means Build() has not seen
        // this state; treat it as pointing at the root instead of dereferencing.
        while (state != root && state->pcld[j] == NULL)
            state = (state->fail != NULL) ? state->fail : root;
        if (state->pcld[j] != NULL)
            state = state->pcld[j];

        // Visit the current state, then only the tagged states on its failure
        // chain, via the output links.
        for (const TNode* p = state; p != NULL && p != root; p = p->out) {
            if (p->tag)
                ++res;
        }
    }
    return res;
}

// Frees `root` and every state reachable through child pointers. Accepts NULL.
// Failure and output links are not followed: they point at states that are
// already owned through some child pointer.
void Destroy(TNode* root) {
    std::vector<TNode*> pending;
    if (root != NULL)
        pending.push_back(root);
    while (!pending.empty()) {
        TNode* node = pending.back();
        pending.pop_back();
        for (std::size_t i = 0; i < node->pcld.size(); ++i) {
            if (node->pcld[i] != NULL)
                pending.push_back(node->pcld[i]);
        }
        delete node;
    }
}

// Demo: prints how many pattern occurrences the text contains.
int main() {
    const std::string s("missisip");

    std::vector<std::string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("ssis");
    svec.push_back("sis");
    svec.push_back("missisip");
    svec.push_back("ip");

    TNode* root = new TNode();
    for (std::size_t i = 0; i < svec.size(); ++i)
        Insert(root, svec[i]);
    Build(root);

    std::cout << Search(root, s) << std::endl;

    Destroy(root);
    return 0;
}