AC自动机的简单Java实现

AC自动机(Aho-Corasick 自动机)主要用于多模式字符串匹配的快速查找:只需扫描文本一次,即可同时找出所有关键词的出现位置。相关知识点为:

1.trie树

2.KMP算法

代码有相关注释,如下:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;

import com.git.books.a_lucene_java.aho_corasick.StringSearchResult;

/**
 * 

Title: MyOneACSearchTest.java

*

Description:ac自动机

*

Copyright: Copyright (c) 2017

*

Company: Sage

* @author 五虎将 * @date 2017年1月5日上午8:19:34 * @version 1.0 */ public class MyOneACSearchTest { public static void main(String[] args) { String[] keywords = new String[]{"我是好人","我是坏人","好人","坏人","世界","那么大","世界那么大","大"}; MyOneACSearchTest search = new MyOneACSearchTest(keywords); StringSearchResult[] findAll = search.findAll("我是好人吗?这事需要问问自己,人能分成好人坏人吗?这恐怕谁也无法解答.世界那么大,给你的想法那么大,我们世界里,只能想想大而已"); for (StringSearchResult result : findAll) { System.out.println(result.keyword() + " : " +result.index()); } } //构建树 //设置失败指针 //搜索过程 public MyOneACSearchTest(String[] keywords) { buildTree(keywords); addFailure(); } private TreeNode root; //查找全部的模式串 public StringSearchResult[] findAll(String text){ //可以找到 转移到下个节点 不能找到在失败指针节点中查找直到为root节点 ArrayList results = new ArrayList(); int index = 0; TreeNode mid = root; while(index mid = new ArrayList();//过程容器 for (TreeNode node : root.getSonsNode()) { node.failure = root; for (TreeNode treeNode : node.getSonsNode()) { mid.add(treeNode); } } //广度遍历所有节点设置失败指针 1.存在失败指针 2.不存在到root结束 while(mid.size()>0){ ArrayList temp = new ArrayList();//子节点收集器 for (TreeNode node : mid) { TreeNode r = node.getParent().failure; while(r!=null && !r.containNode(node.getChar())){ r = r.failure;//没有找到,保证最大后缀 (最后一个节点字符相同) } //是根结 if(r==null){ node.failure = root; }else{ node.failure = r.getSonNode(node.getChar()); //重叠后缀的包含 for (String result : node.failure.getResults()) { node.addResult(result); } } //收集子节点 for (TreeNode treeNode : node.getSonsNode()) { temp.add(treeNode); } } mid = temp; } root.failure = root; } private void buildTree(String[] keywords) { root = new TreeNode(null, ' '); //判断节点是否存在 存在转移 不存在添加 for (String word : keywords) { TreeNode temp = root; for (char ch : word.toCharArray()) { if(temp.containNode(ch)){ temp = temp.getSonNode(ch); }else{ TreeNode newNode = new TreeNode(temp, ch); temp.addSonNode(newNode); temp = newNode; } } temp.addResult(word); } } class TreeNode{ private TreeNode parent; private TreeNode failure; private char ch; private ArrayList 
results; private Hashtable sonsHash; private TreeNode[] sonsNode; public TreeNode(TreeNode parent,char ch) { this.parent = parent; this.ch = ch; results = new ArrayList(); sonsHash = new Hashtable(); sonsNode = new TreeNode[]{}; } //添加子节点 public void addSonNode(TreeNode node){ sonsHash.put(node.ch, node); sonsNode = new TreeNode[sonsHash.size()]; Iterator iterator = sonsHash.values().iterator(); for (int i = 0; i < sonsNode.length; i++) { if(iterator.hasNext()){ sonsNode[i] = iterator.next(); } } } //获取子节点中指定字符节点 public TreeNode getSonNode(char ch){ return sonsHash.get(ch); } //判断子节点中是否存在该字符 public boolean containNode(char ch){ return getSonNode(ch) !=null; } //添加一个结果到结果字符中 public void addResult(String result){ if(!results.contains(result)) results.add(result); } //获取字符 public char getChar(){ return ch; } //获取父节点 public TreeNode getParent(){ return parent; } //设置失败指针并且返回 public TreeNode setFailure(TreeNode failure){ this.failure = failure; return this.failure; } //获取所有的孩子节点 public TreeNode[] getSonsNode(){ return sonsNode; } //获取搜索的字符串 public ArrayList getResults(){ return results; } } }



你可能感兴趣的:(数据结构和算法)