AC 自动机(Aho-Corasick)实现多模式匹配

子节点用 Java 中的 HashMap 存储,只为实际出现的字符分配空间,可以节约内存。

支持中文,可直接添加使用。

代码的注释还比较详细:


/**
 *
 * @author hit_fantasy
 */
import java.util.*;

/**
 * Multi-pattern matcher built on an Aho-Corasick automaton.
 *
 * <p>Patterns are registered with {@link #addWord(String)} and texts are
 * scanned with {@link #hasBadWord(String)}. Children of every trie node are
 * kept in a {@code HashMap} keyed by {@code char}, so any character that fits
 * in a Java {@code char} (including Chinese) can be used.
 *
 * <p>Failure links are (re)built automatically the first time a text is
 * scanned after the set of patterns has changed, so calling
 * {@link #buildFail(TrieNode)} by hand is optional.
 */
public class TrieCheck {

    // Root of the trie; represents the empty prefix and never ends a pattern.
    private TrieNode root;

    // True while patterns have been added since the failure links were last built.
    private boolean dirty;

    /** Creates a matcher with no patterns. */
    public TrieCheck() {
        root = new TrieNode();
        dirty = true;
    }

    /** Small demo: registers a few patterns and scans one text. */
    public static void main(String[] args) {
        TrieCheck test = new TrieCheck();
        test.addWord("shr");
        test.addWord("say");
        test.addWord("her");
        test.addWord("he");
        test.addWord("?");
        test.buildFail(test.root);
        test.hasBadWord("shrhaha?heabasd");
    }

    /**
     * Adds a pattern to the trie.
     *
     * @param word pattern to register; {@code null} and empty strings are ignored
     */
    public void addWord(String word) {
        if (word == null || word.length() == 0) {
            return;
        }
        TrieNode current = root;
        for (int i = 0; i < word.length(); i++) {
            current = current.add(word.charAt(i));
        }
        current.end = true;
        current.word = word;
        // The automaton changed shape, so existing failure links are stale.
        dirty = true;
    }

    /**
     * Scans {@code text} for every registered pattern.
     *
     * <p>Each occurrence found is printed to {@code System.out} on its own line.
     *
     * @param text text to scan; may be {@code null}
     * @return {@code true} if at least one pattern occurs in {@code text};
     *         {@code false} otherwise, including for {@code null} or empty text
     */
    public boolean hasBadWord(String text) {
        if (text == null || text.length() == 0 || root == null) {
            return false;
        }
        if (dirty) {
            buildFail(root);
        }
        boolean res = false;
        TrieNode current = root;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            // On a mismatch, fall back along failure links until some state
            // accepts c or the root is reached.
            while (current != root && !current.child.containsKey(c)) {
                current = current.fail;
            }
            TrieNode next = current.child.get(c);
            if (next != null) {
                current = next;
            }
            // Report the pattern ending at this state, plus every shorter
            // pattern that is a suffix of it (reached through output links).
            TrieNode hit = current.end ? current : current.output;
            while (hit != null) {
                res = true;
                System.out.println(hit.word);
                hit = hit.output;
            }
        }
        return res;
    }

    /** Builds the failure links for all patterns added so far. */
    public void buildFail() {
        buildFail(root);
    }

    /**
     * Builds failure and output links for the trie rooted at {@code root},
     * visiting nodes breadth-first so that a node's parent is always
     * finished before the node itself.
     *
     * @param root root node of the trie to process
     * @throws NullPointerException if {@code root} is {@code null}
     */
    public void buildFail(TrieNode root) {
        Objects.requireNonNull(root, "root");
        Deque<TrieNode> queue = new ArrayDeque<TrieNode>();

        // The root and its direct children all fail back to the root.
        root.fail = root;
        root.output = null;
        for (TrieNode first : root.child.values()) {
            first.fail = root;
            first.output = null;
            queue.add(first);
        }

        while (!queue.isEmpty()) {
            TrieNode father = queue.remove();
            for (Map.Entry<Character, TrieNode> entry : father.child.entrySet()) {
                Character key = entry.getKey();
                TrieNode node = entry.getValue();

                // Start from the parent's failure state and keep falling back
                // until a state with an outgoing edge for this character is found.
                TrieNode candidate = father.fail;
                while (candidate != root && !candidate.child.containsKey(key)) {
                    candidate = candidate.fail;
                }
                TrieNode target = candidate.child.get(key);
                node.fail = (target != null) ? target : root;

                // Nearest pattern-ending state on the failure chain, if any.
                node.output = node.fail.end ? node.fail : node.fail.output;
                queue.add(node);
            }
        }

        if (root == this.root) {
            dirty = false;
        }
    }

    /** A single state of the automaton. */
    private static class TrieNode {

        // Outgoing edges, keyed by the character that labels the edge.
        Map<Character, TrieNode> child;
        // State to continue from when no outgoing edge matches.
        TrieNode fail;
        // Closest state on the failure chain that ends a pattern, or null.
        TrieNode output;
        // Whether a pattern ends at this state.
        boolean end;
        // The pattern that ends here; used when reporting a match.
        String word;
        // Character on the edge leading into this state.
        char value;

        TrieNode() {
            child = new HashMap<Character, TrieNode>();
            end = false;
            word = "";
        }

        /**
         * Returns the child reached by {@code newChar}, creating it if needed.
         *
         * @param newChar character labelling the edge
         * @return the existing or newly created child node
         */
        TrieNode add(char newChar) {
            TrieNode t = child.get(newChar);
            if (t == null) {
                t = new TrieNode();
                t.value = newChar;
                child.put(newChar, t);
            }
            return t;
        }
    }
}


你可能感兴趣的:(数据结构JAVA实现)