Java手写字典树(Trie树),实现敏感词过滤

1.简介

字典树:也叫做前缀树,是一种高效的存储、匹配字符串的数据结构,存储过程如下:

假设我们有单词:app、apple、api、cat。如果存放在链表中:

["app"、"apple"、"api"、"cat"],要保存14个字符,使用字典树之后就变成了9个字符

Java手写字典树(Trie树),实现敏感词过滤_第1张图片

2.代码实现

话不多说,代码很简洁,直接看注释就能看懂,如下: 

package 算法;

import sun.reflect.generics.tree.Tree;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * @Author: stukk
 * @Description: Trie (prefix tree) that stores words character by character
 * and offers exact lookup, prefix lookup and a simple banned-word filter.
 **/
public class TrieTree {

    /** Root node; it represents the empty prefix and carries no character itself. */
    private final TreeNode root;

    /** Creates an empty trie consisting of only the root node. */
    public TrieTree() {
        this.root = new TreeNode();
    }

    /**
     * Inserts a word, creating any missing nodes along its path and marking
     * the last node as the end of a word.
     *
     * @param word the word to store
     */
    public void addWord(String word) {
        TreeNode current = root;
        for (char ch : word.toCharArray()) {
            if (!current.contains(ch)) {
                current.addChildren(ch, new TreeNode());
            }
            current = current.getChildren(ch);
        }
        current.setIsWord(true);
    }

    /**
     * Walks the trie along the given prefix.
     *
     * @param prefix the characters to follow from the root
     * @return the node reached after the last character, or {@code null} if
     *         the path does not exist in the trie
     */
    public TreeNode searchPrefix(String prefix) {
        TreeNode current = root;
        for (char ch : prefix.toCharArray()) {
            if (!current.contains(ch)) {
                return null;
            }
            current = current.getChildren(ch);
        }
        return current;
    }

    /**
     * Checks whether the exact word was previously added.
     *
     * @param word the word to look up
     * @return {@code true} only if the path exists and its last node is marked as a word
     */
    public boolean searchWord(String word) {
        TreeNode node = searchPrefix(word);
        return node != null && node.getWord();
    }

    /**
     * Banned-word filter: scans the sentence for any stored word occurring as
     * a substring.
     *
     * @param sentence the text to check; {@code null} or empty text passes
     * @return {@code true} if the sentence passes (no stored word found),
     *         {@code false} as soon as a stored word is matched
     */
    public boolean filter(String sentence) {
        if (sentence == null || sentence.isEmpty()) {
            return true;
        }
        int length = sentence.length();
        // Try every start position; from each one follow the trie as far as it matches.
        for (int start = 0; start < length; start++) {
            TreeNode current = root;
            // The inner bound keeps the scan inside the sentence even when the
            // text ends in the middle of a stored word (e.g. word "abc", text "ab").
            for (int index = start; index < length; index++) {
                current = current.getChildren(sentence.charAt(index));
                if (current == null) {
                    // No stored word continues with this character; move the start forward.
                    break;
                }
                if (current.getWord()) {
                    // A complete banned word ends here.
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Adds every word of the list to the trie.
     *
     * @param words the words to store
     */
    public void setWords(List<String> words) {
        for (String word : words) {
            addWord(word);
        }
    }

    /** Small demo: registers one banned word and prints whether a sentence violates it. */
    public static void main(String[] args) {
        TrieTree trieTree = new TrieTree();
        trieTree.setWords(Arrays.asList("你大爷"));
        boolean filter = trieTree.filter("我*你大爷的");
        if (filter) {
            System.out.println("不违规");
        } else {
            System.out.println("违规");
        }
    }


}

// A single trie node: maps each next character to its child node and records
// whether the path from the root to this node spells a complete word.
class TreeNode {
    // Child nodes keyed by the character on the edge leading to them.
    private final Map<Character, TreeNode> children;
    // True when a stored word ends at this node.
    private boolean isWord;

    // Creates a node with no children that is not the end of a word.
    public TreeNode() {
        this.children = new HashMap<>();
        this.isWord = false;
    }

    // Returns whether this node has a child for the given character.
    public boolean contains(char ch) {
        return children.containsKey(ch);
    }

    // Registers (or replaces) the child reached through the given character.
    public void addChildren(char ch, TreeNode treeNode) {
        children.put(ch, treeNode);
    }

    // Returns the child for the given character, or null if there is none.
    public TreeNode getChildren(char ch) {
        return children.get(ch);
    }

    // Marks or unmarks this node as the end of a stored word.
    public void setIsWord(boolean ok) {
        isWord = ok;
    }

    // Returns whether a stored word ends at this node.
    public Boolean getWord() {
        return isWord;
    }
}

你可能感兴趣的:(java,算法,数据结构)