java中敏感词过滤

这里记录一个实际开发中很常见的需求:对一些字段进行敏感词过滤处理。为此封装了两个工具类:TrieNode(字典树节点)和TrieTree(字典树及过滤方法)。

TrieNode类

import java.util.HashMap;
import java.util.Map;

public class TrieNode {
     
    private Map<Character, TrieNode> nodeMap;
    private boolean endFlag = false;

    public TrieNode(Character letter, Map<Character, TrieNode> nodeMap) {
     
        this.nodeMap = nodeMap;
    }

    public TrieNode addLetter(Character letter) {
     
        if (letter == null) {
     
            return this;
        } else {
     
            if (this.nodeMap == null) {
     
                this.nodeMap = new HashMap();
            }

            TrieNode node = (TrieNode)this.nodeMap.get(letter);
            if (node == null) {
     
                node = new TrieNode(letter, (Map)null);
                this.nodeMap.put(letter, node);
            }

            return node;
        }
    }

    public Map<Character, TrieNode> getNodeMap() {
     
        return this.nodeMap;
    }

    public boolean getEndFlag() {
     
        return this.endFlag;
    }

    public void setEndFlag(boolean endFlag) {
     
        this.endFlag = endFlag;
    }
}

TrieTree类

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Map;

/**
 * Trie-based sensitive-word filter.
 *
 * <p>When the class is initialized, the word list is read from the classpath resource
 * {@code SensitiveWord.txt} (one word per line, decoded as UTF-8). Further words can be
 * registered with {@link #addString(String)}. All state is static and the class performs no
 * synchronization of its own.
 *
 * <p>Matching stops at the shortest registered word that starts at a given position.
 */
public class TrieTree {

    /** Classpath resource holding the word list, one word per line. */
    private static final String WORD_RESOURCE = "SensitiveWord.txt";

    /** Root of the trie; it represents the empty prefix. */
    private static final TrieNode rootNode = new TrieNode(null, null);

    // Load the word list once. This block must stay below rootNode so the root exists first.
    static {
        loadWords(WORD_RESOURCE);
    }

    private TrieTree() {
    }

    /**
     * Registers one sensitive word in the trie.
     *
     * @param keyWord the word to register; {@code null} and empty strings are ignored
     */
    public static void addString(String keyWord) {
        if (keyWord == null || keyWord.isEmpty()) {
            return;
        }

        TrieNode node = rootNode;
        for (int i = 0; i < keyWord.length(); i++) {
            // Descend by one character, creating the child node when it does not exist yet.
            node = node.addLetter(keyWord.charAt(i));
        }
        // The node reached by the last character marks the end of a complete word.
        node.setEndFlag(true);
    }

    /**
     * Replaces every character of each sensitive word found in {@code text} with {@code mark}.
     *
     * @param text the text to filter
     * @param mark the replacement character
     * @return the filtered text, or {@code null} when {@code text} is {@code null}
     */
    public static String filter(String text, char mark) {
        if (text == null) {
            return null;
        }

        int length = text.length();
        StringBuilder builder = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            int end = findMatchEnd(text, i);
            if (end < 0) {
                builder.append(text.charAt(i));
                i++;
            } else {
                // Mask the whole matched span [i, end] and continue right after it.
                for (int k = i; k <= end; k++) {
                    builder.append(mark);
                }
                i = end + 1;
            }
        }
        return builder.toString();
    }

    /**
     * Tells whether {@code text} contains at least one registered sensitive word.
     *
     * @param text the text to inspect
     * @return {@code true} if a sensitive word occurs in {@code text}; {@code false} otherwise,
     *         including when {@code text} is {@code null}
     */
    public static boolean isContaintSensitiveWord(String text) {
        if (text == null) {
            return false;
        }

        for (int i = 0; i < text.length(); i++) {
            if (findMatchEnd(text, i) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the shortest registered word that starts at {@code start}.
     *
     * @return the index of the word's last character, or {@code -1} when no word starts there
     */
    private static int findMatchEnd(String text, int start) {
        TrieNode current = rootNode;
        for (int j = start; j < text.length(); j++) {
            Map<Character, TrieNode> children = current.getNodeMap();
            if (children == null) {
                // A node without a child map has no continuation (e.g. no words were loaded).
                return -1;
            }
            TrieNode next = children.get(text.charAt(j));
            if (next == null) {
                return -1;
            }
            if (next.getEndFlag()) {
                return j;
            }
            current = next;
        }
        return -1;
    }

    /**
     * Reads the word list from the classpath and registers every non-blank line.
     *
     * <p>Loading is best-effort: a missing or unreadable resource is reported on
     * {@code System.err} and leaves the trie as it is, so class initialization never fails
     * because of the word list.
     */
    private static void loadWords(String resourceName) {
        InputStream stream = TrieTree.class.getClassLoader().getResourceAsStream(resourceName);
        if (stream == null) {
            System.err.println("TrieTree: sensitive-word resource not found on classpath: "
                    + resourceName);
            return;
        }

        // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
        // Closing the reader also closes the underlying stream.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Surrounding whitespace would make a word unmatchable; blank lines are skipped.
                String word = line.trim();
                if (!word.isEmpty()) {
                    addString(word);
                }
            }
        } catch (IOException e) {
            System.err.println("TrieTree: failed to read sensitive-word resource "
                    + resourceName + ": " + e);
        }
    }
}

测试类

/**
 * Small manual demo of the sensitive-word filter: prints a filtered sentence and the result of
 * a containment check.
 */
public class TestMain {

    public TestMain() {
    }

    /**
     * Runs the demo.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Mask any sensitive words in a sample sentence with '*'.
        String masked = TrieTree.filter("我们去吃饭了三级", '*');
        System.out.println(masked);

        // Report whether the sample string contains a sensitive word.
        boolean containsSensitiveWord = TrieTree.isContaintSensitiveWord("sm");
        System.out.println(containsSensitiveWord);
    }
}

java中敏感词过滤_第1张图片
封装完成后进行打包,就可以生成一个jar文件。其他人使用时,只需对该jar文件执行mvn install:install-file命令,将其安装到maven本地仓库中,然后在pom.xml文件中引入这个依赖即可使用。
在jar文件所在的目录下执行如下命令,即可将jar安装到maven的本地仓库(需要提前配置好maven的环境变量):

mvn install:install-file -DgroupId=com.xxx -DartifactId=sensitivewordfiltercommon -Dversion=1.0 -Dfile=sensitivewordfiltercommon-1.0.jar -Dpackaging=jar -DgeneratePom=true

最后,如果要把代码部署到测试环境,需要将这个jar放到服务的ROOT/WEB-INF/lib目录中,重启服务器后即可生效。
java中敏感词过滤_第2张图片

你可能感兴趣的:(JavaSE,java中敏感词过滤)