DFA有穷自动机敏感词过滤算法

1.EndType

package com.example.utils.wordfilter;

/**
 * 结束类型定义
 */
public enum EndType {

    /**
     * 有下一个,结束
     */
    HAS_NEXT, IS_END
}

2.WordType

package com.example.utils.wordfilter;

/**
 * 词汇类型
 */
public enum WordType {

    /**
     * 黑名单/白名单
     */
    BLACK, WHITE
}

3.FlagIndex

package com.example.utils.wordfilter;

import java.util.List;

/**
 * 敏感词标记
 */
public class FlagIndex {

    /**
     * 标记结果
     */
    private boolean flag;
    /**
     * 是否黑名单词汇
     */
    private boolean isWhiteWord;
    /**
     * 标记索引
     */
    private List index;

    public boolean isFlag() {
        return flag;
    }

    public void setFlag(boolean flag) {
        this.flag = flag;
    }

    public List getIndex() {
        return index;
    }

    public void setIndex(List index) {
        this.index = index;
    }

    public boolean isWhiteWord() {
        return isWhiteWord;
    }

    public void setWhiteWord(boolean whiteWord) {
        isWhiteWord = whiteWord;
    }
}

4.WordContext

package com.example.utils.wordfilter;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.*;

/**
 * 词库上下文环境
 * 初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public class WordContext {

    /**
     * 敏感词字典
     */
    private final Map wordMap = new HashMap();

    /**
     * 是否已初始化
     */
    private boolean init;
    /**
     * 黑名单列表
     */
    private final String blackList;
    /**
     * 白名单列表
     */
    private final String whiteList;

    public WordContext() {
        this.blackList = "/blacklist.txt";
        this.whiteList = "/whitelist.txt";
        initKeyWord();
    }

    public WordContext(String blackList, String whiteList) {
        this.blackList = blackList;
        this.whiteList = whiteList;
        initKeyWord();
    }

    /**
     * 获取初始化的敏感词列表
     *
     * @return 敏感词列表
     */
    public Map getWordMap() {
        return wordMap;
    }

    /**
     * 初始化
     */
    private synchronized void initKeyWord() {
        try {
            if (!init) {
                // 将敏感词库加入到HashMap中
                addWord(readWordFile(blackList), WordType.BLACK);
                // 将非敏感词库也加入到HashMap中
                addWord(readWordFile(whiteList), WordType.WHITE);
            }
            init = true;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:
* 中 = { isEnd = 0 国 = {
* isEnd = 1 人 = {isEnd = 0 民 = {isEnd = 1} } 男 = { isEnd = 0 人 = { isEnd = 1 } * } } } 五 = { isEnd = 0 星 = { isEnd = 0 红 = { isEnd = 0 旗 = { isEnd = 1 } } } } */ public void addWord(Iterable wordList, WordType wordType) { Map nowMap; Map newWorMap;

你可能感兴趣的:(算法,算法,DFA,敏感词,过滤)