JAVA 敏感词过滤

在网站中进行敏感词过滤是非常必要的。

先写一个过滤器

public class WordValidator implements Filter{

private WordValidatorUtil wordValidatorUtil=new WordValidatorUtil(); 

@Override

public void destroy() {

wordValidatorUtil=null;

}

@Override

public void doFilter(ServletRequest arg0, ServletResponse arg1,

FilterChain arg2) throws IOException, ServletException {

HttpServletRequest request= (HttpServletRequest) arg0;

HttpServletResponse response = (HttpServletResponse) arg1;

String txt=request.getParameter("txt");

if(txt!=null)

{

Boolean bool = wordValidatorUtil.isContaintSensitiveWord(txt, 1);

if(bool==true){

System.out.println("有敏感词");

PrintWriter writer=response.getWriter();

writer.print(common.util.Status.SENSITIVE_WORD_TRUE);

writer.flush();

writer.close();

}else {

System.out.println("没有敏感词");

arg2.doFilter(arg0, arg1);

}

}else {

arg2.doFilter(arg0, arg1);

}

return;

}

@Override

public void init(FilterConfig arg0) throws ServletException {

}

}


//WordValidatorUtil 类 判断输入是否包含关键字

public class WordValidatorUtil {

private Map sensitiveWordMap = null;

public static int minMatchTYpe = 1;      //最小匹配规则

public static int maxMatchType = 2;      //最大匹配规则

/**

* 构造函数,初始化敏感词库

*/

public WordValidatorUtil(){

WordValidatorInit wordValidatorInit=new WordValidatorInit();

sensitiveWordMap=wordValidatorInit.initKeyWord();

}

/**

* 判断文字是否包含敏感字符

* @param txt  文字

* @param matchType  匹配规则 1:最小匹配规则,2:最大匹配规则

* @return 若包含返回true,否则返回false

* @version 1.0

*/

public boolean isContaintSensitiveWord(String txt,int matchType){

boolean flag = false;

for(int i = 0 ; i < txt.length() ; i++){

int matchFlag = this.CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符

if(matchFlag > 0){    //大于0存在,返回true

flag = true;

}

}

return flag;

}

/**

* 检查文字中是否包含敏感字符,检查规则如下:<br>

* @author chenming 

* @param beginIndex

* @param matchType

* @return,如果存在,则返回敏感词字符的长度,不存在返回0

*/

public int CheckSensitiveWord(String txt,int beginIndex,int matchType){

boolean  flag = false;    //敏感词结束标识位:用于敏感词只有1位的情况

int matchFlag = 0;     //匹配标识数默认为0

char word = 0;

Map nowMap = sensitiveWordMap;

for(int i = beginIndex; i < txt.length() ; i++){

word = txt.charAt(i);

nowMap = (Map) nowMap.get(word);     //获取指定key

if(nowMap != null){     //存在,则判断是否为最后一个

matchFlag++;     //找到相应key,匹配标识+1 

if("1".equals(nowMap.get("isEnd"))){       //如果为最后一个匹配规则,结束循环,返回匹配标识数

flag = true;       //结束标志位为true   

if(WordValidatorUtil.minMatchTYpe == matchType){    //最小规则,直接返回,最大规则还需继续查找

break;

}

}

}

else{     //不存在,直接返回

break;

}

}

if(matchFlag < 2 || !flag){        //长度必须大于等于1,为词 

matchFlag = 0;

}

return matchFlag;

}

}


//敏感词库初始化

/**
 * Loads the sensitive-word list from the classpath resource {@code SensitiveWord.txt}
 * and builds a trie of nested maps from it.
 *
 * <p>Each trie node is a map from a {@code Character} to its child node; the string key
 * {@code "isEnd"} holds {@code "1"} on nodes that complete a word and {@code "0"}
 * otherwise.
 */
public class WordValidatorInit {

    /** Character encoding of the word-list file. */
    private static final String ENCODING = "UTF-8";

    /** Classpath name of the word-list file (one word per line). */
    private static final String WORD_FILE = "SensitiveWord.txt";

    /** Key under which a trie node stores its end-of-word marker. */
    private static final String END_KEY = "isEnd";

    /** Root of the trie; kept as a raw public field for existing callers. */
    @SuppressWarnings("rawtypes")
    public HashMap sensitiveWordMap;

    /**
     * Reads the word list and builds the trie.
     *
     * <p>Loading is best-effort: if the list cannot be read the error is printed and an
     * empty trie is returned (or the previously loaded one, if any), never {@code null}.
     *
     * @return the root node of the trie
     */
    @SuppressWarnings("rawtypes")
    public Map initKeyWord() {
        try {
            Set<String> keyWordSet = readSensitiveWordFile();
            addSensitiveWordToHashMap(keyWordSet);
        } catch (Exception e) {
            // Broad catch on purpose: a broken word list must not prevent start-up.
            e.printStackTrace();
            if (sensitiveWordMap == null) {
                sensitiveWordMap = new HashMap<Object, Object>();
            }
        }
        return sensitiveWordMap;
    }

    /**
     * Builds the trie from the given words and publishes it in
     * {@link #sensitiveWordMap}.
     *
     * @param keyWordSet sensitive words to index
     */
    @SuppressWarnings("unchecked") // child nodes are only ever created by this method
    private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
        // Presized to the word count to reduce rehashing of the root node.
        HashMap<Object, Object> root = new HashMap<Object, Object>(keyWordSet.size());

        for (String key : keyWordSet) {
            Map<Object, Object> nowMap = root;
            for (int i = 0; i < key.length(); i++) {
                Character keyChar = Character.valueOf(key.charAt(i));
                Map<Object, Object> child = (Map<Object, Object>) nowMap.get(keyChar);
                if (child == null) {
                    // New node: not a word end until proven otherwise.
                    child = new HashMap<Object, Object>();
                    child.put(END_KEY, "0");
                    nowMap.put(keyChar, child);
                }
                nowMap = child;
            }
            // Mark the node of the last character; an empty word never leaves the root.
            if (nowMap != root) {
                nowMap.put(END_KEY, "1");
            }
        }

        // Assign only once fully built so a half-populated trie is never exposed.
        sensitiveWordMap = root;
    }

    /**
     * Reads the word list from the classpath, one word per line.
     *
     * <p>A leading byte-order mark, surrounding whitespace and blank lines are ignored.
     *
     * @return the set of words read
     * @throws java.io.IOException if the resource is missing or cannot be read
     */
    private Set<String> readSensitiveWordFile() throws java.io.IOException {
        // Stream access also works when the resource is inside a jar or its path
        // contains characters that are escaped in the resource URL.
        java.io.InputStream in = getClass().getClassLoader().getResourceAsStream(WORD_FILE);
        if (in == null) {
            throw new java.io.FileNotFoundException("敏感词库文件不存在");
        }

        Set<String> set = new HashSet<String>();
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in, ENCODING));
            boolean firstLine = true;
            String txt;
            while ((txt = bufferedReader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // Drop a UTF-8 byte-order mark so the first word stays matchable.
                    if (txt.length() > 0 && txt.charAt(0) == '\uFEFF') {
                        txt = txt.substring(1);
                    }
                }
                txt = txt.trim();
                if (txt.length() > 0) {
                    set.add(txt);
                }
            }
        } finally {
            in.close();
        }
        return set;
    }
}


你可能感兴趣的:(JAVA 敏感词过滤)