note目录
WordMap.lua文件
-- WordMap: a single node of the filter-word tree, declared through the
-- project's luaclass helper.
WordMap = luaclass("WordMap")

-- Put this node into its empty starting state and hand it back so that
-- construction and initialisation can be chained: WordMap():Init().
--   map    : child nodes of this node (starts empty)
--   isLast : flag of this node (starts false)
function WordMap:Init()
    self.map = {}
    self.isLast = false
    return self
end

return WordMap
过滤词树的单个节点(WordMap)包括2个元素:
1:isLast,表示该节点是否为词尾节点(即某个过滤词的最后一个字符)
2:map,子节点表table{},以单个字符作为key,存放对应的子WordMap节点
假设过滤词库里有如下过滤词:
-- Sample filter configuration: entry 1 carries the filter words as one
-- comma-separated string.
local configFilter = {
    [1] = {
        filterWord = "李泽东,李泽西,李泽北,李泽南,李克勤,周树人,周就,周佳佳",
    },
}
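构造的过滤词库的树结构(标有 isLast 的节点为词尾节点):
root
├─ 李
│  ├─ 泽
│  │  ├─ 东 (isLast)
│  │  ├─ 西 (isLast)
│  │  ├─ 北 (isLast)
│  │  └─ 南 (isLast)
│  └─ 克
│     └─ 勤 (isLast)
└─ 周
   ├─ 树
   │  └─ 人 (isLast)
   ├─ 就 (isLast)
   └─ 佳
      └─ 佳 (isLast)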
完整的过滤词库代码:
WordFilterManager.lua
-- Global module table that holds the word-filter functions and, once built,
-- the root of the filter-word tree (WordFilterManager.wordMap).
WordFilterManager = {}
-- Node class used for every node of the filter-word tree.
local WordMap = require("WordMap")
-- Build the filter-word tree from the configured word list.
-- Creates a fresh, empty root node in WordFilterManager.wordMap, splits
-- configFilter[1].filterWord on "," and inserts every non-empty word.
function WordFilterManager.InitWordFilter()
    -- Start from an empty root node
    local root = WordMap():Init()
    WordFilterManager.wordMap = root

    local words = StringUtil.Split(configFilter[1].filterWord, ",")
    for index = 1, #words do
        local entry = words[index]
        -- Empty pieces (e.g. from a trailing comma) are not words
        if entry ~= "" then
            WordFilterManager.AddWordsChar(root, entry, 1)
        end
    end
end
-- Insert the characters of one filter word into the tree, one node per
-- character, and mark the node of the final character as a word end.
-- @param wordMap      node under which the character at charIdIndex is stored
-- @param word         the filter word
-- @param charIdIndex  1-based character index to start inserting from
--                     (InitWordFilter passes 1 to insert the whole word)
function WordFilterManager.AddWordsChar(wordMap, word, charIdIndex)
    -- Measure the word once instead of twice per character
    local total = StringUtil.utf8len(word)
    -- Nothing to insert for an empty word or a start index past the end;
    -- without this guard a meaningless child node would be created.
    if charIdIndex > total then
        return
    end

    local node = wordMap
    for index = charIdIndex, total do
        local singleChar = StringUtil.utf8sub(word, index, 1)
        local child = node.map[singleChar]
        if child == nil then
            child = WordMap():Init()
            node.map[singleChar] = child
        end
        node = child
    end
    -- node is now the node of the word's last character
    node.isLast = true
end
-- Check whether a filter word starts at a given character position of a string.
-- Walks the tree character by character and stops at the first node flagged
-- isLast, so the shortest filter word starting at beginIndex is reported.
-- @param wordMap       tree node to start matching from (normally the root)
-- @param word          the string being scanned
-- @param beginIndex    1-based character index where matching starts
-- @param filterCounts  number of characters already matched (callers pass 0)
-- @return true and the number of matched characters when a filter word is
--         found; false and 0 otherwise
function WordFilterManager.CheckWord(wordMap , word , beginIndex , filterCounts)
    local filterCount = filterCounts or 0
    local total = StringUtil.utf8len(word)
    local node = wordMap
    local index = beginIndex
    -- Stop as soon as the string is exhausted: a partial match is not a match
    while index <= total do
        local singleChar = StringUtil.utf8sub(word, index, 1)
        local child = node.map[singleChar]
        if child == nil then
            return false, 0
        end
        filterCount = filterCount + 1
        if child.isLast then
            return true, filterCount
        end
        node = child
        index = index + 1
    end
    return false, 0
end
-- Report whether a string contains at least one filter word.
-- @param str  the text to scan
-- @return true if a filter word starts at any character position of str,
--         false otherwise
function WordFilterManager.IsContains(str)
    local len = StringUtil.utf8len(str)
    for i = 1, len do
        -- Only the first result of CheckWord (the match flag) is needed here
        if WordFilterManager.CheckWord(WordFilterManager.wordMap, str, i, 0) then
            return true
        end
    end
    return false
end
-- Replace every filter word found in a string with "*" characters, one "*"
-- per character of the matched word.
-- @param str  the text to filter
-- @return a new string with each matched filter word masked
function WordFilterManager.Filter(str)
    local len = StringUtil.utf8len(str)
    local pieces = {}
    -- A while loop is used because the position must jump past a matched
    -- word; assigning to the control variable of a numeric for has no effect.
    local index = 1
    while index <= len do
        local isFilter, filterCount =
            WordFilterManager.CheckWord(WordFilterManager.wordMap, str, index, 0)
        if isFilter then
            pieces[#pieces + 1] = string.rep("*", filterCount)
            index = index + filterCount
        else
            -- Not the start of a filter word: keep the character unchanged
            pieces[#pieces + 1] = StringUtil.utf8sub(str, index, 1)
            index = index + 1
        end
    end
    return table.concat(pieces)
end
-- Usage example.
-- The tree must exist before any lookup; build it here unless it has
-- already been built.
if WordFilterManager.wordMap == nil then
    WordFilterManager.InitWordFilter()
end
local str = "aaa我是李泽西啊aaa"
local isContainsFilterWord = WordFilterManager.IsContains(str)
local filteredStr = WordFilterManager.Filter(str)
print(tostring(isContainsFilterWord)) ---> output: true
print(filteredStr) ---> output:aaa我是***啊aaa