1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
package
dfa
;
import
java
.
util
.
ArrayList
;
import
java
.
util
.
HashMap
;
import
java
.
util
.
HashSet
;
import
java
.
util
.
List
;
import
java
.
util
.
Set
;
/**
 * Keyword (sensitive-word) filter backed by a character trie.
 *
 * <p>Keywords are registered with {@link #addKeywords(List)}; text can then be
 * scanned either for the set of keywords it contains
 * ({@link #getTxtKeyWords(String)}) or simply for whether it contains any
 * keyword at all ({@link #isContentKeyWords(String)}).
 *
 * <p>Instances hold mutable state and perform no synchronization.
 */
public class KeywordFilter {

    /** Match mode value meaning "stop at the shortest keyword found". */
    private static final int MATCH_MIN = 1;

    /**
     * One trie node: the outgoing edges keyed by character plus a flag telling
     * whether the path from the root to this node spells a complete keyword.
     */
    private static final class Node {
        private final HashMap<Character, Node> children = new HashMap<Character, Node>();
        private boolean end;
    }

    /** Root of the keyword trie; the root itself never terminates a keyword. */
    private final Node root = new Node();

    /**
     * Match mode used by {@link #getTxtKeyWords(String)}:
     * 1 = shortest match, 2 = longest match. Any value other than 1 behaves
     * as longest match.
     */
    private int matchType = MATCH_MIN;

    /**
     * Adds keywords to the filter. Each keyword is trimmed before insertion;
     * keywords that are empty after trimming add nothing.
     *
     * @param keywords keywords to add; a {@code null} list or {@code null}
     *                 elements are ignored
     */
    public void addKeywords(List<String> keywords) {
        if (keywords == null) {
            return;
        }
        for (String raw : keywords) {
            if (raw == null) {
                continue;
            }
            String key = raw.trim();
            if (key.length() == 0) {
                continue;
            }
            Node node = root;
            for (int j = 0; j < key.length(); j++) {
                Character ch = Character.valueOf(key.charAt(j));
                Node next = node.children.get(ch);
                if (next == null) {
                    next = new Node();
                    node.children.put(ch, next);
                }
                node = next;
            }
            // The node reached by the last character marks a complete keyword.
            node.end = true;
        }
    }

    /**
     * Removes all registered keywords.
     */
    public void clearKeywords() {
        root.children.clear();
    }

    /**
     * Checks whether a keyword starts at position {@code begin} of {@code txt}.
     *
     * @param txt   text to scan; must not be {@code null}
     * @param begin index of the first character to examine
     * @param flag  1 = return on the shortest keyword found; any other value =
     *              keep walking and return the longest keyword found
     * @return length of the matched keyword, or 0 if none starts at {@code begin}
     */
    private int checkKeyWords(String txt, int begin, int flag) {
        Node node = root;
        int longest = 0;
        int length = txt.length();
        for (int i = begin; i < length; i++) {
            node = node.children.get(Character.valueOf(txt.charAt(i)));
            if (node == null) {
                // No keyword continues with this character.
                break;
            }
            if (node.end) {
                int matched = i - begin + 1;
                if (flag == MATCH_MIN) {
                    return matched;
                }
                longest = matched;
            }
        }
        return longest;
    }

    /**
     * Returns the keywords found in {@code txt}, using the current match type.
     * After a keyword is matched, scanning resumes right after it, so matches
     * do not overlap.
     *
     * @param txt text to scan; {@code null} is treated as containing no keywords
     * @return the distinct keywords found (possibly empty, never {@code null})
     */
    public Set<String> getTxtKeyWords(String txt) {
        Set<String> found = new HashSet<String>();
        if (txt == null) {
            return found;
        }
        int length = txt.length();
        int i = 0;
        while (i < length) {
            int len = checkKeyWords(txt, i, matchType);
            if (len > 0) {
                found.add(txt.substring(i, i + len));
                i += len;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Tells whether {@code txt} contains at least one keyword. Always uses
     * shortest matching, regardless of the configured match type.
     *
     * @param txt text to scan; {@code null} is treated as containing no keywords
     * @return {@code true} if any keyword occurs in {@code txt}
     */
    public boolean isContentKeyWords(String txt) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkKeyWords(txt, i, MATCH_MIN) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the current match type (1 = shortest match, 2 = longest match)
     */
    public int getMatchType() {
        return matchType;
    }

    /**
     * Sets the match type used by {@link #getTxtKeyWords(String)}.
     *
     * @param matchType 1 = shortest match; any other value behaves as longest match
     */
    public void setMatchType(int matchType) {
        this.matchType = matchType;
    }

    /**
     * Small demo: registers two keywords and scans a sample sentence.
     */
    public static void main(String[] args) {
        KeywordFilter filter = new KeywordFilter();
        List<String> keywords = new ArrayList<String>();
        keywords.add("中国人");
        keywords.add("中国男人");
        filter.addKeywords(keywords);
        String txt = "中国人民站起来了";
        boolean boo = filter.isContentKeyWords(txt);
        System.out.println(boo);
        Set<String> set = filter.getTxtKeyWords(txt);
        System.out.println(set);
    }
}
|
本文固定链接: http://blog.shilimin.com/298.htm | 博客